From 93bdfcb2105afe7a45d512984b855ce97937cfc1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 8 May 2019 19:13:46 -0400 Subject: [PATCH] Update bcachefs sources to 454bd4f82d bcachefs: Fix for the stripes mark path and gc --- .bcachefs_revision | 2 +- include/trace/events/bcachefs.h | 72 +++++++++++++++++++++++++++++++++ libbcachefs/btree_cache.c | 2 + libbcachefs/btree_iter.c | 14 +++++-- libbcachefs/btree_types.h | 1 + libbcachefs/btree_update_leaf.c | 7 ++++ libbcachefs/buckets.c | 2 +- libbcachefs/buckets_types.h | 2 +- libbcachefs/fs-io.c | 12 +++--- libbcachefs/fs.c | 2 +- libbcachefs/migrate.c | 22 +++------- libbcachefs/move.c | 66 ++---------------------------- libbcachefs/opts.h | 2 +- libbcachefs/replicas.c | 58 ++++++++++++++++++++++++++ libbcachefs/replicas.h | 1 + libbcachefs/super.c | 7 ++-- libbcachefs/util.c | 2 +- 17 files changed, 177 insertions(+), 97 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index 666368e..def8737 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -6f603b8d79efa7d9ac04ea0c38ef1bbaa10fd678 +454bd4f82d85bb42a86b8eb0172b13e86e5788a7 diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 6781a5b..5fa570a 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -498,6 +498,78 @@ TRACE_EVENT(copygc, __entry->buckets_moved, __entry->buckets_not_moved) ); +DECLARE_EVENT_CLASS(transaction_restart, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip), + + TP_STRUCT__entry( + __array(char, name, 16) + __field(unsigned long, ip ) + ), + + TP_fast_assign( + memcpy(__entry->name, c->name, 16); + __entry->ip = ip; + ), + + TP_printk("%pf", (void *) __entry->ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_btree_node_reused, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_would_deadlock, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_iters_realloced, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_mem_realloced, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_journal_res_get, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_journal_preres_get, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_mark_replicas, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_fault_inject, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_traverse, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + +DEFINE_EVENT(transaction_restart, trans_restart_atomic, + TP_PROTO(struct bch_fs *c, unsigned long ip), + TP_ARGS(c, ip) +); + #endif /* _TRACE_BCACHE_H */ /* This part must be outside protection */ diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 074ea6f..2a20bde 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -732,6 +732,8 @@ retry: goto retry; trans_restart(); + trace_trans_restart_btree_node_reused(c, + iter->trans->ip); return ERR_PTR(-EINTR); } } diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 49ddf05..5631f98 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -251,12 +251,15 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, } } - if (ret) - __btree_node_lock_type(iter->trans->c, b, type); - else + if (unlikely(!ret)) { trans_restart(); + trace_trans_restart_would_deadlock(iter->trans->c, + iter->trans->ip); + return false; + } - return ret; + __btree_node_lock_type(iter->trans->c, b, type); + return true; } /* Btree iterator locking: */ @@ -1692,6 +1695,7 @@ success: if (trans->iters_live) { trans_restart(); + trace_trans_restart_iters_realloced(trans->c, trans->ip); return -EINTR; } @@ -1859,6 +1863,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, if (old_bytes) { trans_restart(); + trace_trans_restart_mem_realloced(trans->c, trans->ip); return ERR_PTR(-EINTR); } } @@ -1935,6 +1940,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c) memset(trans, 0, offsetof(struct btree_trans, iters_onstack)); trans->c = c; + trans->ip = _RET_IP_; trans->size = ARRAY_SIZE(trans->iters_onstack); trans->iters = trans->iters_onstack; trans->updates = trans->updates_onstack; diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index ae273ab..57ef501 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -268,6 +268,7 @@ struct btree_insert_entry { struct btree_trans { struct bch_fs *c; + unsigned long ip; size_t nr_restarts; u64 commit_start; diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index d052ca5..dde1fc1 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -439,6 +439,7 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans) if (!bch2_btree_trans_relock(trans)) { trans_restart(" (iter relock after journal preres get blocked)"); + trace_trans_restart_journal_preres_get(c, trans->ip); return -EINTR; } @@ -575,6 +576,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, if (race_fault()) { ret = -EINTR; trans_restart(" (race)"); + trace_trans_restart_fault_inject(c, trans->ip); goto out; } @@ -725,6 +727,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, */ if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) { trans_restart(" (split)"); + trace_trans_restart_btree_node_split(c, trans->ip); ret = -EINTR; } break; @@ -744,6 +747,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; trans_restart(" (iter relock after marking replicas)"); + trace_trans_restart_mark_replicas(c, trans->ip); ret = -EINTR; break; case BTREE_INSERT_NEED_JOURNAL_RES: @@ -757,6 +761,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; trans_restart(" (iter relock after journal res get blocked)"); + trace_trans_restart_journal_res_get(c, trans->ip); ret = -EINTR; break; default: @@ -769,6 +774,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret2) { trans_restart(" (traverse)"); + trace_trans_restart_traverse(c, trans->ip); return ret2; } @@ -780,6 +786,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, return 0; trans_restart(" (atomic)"); + trace_trans_restart_atomic(c, trans->ip); } return ret; diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 58f2589..9f09e5b 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -983,7 +983,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, return -1; } - if (m->alive) + if (!gc && m->alive) bch2_stripes_heap_del(c, m, idx); memset(m, 0, sizeof(*m)); diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index 974daa7..a333b9e 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -97,7 +97,7 @@ struct bch_fs_usage_short { struct replicas_delta { s64 delta; struct bch_replicas_entry r; -}; +} __packed; struct replicas_delta_list { struct bch_fs_usage fs_usage; diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index b2ea783..7133482 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -1189,9 +1189,10 @@ static void bch2_writepage_io_done(struct closure *cl) unsigned i; if (io->op.op.error) { - bio_for_each_segment_all(bvec, bio, i) + bio_for_each_segment_all(bvec, bio, i) { SetPageError(bvec->bv_page); - set_bit(AS_EIO, &io->op.inode->v.i_mapping->flags); + mapping_set_error(bvec->bv_page->mapping, -EIO); + } } /* @@ -2079,10 +2080,9 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (ret) return ret; out: - if (c->opts.journal_flush_disabled) - return 0; - - ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq); + if (!c->opts.journal_flush_disabled) + ret = bch2_journal_flush_seq(&c->journal, + inode->ei_journal_seq); ret2 = file_check_and_advance_wb_err(file); return ret ?: ret2; diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index af58d00..dc6c7df 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -395,7 +395,7 @@ retry: if (!tmpfile) { bch2_inode_update_after_write(c, dir, &dir_u, ATTR_MTIME|ATTR_CTIME); - journal_seq_copy(dir, inode->ei_journal_seq); + journal_seq_copy(dir, journal_seq); mutex_unlock(&dir->ei_update_lock); } diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index 822b3fc..190b545 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -47,10 +47,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH); - mutex_lock(&c->replicas_gc_lock); - bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED)); - - while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { if (!bkey_extent_is_data(k.k) || @@ -96,12 +92,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) break; } - BUG_ON(ret == -EINTR); - - bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; - bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); + BUG_ON(ret == -EINTR); return ret; } @@ -122,9 +115,6 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) bch2_trans_init(&trans, c); closure_init_stack(&cl); - mutex_lock(&c->replicas_gc_lock); - bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); - for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, b) { @@ -177,10 +167,9 @@ retry: ret = 0; err: - bch2_trans_exit(&trans); + ret = bch2_trans_exit(&trans) ?: ret; - ret = bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); + BUG_ON(ret == -EINTR); return ret; } @@ -188,5 +177,6 @@ err: int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) { return bch2_dev_usrdata_drop(c, dev_idx, flags) ?: - bch2_dev_metadata_drop(c, dev_idx, flags); + bch2_dev_metadata_drop(c, dev_idx, flags) ?: + bch2_replicas_gc2(c); } diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 946e616..d39f563 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -620,64 +620,6 @@ out: return ret; } -static int bch2_gc_data_replicas(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter *iter; - struct bkey_s_c k; - int ret; - - bch2_trans_init(&trans, c); - - mutex_lock(&c->replicas_gc_lock); - bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED)); - - for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - ret = bch2_mark_bkey_replicas(c, k); - if (ret) - break; - } - ret = bch2_trans_exit(&trans) ?: ret; - - bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); - - return ret; -} - -static int bch2_gc_btree_replicas(struct bch_fs *c) -{ - struct btree_trans trans; - struct btree_iter *iter; - struct btree *b; - unsigned id; - int ret = 0; - - bch2_trans_init(&trans, c); - - mutex_lock(&c->replicas_gc_lock); - bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE); - - for (id = 0; id < BTREE_ID_NR; id++) { - for_each_btree_node(&trans, iter, id, POS_MIN, - BTREE_ITER_PREFETCH, b) { - ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); - - bch2_trans_cond_resched(&trans); - } - - ret = bch2_trans_iter_free(&trans, iter) ?: ret; - } - - bch2_trans_exit(&trans); - - bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); - - return ret; -} - static int bch2_move_btree(struct bch_fs *c, move_pred_fn pred, void *arg, @@ -804,14 +746,14 @@ int bch2_data_job(struct bch_fs *c, bch2_journal_meta(&c->journal); } - ret = bch2_gc_btree_replicas(c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, NULL, writepoint_hashed((unsigned long) current), op.start, op.end, rereplicate_pred, c, stats) ?: ret; - ret = bch2_gc_data_replicas(c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_MIGRATE: if (op.migrate.dev >= c->sb.nr_devices) @@ -821,14 +763,14 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; - ret = bch2_gc_btree_replicas(c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; ret = bch2_move_data(c, NULL, writepoint_hashed((unsigned long) current), op.start, op.end, migrate_pred, &op, stats) ?: ret; - ret = bch2_gc_data_replicas(c) ?: ret; + ret = bch2_replicas_gc2(c) ?: ret; break; default: ret = -EINVAL; diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index a95e144..390bf92 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -219,7 +219,7 @@ enum opt_type { x(fsck, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ - NO_SB_OPT, true, \ + NO_SB_OPT, false, \ NULL, "Run fsck on mount") \ x(fix_errors, u8, \ OPT_MOUNT, \ diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index eb44119..319c7dc 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -567,6 +567,64 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) return 0; } +int bch2_replicas_gc2(struct bch_fs *c) +{ + struct bch_replicas_cpu new = { 0 }; + unsigned i, nr; + int ret = 0; + + bch2_journal_meta(&c->journal); +retry: + nr = READ_ONCE(c->replicas.nr); + new.entry_size = READ_ONCE(c->replicas.entry_size); + new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); + if (!new.entries) + return -ENOMEM; + + mutex_lock(&c->sb_lock); + percpu_down_write(&c->mark_lock); + + if (nr != c->replicas.nr || + new.entry_size != c->replicas.entry_size) { + percpu_up_write(&c->mark_lock); + mutex_unlock(&c->sb_lock); + kfree(new.entries); + goto retry; + } + + for (i = 0; i < c->replicas.nr; i++) { + struct bch_replicas_entry *e = + cpu_replicas_entry(&c->replicas, i); + + if (e->data_type == BCH_DATA_JOURNAL || + c->usage_base->replicas[i] || + percpu_u64_get(&c->usage[0]->replicas[i]) || + percpu_u64_get(&c->usage[1]->replicas[i])) + memcpy(cpu_replicas_entry(&new, new.nr++), + e, new.entry_size); + } + + bch2_cpu_replicas_sort(&new); + + if (bch2_cpu_replicas_to_sb_replicas(c, &new)) { + ret = -ENOSPC; + goto err; + } + + ret = replicas_table_update(c, &new); +err: + kfree(new.entries); + + percpu_up_write(&c->mark_lock); + + if (!ret) + bch2_write_super(c); + + mutex_unlock(&c->sb_lock); + + return ret; +} + int bch2_replicas_set_usage(struct bch_fs *c, struct bch_replicas_entry *r, u64 sectors) diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index 2ffafad..bca82e0 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -58,6 +58,7 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *); int bch2_replicas_gc_end(struct bch_fs *, int); int bch2_replicas_gc_start(struct bch_fs *, unsigned); +int bch2_replicas_gc2(struct bch_fs *); int bch2_replicas_set_usage(struct bch_fs *, struct bch_replicas_entry *, diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 0cbc7ee..aa3adbf 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -735,9 +735,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) sizeof(struct btree_node_iter_set); if (!(c->wq = alloc_workqueue("bcachefs", - WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || + WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || !(c->copygc_wq = alloc_workqueue("bcache_copygc", - WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || + WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || percpu_ref_init(&c->writes, bch2_writes_disabled, @@ -925,7 +925,8 @@ err: break; } - BUG_ON(!ret); + if (ret >= 0) + ret = -EIO; goto out; } diff --git a/libbcachefs/util.c b/libbcachefs/util.c index fea80e2..94dd651 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -142,10 +142,10 @@ void bch2_flags_to_text(struct printbuf *out, nr++; while (flags && (bit = __ffs(flags)) < nr) { - pr_buf(out, "%s", list[bit]); if (!first) pr_buf(out, ","); first = false; + pr_buf(out, "%s", list[bit]); flags ^= 1 << bit; } } -- 2.39.5