git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 454bd4f82d bcachefs: Fix for the stripes mark path and gc
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 8 May 2019 23:13:46 +0000 (19:13 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Wed, 8 May 2019 23:13:46 +0000 (19:13 -0400)
17 files changed:
.bcachefs_revision
include/trace/events/bcachefs.h
libbcachefs/btree_cache.c
libbcachefs/btree_iter.c
libbcachefs/btree_types.h
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets_types.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/migrate.c
libbcachefs/move.c
libbcachefs/opts.h
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/super.c
libbcachefs/util.c

index 666368e80f8d2961ec3e99c947940c604ddc88a7..def87375399990b1c6d54ac8a8da8cffb86c38fc 100644 (file)
@@ -1 +1 @@
-6f603b8d79efa7d9ac04ea0c38ef1bbaa10fd678
+454bd4f82d85bb42a86b8eb0172b13e86e5788a7
index 6781a5b589531bbd4ae7a8fd793ca769fc85d728..5fa570a516a70ca8928da22f43d2282ded0b4b8f 100644 (file)
@@ -498,6 +498,78 @@ TRACE_EVENT(copygc,
                __entry->buckets_moved, __entry->buckets_not_moved)
 );
 
+DECLARE_EVENT_CLASS(transaction_restart, /* event class: record layout and format shared by the trans_restart_* events */
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip),
+
+       TP_STRUCT__entry(
+               __array(char,                   name,   16)
+               __field(unsigned long,          ip      )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->name, c->name, 16); /* fixed 16-byte copy out of c->name */
+               __entry->ip = ip;
+       ),
+
+       TP_printk("%pf", (void *) __entry->ip) /* only ip is formatted; name is recorded but not printed */
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_reused, /* this and every DEFINE_EVENT after it instantiate the transaction_restart class with identical (c, ip) arguments */
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_would_deadlock,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_iters_realloced,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_mem_realloced,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_res_get,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_journal_preres_get,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_mark_replicas,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_fault_inject,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_btree_node_split,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_traverse,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart,      trans_restart_atomic,
+       TP_PROTO(struct bch_fs *c, unsigned long ip),
+       TP_ARGS(c, ip)
+);
+
 #endif /* _TRACE_BCACHE_H */
 
 /* This part must be outside protection */
index 074ea6f16c9dc42432f65e65f89ec1257e20cf89..2a20bdef8fd090c624ef6548d10b40f26d0f8033 100644 (file)
@@ -732,6 +732,8 @@ retry:
                                goto retry;
 
                        trans_restart();
+                       trace_trans_restart_btree_node_reused(c,
+                                               iter->trans->ip);
                        return ERR_PTR(-EINTR);
                }
        }
index 49ddf05cc9a9522d8312c4ccff8d8aabaef485fd..5631f98f38456df739b8e28852da2b9e212c694b 100644 (file)
@@ -251,12 +251,15 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
                }
        }
 
-       if (ret)
-               __btree_node_lock_type(iter->trans->c, b, type);
-       else
+       if (unlikely(!ret)) {
                trans_restart();
+               trace_trans_restart_would_deadlock(iter->trans->c,
+                                                  iter->trans->ip);
+               return false;
+       }
 
-       return ret;
+       __btree_node_lock_type(iter->trans->c, b, type);
+       return true;
 }
 
 /* Btree iterator locking: */
@@ -1692,6 +1695,7 @@ success:
 
        if (trans->iters_live) {
                trans_restart();
+               trace_trans_restart_iters_realloced(trans->c, trans->ip);
                return -EINTR;
        }
 
@@ -1859,6 +1863,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans,
 
                if (old_bytes) {
                        trans_restart();
+                       trace_trans_restart_mem_realloced(trans->c, trans->ip);
                        return ERR_PTR(-EINTR);
                }
        }
@@ -1935,6 +1940,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c)
        memset(trans, 0, offsetof(struct btree_trans, iters_onstack));
 
        trans->c                = c;
+       trans->ip               = _RET_IP_;
        trans->size             = ARRAY_SIZE(trans->iters_onstack);
        trans->iters            = trans->iters_onstack;
        trans->updates          = trans->updates_onstack;
index ae273ab7aa1a575afa1e2abb689ef96fe1594206..57ef50142ee11395f27d30ba4043049a64c1bdd4 100644 (file)
@@ -268,6 +268,7 @@ struct btree_insert_entry {
 
 struct btree_trans {
        struct bch_fs           *c;
+       unsigned long           ip;
        size_t                  nr_restarts;
        u64                     commit_start;
 
index d052ca541965b8cc71f5697dbca645996d1ac032..dde1fc1f97a2f9cecf2438671e621040a5443198 100644 (file)
@@ -439,6 +439,7 @@ static int bch2_trans_journal_preres_get(struct btree_trans *trans)
 
        if (!bch2_btree_trans_relock(trans)) {
                trans_restart(" (iter relock after journal preres get blocked)");
+               trace_trans_restart_journal_preres_get(c, trans->ip);
                return -EINTR;
        }
 
@@ -575,6 +576,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        if (race_fault()) {
                ret = -EINTR;
                trans_restart(" (race)");
+               trace_trans_restart_fault_inject(c, trans->ip);
                goto out;
        }
 
@@ -725,6 +727,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                 */
                if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
                        trans_restart(" (split)");
+                       trace_trans_restart_btree_node_split(c, trans->ip);
                        ret = -EINTR;
                }
                break;
@@ -744,6 +747,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                        return 0;
 
                trans_restart(" (iter relock after marking replicas)");
+               trace_trans_restart_mark_replicas(c, trans->ip);
                ret = -EINTR;
                break;
        case BTREE_INSERT_NEED_JOURNAL_RES:
@@ -757,6 +761,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                        return 0;
 
                trans_restart(" (iter relock after journal res get blocked)");
+               trace_trans_restart_journal_res_get(c, trans->ip);
                ret = -EINTR;
                break;
        default:
@@ -769,6 +774,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 
                if (ret2) {
                        trans_restart(" (traverse)");
+                       trace_trans_restart_traverse(c, trans->ip);
                        return ret2;
                }
 
@@ -780,6 +786,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                        return 0;
 
                trans_restart(" (atomic)");
+               trace_trans_restart_atomic(c, trans->ip);
        }
 
        return ret;
index 58f25894d48a2735c0a9fce66d1a2f747da9b1d1..9f09e5be09378958332d10f4b2a6bc5ccc5dd193 100644 (file)
@@ -983,7 +983,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                return -1;
        }
 
-       if (m->alive)
+       if (!gc && m->alive)
                bch2_stripes_heap_del(c, m, idx);
 
        memset(m, 0, sizeof(*m));
index 974daa7ef2d3d665f47383c2e58aae877a823851..a333b9ec930277e8c8eaa4a634f16ab6af4909cc 100644 (file)
@@ -97,7 +97,7 @@ struct bch_fs_usage_short {
 struct replicas_delta {
        s64                     delta;
        struct bch_replicas_entry r;
-};
+} __packed;
 
 struct replicas_delta_list {
        struct bch_fs_usage     fs_usage;
index b2ea783fe38afc29855b47eca4803b84cf25e9f3..7133482eeab6b1f75db098c5701e8cd8564aa4b6 100644 (file)
@@ -1189,9 +1189,10 @@ static void bch2_writepage_io_done(struct closure *cl)
        unsigned i;
 
        if (io->op.op.error) {
-               bio_for_each_segment_all(bvec, bio, i)
+               bio_for_each_segment_all(bvec, bio, i) {
                        SetPageError(bvec->bv_page);
-               set_bit(AS_EIO, &io->op.inode->v.i_mapping->flags);
+                       mapping_set_error(bvec->bv_page->mapping, -EIO);
+               }
        }
 
        /*
@@ -2079,10 +2080,9 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        if (ret)
                return ret;
 out:
-       if (c->opts.journal_flush_disabled)
-               return 0;
-
-       ret = bch2_journal_flush_seq(&c->journal, inode->ei_journal_seq);
+       if (!c->opts.journal_flush_disabled)
+               ret = bch2_journal_flush_seq(&c->journal,
+                                            inode->ei_journal_seq);
        ret2 = file_check_and_advance_wb_err(file);
 
        return ret ?: ret2;
index af58d00f33157e632d6a106f3a69ff256054c70f..dc6c7dfbfcf68040c648f9281604fa2b5eb97f5c 100644 (file)
@@ -395,7 +395,7 @@ retry:
        if (!tmpfile) {
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
-               journal_seq_copy(dir, inode->ei_journal_seq);
+               journal_seq_copy(dir, journal_seq);
                mutex_unlock(&dir->ei_update_lock);
        }
 
index 822b3fce09b4cd50687707a5db6b6f5c5a70a9d8..190b545b5e456d92748ae11322dc0dab197de6c1 100644 (file)
@@ -47,10 +47,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   POS_MIN, BTREE_ITER_PREFETCH);
 
-       mutex_lock(&c->replicas_gc_lock);
-       bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
-
-
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k))) {
                if (!bkey_extent_is_data(k.k) ||
@@ -96,12 +92,9 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                        break;
        }
 
-       BUG_ON(ret == -EINTR);
-
-       bch2_trans_exit(&trans);
+       ret = bch2_trans_exit(&trans) ?: ret;
 
-       bch2_replicas_gc_end(c, ret);
-       mutex_unlock(&c->replicas_gc_lock);
+       BUG_ON(ret == -EINTR);
 
        return ret;
 }
@@ -122,9 +115,6 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        bch2_trans_init(&trans, c);
        closure_init_stack(&cl);
 
-       mutex_lock(&c->replicas_gc_lock);
-       bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
-
        for (id = 0; id < BTREE_ID_NR; id++) {
                for_each_btree_node(&trans, iter, id, POS_MIN,
                                    BTREE_ITER_PREFETCH, b) {
@@ -177,10 +167,9 @@ retry:
 
        ret = 0;
 err:
-       bch2_trans_exit(&trans);
+       ret = bch2_trans_exit(&trans) ?: ret;
 
-       ret = bch2_replicas_gc_end(c, ret);
-       mutex_unlock(&c->replicas_gc_lock);
+       BUG_ON(ret == -EINTR);
 
        return ret;
 }
@@ -188,5 +177,6 @@ err:
 int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
        return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
-               bch2_dev_metadata_drop(c, dev_idx, flags);
+               bch2_dev_metadata_drop(c, dev_idx, flags) ?:
+               bch2_replicas_gc2(c); /* evaluated only when both drop passes returned 0; the first nonzero result in the ?: chain is returned */
 }
index 946e616228c96e7b5388f85ac5c53ee3686c72ff..d39f5633a8245517028431c68026197df12797f5 100644 (file)
@@ -620,64 +620,6 @@ out:
        return ret;
 }
 
-static int bch2_gc_data_replicas(struct bch_fs *c)
-{
-       struct btree_trans trans;
-       struct btree_iter *iter;
-       struct bkey_s_c k;
-       int ret;
-
-       bch2_trans_init(&trans, c);
-
-       mutex_lock(&c->replicas_gc_lock);
-       bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
-
-       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN,
-                          BTREE_ITER_PREFETCH, k, ret) {
-               ret = bch2_mark_bkey_replicas(c, k);
-               if (ret)
-                       break;
-       }
-       ret = bch2_trans_exit(&trans) ?: ret;
-
-       bch2_replicas_gc_end(c, ret);
-       mutex_unlock(&c->replicas_gc_lock);
-
-       return ret;
-}
-
-static int bch2_gc_btree_replicas(struct bch_fs *c)
-{
-       struct btree_trans trans;
-       struct btree_iter *iter;
-       struct btree *b;
-       unsigned id;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c);
-
-       mutex_lock(&c->replicas_gc_lock);
-       bch2_replicas_gc_start(c, 1 << BCH_DATA_BTREE);
-
-       for (id = 0; id < BTREE_ID_NR; id++) {
-               for_each_btree_node(&trans, iter, id, POS_MIN,
-                                   BTREE_ITER_PREFETCH, b) {
-                       ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
-
-                       bch2_trans_cond_resched(&trans);
-               }
-
-               ret = bch2_trans_iter_free(&trans, iter) ?: ret;
-       }
-
-       bch2_trans_exit(&trans);
-
-       bch2_replicas_gc_end(c, ret);
-       mutex_unlock(&c->replicas_gc_lock);
-
-       return ret;
-}
-
 static int bch2_move_btree(struct bch_fs *c,
                           move_pred_fn pred,
                           void *arg,
@@ -804,14 +746,14 @@ int bch2_data_job(struct bch_fs *c,
                        bch2_journal_meta(&c->journal);
                }
 
-               ret = bch2_gc_btree_replicas(c) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
 
                ret = bch2_move_data(c, NULL,
                                     writepoint_hashed((unsigned long) current),
                                     op.start,
                                     op.end,
                                     rereplicate_pred, c, stats) ?: ret;
-               ret = bch2_gc_data_replicas(c) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
                break;
        case BCH_DATA_OP_MIGRATE:
                if (op.migrate.dev >= c->sb.nr_devices)
@@ -821,14 +763,14 @@ int bch2_data_job(struct bch_fs *c,
                ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
 
                ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
-               ret = bch2_gc_btree_replicas(c) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
 
                ret = bch2_move_data(c, NULL,
                                     writepoint_hashed((unsigned long) current),
                                     op.start,
                                     op.end,
                                     migrate_pred, &op, stats) ?: ret;
-               ret = bch2_gc_data_replicas(c) ?: ret;
+               ret = bch2_replicas_gc2(c) ?: ret;
                break;
        default:
                ret = -EINVAL;
index a95e1447e85df7151f1c011b5a4f7be39e3c187a..390bf92154313f299b60b5ba5a47581973634f90 100644 (file)
@@ -219,7 +219,7 @@ enum opt_type {
        x(fsck,                         u8,                             \
          OPT_MOUNT,                                                    \
          OPT_BOOL(),                                                   \
-         NO_SB_OPT,                    true,                           \
+         NO_SB_OPT,                    false,                          \
          NULL,         "Run fsck on mount")                            \
        x(fix_errors,                   u8,                             \
          OPT_MOUNT,                                                    \
index eb44119466d8653a007abecca2be3943688615ff..319c7dc4ef9a46337ffc8f16a08147086cd7e894 100644 (file)
@@ -567,6 +567,64 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
        return 0;
 }
 
+int bch2_replicas_gc2(struct bch_fs *c) /* filters c->replicas down to journal entries and entries with nonzero usage, then passes the result to the superblock and replicas_table_update() */
+{
+       struct bch_replicas_cpu new = { 0 }; /* scratch table; new.nr counts the entries kept */
+       unsigned i, nr;
+       int ret = 0;
+
+       bch2_journal_meta(&c->journal); /* NOTE(review): presumably brings the usage counters read below up to date - confirm */
+retry:
+       nr              = READ_ONCE(c->replicas.nr); /* unlocked snapshot; re-validated once the locks are held */
+       new.entry_size  = READ_ONCE(c->replicas.entry_size);
+       new.entries     = kcalloc(nr, new.entry_size, GFP_KERNEL); /* sized from the snapshot, allocated before any lock is taken */
+       if (!new.entries)
+               return -ENOMEM;
+
+       mutex_lock(&c->sb_lock);
+       percpu_down_write(&c->mark_lock);
+
+       if (nr                  != c->replicas.nr || /* table changed shape since the snapshot: unlock, free and start over */
+           new.entry_size      != c->replicas.entry_size) {
+               percpu_up_write(&c->mark_lock);
+               mutex_unlock(&c->sb_lock);
+               kfree(new.entries);
+               goto retry;
+       }
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+
+               if (e->data_type == BCH_DATA_JOURNAL || /* journal entries are always kept */
+                   c->usage_base->replicas[i] || /* otherwise keep the entry if any usage counter for index i is nonzero */
+                   percpu_u64_get(&c->usage[0]->replicas[i]) ||
+                   percpu_u64_get(&c->usage[1]->replicas[i]))
+                       memcpy(cpu_replicas_entry(&new, new.nr++),
+                              e, new.entry_size);
+       }
+
+       bch2_cpu_replicas_sort(&new);
+
+       if (bch2_cpu_replicas_to_sb_replicas(c, &new)) { /* any nonzero result is reported to the caller as -ENOSPC */
+               ret = -ENOSPC;
+               goto err;
+       }
+
+       ret = replicas_table_update(c, &new);
+err:
+       kfree(new.entries); /* scratch buffer is freed on success and failure alike */
+
+       percpu_up_write(&c->mark_lock);
+
+       if (!ret) /* superblock is written only on success, with sb_lock still held */
+               bch2_write_super(c);
+
+       mutex_unlock(&c->sb_lock);
+
+       return ret;
+}
+
 int bch2_replicas_set_usage(struct bch_fs *c,
                            struct bch_replicas_entry *r,
                            u64 sectors)
index 2ffafad7c6314b37ba26693005865d3951233c5e..bca82e04e28dfc1fa66857fb970b37e22895414c 100644 (file)
@@ -58,6 +58,7 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 
 int bch2_replicas_gc_end(struct bch_fs *, int);
 int bch2_replicas_gc_start(struct bch_fs *, unsigned);
+int bch2_replicas_gc2(struct bch_fs *);
 
 int bch2_replicas_set_usage(struct bch_fs *,
                            struct bch_replicas_entry *,
index 0cbc7eedd72866ef1150e1d8576522ed6f39e7e4..aa3adbf2babf11a95f62acbe3ad969b8c2358ea9 100644 (file)
@@ -735,9 +735,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                sizeof(struct btree_node_iter_set);
 
        if (!(c->wq = alloc_workqueue("bcachefs",
-                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
+                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
            !(c->copygc_wq = alloc_workqueue("bcache_copygc",
-                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
+                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
            !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
                                WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
            percpu_ref_init(&c->writes, bch2_writes_disabled,
@@ -925,7 +925,8 @@ err:
                break;
        }
 
-       BUG_ON(!ret);
+       if (ret >= 0)
+               ret = -EIO;
        goto out;
 }
 
index fea80e248667c5efc3786aeb1676c89ad7cca218..94dd651949caeb1982f7abd101ee323dddf4748e 100644 (file)
@@ -142,10 +142,10 @@ void bch2_flags_to_text(struct printbuf *out,
                nr++;
 
        while (flags && (bit = __ffs(flags)) < nr) {
-               pr_buf(out, "%s", list[bit]);
                if (!first)
                        pr_buf(out, ",");
                first = false;
+               pr_buf(out, "%s", list[bit]);
                flags ^= 1 << bit;
        }
 }