git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 8c94740b1bf8 bcachefs: Add missing validation for jset_entr...
author Kent Overstreet <kent.overstreet@linux.dev>
Sun, 26 Nov 2023 02:51:30 +0000 (21:51 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 26 Nov 2023 02:51:30 +0000 (21:51 -0500)
36 files changed:
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bcachefs_ioctl.h
libbcachefs/btree_gc.c
libbcachefs/btree_trans_commit.c
libbcachefs/btree_write_buffer.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/buckets_types.h
libbcachefs/chardev.c
libbcachefs/compress.c
libbcachefs/data_update.c
libbcachefs/data_update.h
libbcachefs/errcode.h
libbcachefs/extents.c
libbcachefs/io_read.c
libbcachefs/journal_io.c
libbcachefs/lru.c
libbcachefs/lru.h
libbcachefs/move.c
libbcachefs/move.h
libbcachefs/movinggc.c
libbcachefs/rebalance.c
libbcachefs/recovery.c
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/sb-clean.c
libbcachefs/sb-members.c
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/trace.h

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 4649f2ba299a4818220296f95d8e446a0775ded3..97936a157a1fee58056954ef7b85a20208ac0115 100644
@@ -1 +1 @@
-783085c3cc440183ba5e987b1aa7791cc1ca42ba
+8c94740b1bf8645d3398170f41c9c88b78332252
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 1ed8506c33c7385e20d775a94da34748a29e92e3..56a18ace8cf32fe9abc034128fb4b6a0d3acffcd 100644
@@ -261,10 +261,8 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
        case BCH_DATA_free:
        case BCH_DATA_need_gc_gens:
        case BCH_DATA_need_discard:
-               bkey_fsck_err_on(a.v->dirty_sectors ||
-                                a.v->cached_sectors ||
-                                a.v->stripe, c, err,
-                                alloc_key_empty_but_have_data,
+               bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe,
+                                c, err, alloc_key_empty_but_have_data,
                                 "empty data type free but have data");
                break;
        case BCH_DATA_sb:
@@ -272,22 +270,21 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
        case BCH_DATA_btree:
        case BCH_DATA_user:
        case BCH_DATA_parity:
-               bkey_fsck_err_on(!a.v->dirty_sectors, c, err,
-                                alloc_key_dirty_sectors_0,
+               bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
+                                c, err, alloc_key_dirty_sectors_0,
                                 "data_type %s but dirty_sectors==0",
                                 bch2_data_types[a.v->data_type]);
                break;
        case BCH_DATA_cached:
                bkey_fsck_err_on(!a.v->cached_sectors ||
-                                a.v->dirty_sectors ||
-                                a.v->stripe, c, err,
-                                alloc_key_cached_inconsistency,
+                                bch2_bucket_sectors_dirty(*a.v) ||
+                                a.v->stripe,
+                                c, err, alloc_key_cached_inconsistency,
                                 "data type inconsistency");
 
                bkey_fsck_err_on(!a.v->io_time[READ] &&
                                 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
-                                c, err,
-                                alloc_key_cached_but_read_time_zero,
+                                c, err, alloc_key_cached_but_read_time_zero,
                                 "cached bucket with read_time == 0");
                break;
        case BCH_DATA_stripe:
@@ -790,8 +787,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 
        new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
 
-       if (new_a->dirty_sectors > old_a->dirty_sectors ||
-           new_a->cached_sectors > old_a->cached_sectors) {
+       if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
                new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
                new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
                SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
@@ -1509,6 +1505,27 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
        if (a->data_type != BCH_DATA_cached)
                return 0;
 
+       if (fsck_err_on(!a->io_time[READ], c,
+                       alloc_key_cached_but_read_time_zero,
+                       "cached bucket with read_time 0\n"
+                       "  %s",
+               (printbuf_reset(&buf),
+                bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+               struct bkey_i_alloc_v4 *a_mut =
+                       bch2_alloc_to_v4_mut(trans, alloc_k);
+               ret = PTR_ERR_OR_ZERO(a_mut);
+               if (ret)
+                       goto err;
+
+               a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+               ret = bch2_trans_update(trans, alloc_iter,
+                                       &a_mut->k_i, BTREE_TRIGGER_NORUN);
+               if (ret)
+                       goto err;
+
+               a = &a_mut->v;
+       }
+
        lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
                             lru_pos(alloc_k.k->p.inode,
                                     bucket_to_u64(alloc_k.k->p),
@@ -1517,41 +1534,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       if (fsck_err_on(!a->io_time[READ], c,
-                       alloc_key_cached_but_read_time_zero,
-                       "cached bucket with read_time 0\n"
-                       "  %s",
-               (printbuf_reset(&buf),
-                bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
-           fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
+       if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
                        alloc_key_to_missing_lru_entry,
                        "missing lru entry\n"
                        "  %s",
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-               u64 read_time = a->io_time[READ] ?:
-                       atomic64_read(&c->io_clock[READ].now);
-
                ret = bch2_lru_set(trans,
                                   alloc_k.k->p.inode,
                                   bucket_to_u64(alloc_k.k->p),
-                                  read_time);
+                                  a->io_time[READ]);
                if (ret)
                        goto err;
-
-               if (a->io_time[READ] != read_time) {
-                       struct bkey_i_alloc_v4 *a_mut =
-                               bch2_alloc_to_v4_mut(trans, alloc_k);
-                       ret = PTR_ERR_OR_ZERO(a_mut);
-                       if (ret)
-                               goto err;
-
-                       a_mut->v.io_time[READ] = read_time;
-                       ret = bch2_trans_update(trans, alloc_iter,
-                                               &a_mut->k_i, BTREE_TRIGGER_NORUN);
-                       if (ret)
-                               goto err;
-               }
        }
 err:
 fsck_err:
@@ -1564,15 +1558,13 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
-       int ret = 0;
 
-       ret = bch2_trans_run(c,
+       int ret = bch2_trans_run(c,
                for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
                                POS_MIN, BTREE_ITER_PREFETCH, k,
                                NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
                        bch2_check_alloc_to_lru_ref(trans, &iter)));
-       if (ret)
-               bch_err_fn(c, ret);
+       bch_err_fn(c, ret);
        return ret;
 }
 
@@ -1734,28 +1726,25 @@ void bch2_do_discards(struct bch_fs *c)
 static int invalidate_one_bucket(struct btree_trans *trans,
                                 struct btree_iter *lru_iter,
                                 struct bkey_s_c lru_k,
+                                struct bpos *last_flushed_pos,
                                 s64 *nr_to_invalidate)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter alloc_iter = { NULL };
-       struct bkey_i_alloc_v4 *a = NULL;
-       struct printbuf buf = PRINTBUF;
-       struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
-       unsigned cached_sectors;
        int ret = 0;
 
        if (*nr_to_invalidate <= 0)
                return 1;
 
-       if (!bch2_dev_bucket_exists(c, bucket)) {
-               prt_str(&buf, "lru entry points to invalid bucket");
-               goto err;
-       }
+       ret = bch2_check_lru_key(trans, lru_iter, lru_k, last_flushed_pos);
+       if (ret)
+               return ret < 0 ? ret : 0;
 
+       struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
        if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
                return 0;
 
-       a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
+       struct btree_iter alloc_iter;
+       struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
        ret = PTR_ERR_OR_ZERO(a);
        if (ret)
                goto out;
@@ -1769,7 +1758,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
        if (!a->v.cached_sectors)
                bch_err(c, "invalidating empty bucket, confused");
 
-       cached_sectors = a->v.cached_sectors;
+       unsigned cached_sectors = a->v.cached_sectors;
 
        SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
        a->v.gen++;
@@ -1791,28 +1780,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
        --*nr_to_invalidate;
 out:
        bch2_trans_iter_exit(trans, &alloc_iter);
-       printbuf_exit(&buf);
        return ret;
-err:
-       prt_str(&buf, "\n  lru key: ");
-       bch2_bkey_val_to_text(&buf, c, lru_k);
-
-       prt_str(&buf, "\n  lru entry: ");
-       bch2_lru_pos_to_text(&buf, lru_iter->pos);
-
-       prt_str(&buf, "\n  alloc key: ");
-       if (!a)
-               bch2_bpos_to_text(&buf, bucket);
-       else
-               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
-
-       bch_err(c, "%s", buf.buf);
-       if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
-               bch2_inconsistent_error(c);
-               ret = -EINVAL;
-       }
-
-       goto out;
 }
 
 static void bch2_do_invalidates_work(struct work_struct *work)
@@ -1822,6 +1790,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
        struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_s_c k;
+       struct bpos last_flushed_pos = POS_MIN;
        unsigned i;
        int ret = 0;
 
@@ -1837,7 +1806,8 @@ static void bch2_do_invalidates_work(struct work_struct *work)
                                lru_pos(ca->dev_idx, 0, 0),
                                lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
                                BTREE_ITER_INTENT, k,
-                       invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+                       invalidate_one_bucket(trans, &iter, k, &last_flushed_pos,
+                                             &nr_to_invalidate));
 
                if (ret < 0) {
                        percpu_ref_put(&ca->ref);
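/*
 * Note (not part of the patch): bch2_check_lru_key() returns 1 after
 * deleting a bad lru entry and a negative errcode on real errors; the
 * "ret < 0 ? ret : 0" mapping above treats deletion as success, so the
 * invalidate walk simply skips that bucket instead of aborting.
 */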
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 73faf99a222aac3b33035432666e4d9b272c6fe9..72bb8491fe743042b81aed50d72afc4ee77f02c1 100644
@@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
        return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
 }
 
+static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a)
+{
+       return a.dirty_sectors + a.cached_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
+{
+       return a.dirty_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
+                                                struct bch_alloc_v4 a)
+{
+       unsigned d = bch2_bucket_sectors_dirty(a);
+
+       return d ? max(0U, ca->mi.bucket_size - d) : 0;
+}
+
 static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
 {
        return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
@@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
                                              struct bch_dev *ca)
 {
        if (!data_type_movable(a.data_type) ||
-           a.dirty_sectors >= ca->mi.bucket_size)
+           !bch2_bucket_sectors_fragmented(ca, a))
                return 0;
 
-       return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
+       u64 d = bch2_bucket_sectors_dirty(a);
+       return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
 }
 
 static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
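
/*
 * A minimal standalone sketch (not part of the patch; hypothetical
 * values) of the arithmetic the new helpers implement: a bucket is
 * "fragmented" by the dirty space it is not using, and the lru
 * fragmentation index scales dirty sectors into a 31-bit range.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned bucket_sectors_fragmented(unsigned bucket_size, unsigned dirty)
{
	/* mirrors bch2_bucket_sectors_fragmented(): empty buckets aren't fragmented */
	return dirty && dirty < bucket_size ? bucket_size - dirty : 0;
}

static uint64_t lru_idx_fragmentation(unsigned bucket_size, unsigned dirty)
{
	/* mirrors alloc_lru_idx_fragmentation() for a movable data type */
	if (!bucket_sectors_fragmented(bucket_size, dirty))
		return 0;
	return (uint64_t) dirty * (1ULL << 31) / bucket_size;
}

int main(void)
{
	/* 512-sector bucket with 100 dirty sectors: 412 sectors fragmented */
	printf("fragmented %u, lru idx %llu\n",
	       bucket_sectors_fragmented(512, 100),
	       (unsigned long long) lru_idx_fragmentation(512, 100));
	return 0;
}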
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index eef6fa8d0f9fdf715b6df4d39c158b5824138713..1ba0eeb7552a2795d033d2603cccce611c8f3381 100644
@@ -1345,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
        int ret;
        int i;
 
+       if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+               erasure_code = false;
+
        BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
 
        BUG_ON(!nr_replicas || !nr_replicas_required);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 53f93f03f0885f8b27d5959041a2b8b05cf9a555..295efeda12ff8eaaf43e6c88aa9aabea138fb548 100644
@@ -935,7 +935,7 @@ struct bch_fs {
        mempool_t               compression_bounce[2];
        mempool_t               compress_workspace[BCH_COMPRESSION_TYPE_NR];
        mempool_t               decompress_workspace;
-       ZSTD_parameters         zstd_params;
+       size_t                  zstd_workspace_size;
 
        struct crypto_shash     *sha256;
        struct crypto_sync_skcipher *chacha20;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index ad0f298c87c39e1420ad31b00a57b371b87af37d..967780072f69c18ad5be64cbee0a114e129a0c94 100644
@@ -151,7 +151,11 @@ struct bpos {
 #else
 #error edit for your odd byteorder.
 #endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
 
 #define KEY_INODE_MAX                  ((__u64)~0ULL)
 #define KEY_OFFSET_MAX                 ((__u64)~0ULL)
@@ -2203,8 +2207,8 @@ struct jset_entry_dev_usage {
        __le32                  dev;
        __u32                   pad;
 
-       __le64                  buckets_ec;
-       __le64                  _buckets_unavailable; /* No longer used */
+       __le64                  _buckets_ec;            /* No longer used */
+       __le64                  _buckets_unavailable;   /* No longer used */
 
        struct jset_entry_dev_usage_type d[];
 };
diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h
index 18eb325401cf647662ed0b2ec1d1d623669214ae..44ba7a87aea726ee5626d6a86957eb37cd1c5c58 100644
@@ -81,6 +81,8 @@ struct bch_ioctl_incremental {
 #define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc,  16,  struct bch_ioctl_subvolume)
 #define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17,  struct bch_ioctl_subvolume)
 
+#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc,     18, struct bch_ioctl_dev_usage_v2)
+
 /* ioctl below act on a particular file, not the filesystem as a whole: */
 
 #define BCHFS_IOC_REINHERIT_ATTRS      _IOR(0xbc, 64, const char __user *)
@@ -298,7 +300,20 @@ struct bch_ioctl_dev_usage {
                __u64           buckets;
                __u64           sectors;
                __u64           fragmented;
-       }                       d[BCH_DATA_NR];
+       }                       d[10];
+};
+
+struct bch_ioctl_dev_usage_v2 {
+       __u64                   dev;
+       __u32                   flags;
+       __u8                    state;
+       __u8                    nr_data_types;
+       __u8                    pad[6];
+
+       __u32                   bucket_size;
+       __u64                   nr_buckets;
+
+       struct bch_ioctl_dev_usage_type  d[0];
 };
 
 /*
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 7e5d52f8ffd718d89b27cc4e8578d9e3ee5a7bf2..90f5bcfa3c4f2acaf0fc8e2b96b03c04e8e9b920 100644
@@ -1254,9 +1254,6 @@ static int bch2_gc_done(struct bch_fs *c,
                        copy_dev_field(dev_usage_fragmented_wrong,
                                       d[i].fragmented, "%s fragmented", bch2_data_types[i]);
                }
-
-               copy_dev_field(dev_usage_buckets_ec_wrong,
-                              buckets_ec,              "buckets_ec");
        }
 
        {
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 09e94cc460ef9d8d39115b305dce1437d118b763..7210d5c22c986a4bc9bbc2ca442fc535d4a914d2 100644
@@ -361,7 +361,6 @@ noinline static int
 btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
                                     struct btree_path *path, unsigned new_u64s)
 {
-       struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
        struct bkey_cached *ck = (void *) path->l[0].b;
        struct bkey_i *new_k;
@@ -372,7 +371,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
 
        new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
        if (!new_k) {
-               bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+               bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
                        bch2_btree_id_str(path->btree_id), new_u64s);
                return -BCH_ERR_ENOMEM_btree_key_cache_insert;
        }
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 0c2db1fab38acabd96253931bb7acd8935e7966a..d3c38d2c008cf42507706635ade392a90d71944c 100644
@@ -29,14 +29,12 @@ static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_re
 #ifdef CONFIG_X86_64
        int cmp;
 
-       asm(".intel_syntax noprefix;"
-           "mov rax, [%[l]];"
-           "sub rax, [%[r]];"
-           "mov rax, [%[l] + 8];"
-           "sbb rax, [%[r] + 8];"
-           "mov rax, [%[l] + 16];"
-           "sbb rax, [%[r] + 16];"
-           ".att_syntax prefix;"
+       asm("mov   (%[l]), %%rax;"
+           "sub   (%[r]), %%rax;"
+           "mov  8(%[l]), %%rax;"
+           "sbb  8(%[r]), %%rax;"
+           "mov 16(%[l]), %%rax;"
+           "sbb 16(%[r]), %%rax;"
            : "=@ccae" (cmp)
            : [l] "r" (l), [r] "r" (r)
            : "rax", "cc");
@@ -297,7 +295,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                        struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
 
                        skipped++;
-                       n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);;
+                       n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
                        k->journal_seq = 0;
                        continue;
                }
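
/*
 * Portable sketch (not part of the patch) of what the rewritten inline
 * asm computes: a 192-bit unsigned compare of three little-endian u64
 * words, true when l >= r, i.e. the "ae" (above or equal, no borrow)
 * condition left by the chained sub/sbb.
 */
#include <stdbool.h>
#include <stdint.h>

static bool u192_gte(const uint64_t l[3], const uint64_t r[3])
{
	for (int i = 2; i >= 0; i--)	/* most significant word first */
		if (l[i] != r[i])
			return l[i] > r[i];
	return true;			/* equal */
}

int main(void)
{
	uint64_t a[3] = { 1, 0, 0 }, b[3] = { 2, 0, 0 };
	return u192_gte(a, b);		/* exits 0: a < b */
}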
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 5bfa102a0438a33ab691fc0a73f8196b0f584cf2..50eb6ba2f64b5eb88eecf43eb72716dc08ebb46d 100644
@@ -277,12 +277,28 @@ void bch2_dev_usage_init(struct bch_dev *ca)
        ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
 }
 
-static inline int bucket_sectors_fragmented(struct bch_dev *ca,
-                                           struct bch_alloc_v4 a)
+void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
 {
-       return a.dirty_sectors
-               ? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
-               : 0;
+       prt_tab(out);
+       prt_str(out, "buckets");
+       prt_tab_rjust(out);
+       prt_str(out, "sectors");
+       prt_tab_rjust(out);
+       prt_str(out, "fragmented");
+       prt_tab_rjust(out);
+       prt_newline(out);
+
+       for (unsigned i = 0; i < BCH_DATA_NR; i++) {
+               prt_str(out, bch2_data_types[i]);
+               prt_tab(out);
+               prt_u64(out, usage->d[i].buckets);
+               prt_tab_rjust(out);
+               prt_u64(out, usage->d[i].sectors);
+               prt_tab_rjust(out);
+               prt_u64(out, usage->d[i].fragmented);
+               prt_tab_rjust(out);
+               prt_newline(out);
+       }
 }
 
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
@@ -306,41 +322,37 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
        u->d[old.data_type].buckets--;
        u->d[new.data_type].buckets++;
 
-       u->buckets_ec -= (int) !!old.stripe;
-       u->buckets_ec += (int) !!new.stripe;
-
-       u->d[old.data_type].sectors -= old.dirty_sectors;
-       u->d[new.data_type].sectors += new.dirty_sectors;
+       u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old);
+       u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new);
 
        u->d[BCH_DATA_cached].sectors += new.cached_sectors;
        u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
 
-       u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
-       u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
+       u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old);
+       u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new);
 
        preempt_enable();
 }
 
+struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
+{
+       return (struct bch_alloc_v4) {
+               .gen            = b.gen,
+               .data_type      = b.data_type,
+               .dirty_sectors  = b.dirty_sectors,
+               .cached_sectors = b.cached_sectors,
+               .stripe         = b.stripe,
+       };
+}
+
 static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
                                    struct bucket old, struct bucket new,
                                    u64 journal_seq, bool gc)
 {
-       struct bch_alloc_v4 old_a = {
-               .gen            = old.gen,
-               .data_type      = old.data_type,
-               .dirty_sectors  = old.dirty_sectors,
-               .cached_sectors = old.cached_sectors,
-               .stripe         = old.stripe,
-       };
-       struct bch_alloc_v4 new_a = {
-               .gen            = new.gen,
-               .data_type      = new.data_type,
-               .dirty_sectors  = new.dirty_sectors,
-               .cached_sectors = new.cached_sectors,
-               .stripe         = new.stripe,
-       };
-
-       bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
+       bch2_dev_usage_update(c, ca,
+                             bucket_m_to_alloc(old),
+                             bucket_m_to_alloc(new),
+                             journal_seq, gc);
 }
 
 static inline int __update_replicas(struct bch_fs *c,
@@ -640,7 +652,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                goto err;
        }
 
-
        g->data_type = data_type;
        g->dirty_sectors += sectors;
        new = *g;
@@ -657,14 +668,11 @@ static int check_bucket_ref(struct btree_trans *trans,
                            const struct bch_extent_ptr *ptr,
                            s64 sectors, enum bch_data_type ptr_data_type,
                            u8 b_gen, u8 bucket_data_type,
-                           u32 dirty_sectors, u32 cached_sectors)
+                           u32 bucket_sectors)
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
        size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
-       u32 bucket_sectors = !ptr->cached
-               ? dirty_sectors
-               : cached_sectors;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
@@ -799,7 +807,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 
        ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
                               g->gen, g->data_type,
-                              g->dirty_sectors, g->cached_sectors);
+                              g->dirty_sectors);
        if (ret)
                goto err;
 
@@ -829,8 +837,7 @@ static int __mark_pointer(struct btree_trans *trans,
                ? dirty_sectors
                : cached_sectors;
        int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
-                                  bucket_gen, *bucket_data_type,
-                                  *dirty_sectors, *cached_sectors);
+                                  bucket_gen, *bucket_data_type, *dst_sectors);
 
        if (ret)
                return ret;
@@ -1559,7 +1566,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 
        ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
                               a->v.gen, a->v.data_type,
-                              a->v.dirty_sectors, a->v.cached_sectors);
+                              a->v.dirty_sectors);
        if (ret)
                goto err;
 
@@ -2073,8 +2080,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        bucket_gens->first_bucket = ca->mi.first_bucket;
        bucket_gens->nbuckets   = nbuckets;
 
-       bch2_copygc_stop(c);
-
        if (resize) {
                down_write(&c->gc_lock);
                down_write(&ca->bucket_lock);
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 5574b62e0553074a706678208efe8ad58fa264db..bc088673009bfce4f9af14506d9f16382338a4f4 100644
@@ -203,6 +203,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
 }
 
 void bch2_dev_usage_init(struct bch_dev *);
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
 
 static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
 {
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index 2a9dab9006efa68ca64648d866fec47010b58ad2..783f71017204cafa0277644a6d1b5564c779d366 100644
@@ -33,8 +33,6 @@ struct bucket_gens {
 };
 
 struct bch_dev_usage {
-       u64                     buckets_ec;
-
        struct {
                u64             buckets;
                u64             sectors; /* _compressed_ sectors: */
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index de3d82de9d290a07ff09d27d93bf5c98bfad7309..118f0c0c4e3049bb7efc91eabd531c1196e61b03 100644
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+__must_check
+static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
+{
+       return copy_to_user(to, from, n) ? -EFAULT : 0;
+}
+
 /* returns with ref on ca->ref */
 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
                                          unsigned flags)
@@ -149,10 +155,8 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
 static long bch2_ioctl_query_uuid(struct bch_fs *c,
                        struct bch_ioctl_query_uuid __user *user_arg)
 {
-       if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
-                        sizeof(c->sb.user_uuid)))
-               return -EFAULT;
-       return 0;
+       return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
+                                   sizeof(c->sb.user_uuid));
 }
 
 #if 0
@@ -341,10 +345,7 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
        if (len < sizeof(e))
                return -EINVAL;
 
-       if (copy_to_user(buf, &e, sizeof(e)))
-               return -EFAULT;
-
-       return sizeof(e);
+       return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
 }
 
 static const struct file_operations bcachefs_data_ops = {
@@ -474,14 +475,15 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
 
        if (ret)
                goto err;
-       if (copy_to_user(user_arg, arg,
-                        sizeof(*arg) + arg->replica_entries_bytes))
-               ret = -EFAULT;
+
+       ret = copy_to_user_errcode(user_arg, arg,
+                       sizeof(*arg) + arg->replica_entries_bytes);
 err:
        kfree(arg);
        return ret;
 }
 
+/* obsolete, didn't allow for new data types: */
 static long bch2_ioctl_dev_usage(struct bch_fs *c,
                                 struct bch_ioctl_dev_usage __user *user_arg)
 {
@@ -511,7 +513,6 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
        arg.state               = ca->mi.state;
        arg.bucket_size         = ca->mi.bucket_size;
        arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
-       arg.buckets_ec          = src.buckets_ec;
 
        for (i = 0; i < BCH_DATA_NR; i++) {
                arg.d[i].buckets        = src.d[i].buckets;
@@ -521,10 +522,58 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
 
        percpu_ref_put(&ca->ref);
 
-       if (copy_to_user(user_arg, &arg, sizeof(arg)))
+       return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+}
+
+static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
+                                struct bch_ioctl_dev_usage_v2 __user *user_arg)
+{
+       struct bch_ioctl_dev_usage_v2 arg;
+       struct bch_dev_usage src;
+       struct bch_dev *ca;
+       int ret = 0;
+
+       if (!test_bit(BCH_FS_STARTED, &c->flags))
+               return -EINVAL;
+
+       if (copy_from_user(&arg, user_arg, sizeof(arg)))
                return -EFAULT;
 
-       return 0;
+       if ((arg.flags & ~BCH_BY_INDEX) ||
+           arg.pad[0] ||
+           arg.pad[1] ||
+           arg.pad[2])
+               return -EINVAL;
+
+       ca = bch2_device_lookup(c, arg.dev, arg.flags);
+       if (IS_ERR(ca))
+               return PTR_ERR(ca);
+
+       src = bch2_dev_usage_read(ca);
+
+       arg.state               = ca->mi.state;
+       arg.bucket_size         = ca->mi.bucket_size;
+       arg.nr_data_types       = min(arg.nr_data_types, BCH_DATA_NR);
+       arg.nr_buckets          = ca->mi.nbuckets - ca->mi.first_bucket;
+
+       ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+       if (ret)
+               goto err;
+
+       for (unsigned i = 0; i < arg.nr_data_types; i++) {
+               struct bch_ioctl_dev_usage_type t = {
+                       .buckets        = src.d[i].buckets,
+                       .sectors        = src.d[i].sectors,
+                       .fragmented     = src.d[i].fragmented,
+               };
+
+               ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
+               if (ret)
+                       goto err;
+       }
+err:
+       percpu_ref_put(&ca->ref);
+       return ret;
 }
 
 static long bch2_ioctl_read_super(struct bch_fs *c,
@@ -561,9 +610,8 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
                goto err;
        }
 
-       if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
-                        vstruct_bytes(sb)))
-               ret = -EFAULT;
+       ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
+                                  vstruct_bytes(sb));
 err:
        if (!IS_ERR_OR_NULL(ca))
                percpu_ref_put(&ca->ref);
@@ -663,6 +711,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
                return bch2_ioctl_fs_usage(c, arg);
        case BCH_IOCTL_DEV_USAGE:
                return bch2_ioctl_dev_usage(c, arg);
+       case BCH_IOCTL_DEV_USAGE_V2:
+               return bch2_ioctl_dev_usage_v2(c, arg);
 #if 0
        case BCH_IOCTL_START:
                BCH_IOCTL(start, struct bch_ioctl_start);
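
/*
 * User-space sketch (not part of the patch) of the v2 call protocol:
 * the caller allocates room for the data-type entries it understands
 * and passes that count in nr_data_types; the kernel clamps it to
 * BCH_DATA_NR, copies the fixed header back, then one
 * bch_ioctl_dev_usage_type per entry into the flexible array. The
 * include path and the already-open ioctl fd are assumptions.
 */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "libbcachefs/bcachefs_ioctl.h"	/* assumed include path */

static struct bch_ioctl_dev_usage_v2 *dev_usage_v2(int fd, __u64 dev, __u8 nr_types)
{
	struct bch_ioctl_dev_usage_v2 *u =
		calloc(1, sizeof(*u) + nr_types * sizeof(u->d[0]));

	if (!u)
		return NULL;

	u->dev		 = dev;
	u->nr_data_types = nr_types;

	if (ioctl(fd, BCH_IOCTL_DEV_USAGE_V2, u)) {
		free(u);
		return NULL;
	}
	return u;	/* u->nr_data_types now holds the clamped count */
}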
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index a8b148ec2a2b6b8ed1f33d10ad195b72afa112e0..64bdafe3190b8bd0be89ea5dd7d01447bddd040d 100644
@@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
                 */
                unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
                ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
-               ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
-                       zstd_cctx_workspace_bound(&params.cParams));
+               ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
 
                /*
                 * ZSTD requires that when we decompress we pass in the exact
@@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
                size_t len = zstd_compress_cctx(ctx,
                                dst + 4,        dst_len - 4 - 7,
                                src,            src_len,
-                               &c->zstd_params);
+                               &params);
                if (zstd_is_error(len))
                        return 0;
 
@@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
        size_t decompress_workspace_size = 0;
        ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
                                                 c->opts.encoded_extent_max);
+
+       /*
+        * ZSTD is lying: if we allocate the size of the workspace it says it
+        * requires, it returns memory allocation errors
+        */
+       c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams) * 2;
+
        struct {
                unsigned                        feature;
                enum bch_compression_type       type;
@@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
                        zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
                        zlib_inflate_workspacesize(), },
                { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
-                       zstd_cctx_workspace_bound(&params.cParams),
+                       c->zstd_workspace_size,
                        zstd_dctx_workspace_bound() },
        }, *i;
        bool have_compressed = false;
 
-       c->zstd_params = params;
-
        for (i = compression_types;
             i < compression_types + ARRAY_SIZE(compression_types);
             i++)
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 55769d77e6e79c188fd5a94fb6db264a0066f8a1..31090796cb6bc950e0ca202c2f38d419893c5354 100644
@@ -267,6 +267,20 @@ restart_drop_extra_replicas:
                        goto out;
                }
 
+               if (trace_data_update_enabled()) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "\nold: ");
+                       bch2_bkey_val_to_text(&buf, c, old);
+                       prt_str(&buf, "\nk:   ");
+                       bch2_bkey_val_to_text(&buf, c, k);
+                       prt_str(&buf, "\nnew: ");
+                       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+                       trace_data_update(c, buf.buf);
+                       printbuf_exit(&buf);
+               }
+
                ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
                                                k.k->p, bkey_start_pos(&insert->k)) ?:
                        bch2_insert_snapshot_whiteouts(trans, m->btree_id,
@@ -356,7 +370,7 @@ void bch2_data_update_exit(struct data_update *update)
        bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
-void bch2_update_unwritten_extent(struct btree_trans *trans,
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
                                  struct data_update *update)
 {
        struct bch_fs *c = update->op.c;
@@ -436,7 +450,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
        }
 }
 
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+                         struct btree_iter *iter,
+                         struct bkey_s_c k,
+                         struct data_update_opts data_opts)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_i *n;
+       int ret;
+
+       n = bch2_bkey_make_mut_noupdate(trans, k);
+       ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
+
+       while (data_opts.kill_ptrs) {
+               unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+               struct bch_extent_ptr *ptr;
+
+               bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+               data_opts.kill_ptrs ^= 1U << drop;
+       }
+
+       /*
+        * If the new extent no longer has any pointers, bch2_extent_normalize()
+        * will do the appropriate thing with it (turning it into a
+        * KEY_TYPE_error key, or just a discard if it was a cached extent)
+        */
+       bch2_extent_normalize(c, bkey_i_to_s(n));
+
+       /*
+        * Since we're not inserting through an extent iterator
+        * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+        * we aren't using the extent overwrite path to delete, we're
+        * just using the normal key deletion path:
+        */
+       if (bkey_deleted(&n->k))
+               n->k.size = 0;
+
+       return bch2_trans_relock(trans) ?:
+               bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+               bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+}
+
 int bch2_data_update_init(struct btree_trans *trans,
+                         struct btree_iter *iter,
                          struct moving_context *ctxt,
                          struct data_update *m,
                          struct write_point_specifier wp,
@@ -452,7 +510,7 @@ int bch2_data_update_init(struct btree_trans *trans,
        const struct bch_extent_ptr *ptr;
        unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
        unsigned ptrs_locked = 0;
-       int ret;
+       int ret = 0;
 
        bch2_bkey_buf_init(&m->k);
        bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -478,6 +536,8 @@ int bch2_data_update_init(struct btree_trans *trans,
        bkey_for_each_ptr(ptrs, ptr)
                percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
 
+       unsigned durability_have = 0, durability_removing = 0;
+
        i = 0;
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                bool locked;
@@ -489,8 +549,11 @@ int bch2_data_update_init(struct btree_trans *trans,
                                reserve_sectors += k.k->size;
 
                        m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
-               } else if (!p.ptr.cached) {
+                       durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+               } else if (!p.ptr.cached &&
+                          !((1U << i) & m->data_opts.kill_ptrs)) {
                        bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+                       durability_have += bch2_extent_ptr_durability(c, &p);
                }
 
                /*
@@ -529,6 +592,29 @@ int bch2_data_update_init(struct btree_trans *trans,
                i++;
        }
 
+       /*
+        * If current extent durability is less than io_opts.data_replicas,
+        * we're not trying to rereplicate the extent up to data_replicas here -
+        * unless extra_replicas was specified
+        *
+        * Increasing replication is an explicit operation triggered by
+        * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+        */
+       if (durability_have >= io_opts.data_replicas) {
+               m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+               m->data_opts.rewrite_ptrs = 0;
+               /* if iter == NULL, it's just a promote */
+               if (iter)
+                       ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+               goto done;
+       }
+
+       m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+               m->data_opts.extra_replicas;
+       m->op.nr_replicas_required = m->op.nr_replicas;
+
+       BUG_ON(!m->op.nr_replicas);
+
        if (reserve_sectors) {
                ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
                                m->data_opts.extra_replicas
@@ -538,14 +624,11 @@ int bch2_data_update_init(struct btree_trans *trans,
                        goto err;
        }
 
-       m->op.nr_replicas += m->data_opts.extra_replicas;
-       m->op.nr_replicas_required = m->op.nr_replicas;
-
-       BUG_ON(!m->op.nr_replicas);
+       if (bkey_extent_is_unwritten(k)) {
+               bch2_update_unwritten_extent(trans, m);
+               goto done;
+       }
 
-       /* Special handling required: */
-       if (bkey_extent_is_unwritten(k))
-               return -BCH_ERR_unwritten_extent_update;
        return 0;
 err:
        i = 0;
@@ -560,6 +643,9 @@ err:
        bch2_bkey_buf_exit(&m->k, c);
        bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
        return ret;
+done:
+       bch2_data_update_exit(m);
+       return ret ?: -BCH_ERR_data_update_done;
 }
 
 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
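
/*
 * Worked example (not part of the patch; hypothetical numbers) for the
 * replica accounting added above: an extent with two intact pointers
 * (durability_have = 2) plus one marked for rewrite
 * (durability_removing = 1), with io_opts.data_replicas = 2. Since
 * durability_have >= data_replicas, the rewrite pointer is demoted to
 * a kill pointer and dropped; no new replica is written. With
 * data_replicas = 3 instead, nr_replicas = min(1, 3 - 2) + extra = 1,
 * so exactly one replacement replica gets written.
 */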
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 9dc17b9d83795181798deb5af39401d4d6248581..991095bbd469baeb55de1c0d2636267e49d68a28 100644
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
 void bch2_data_update_read_done(struct data_update *,
                                struct bch_extent_crc_unpacked);
 
+int bch2_extent_drop_ptrs(struct btree_trans *,
+                         struct btree_iter *,
+                         struct bkey_s_c,
+                         struct data_update_opts);
+
 void bch2_data_update_exit(struct data_update *);
-void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+                         struct moving_context *,
                          struct data_update *,
                          struct write_point_specifier,
                          struct bch_io_opts, struct data_update_opts,
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index e42b45293bbd6b0590bc53454180f76a8038c8e8..4d35e5c6cd3e2cc3e3e2eb6c797a0c7e7ac2a046 100644
        x(BCH_ERR_fsck,                 fsck_repair_unimplemented)              \
        x(BCH_ERR_fsck,                 fsck_repair_impossible)                 \
        x(0,                            restart_recovery)                       \
-       x(0,                            unwritten_extent_update)                \
+       x(0,                            data_update_done)                       \
        x(EINVAL,                       device_state_not_allowed)               \
        x(EINVAL,                       member_info_missing)                    \
        x(EINVAL,                       mismatched_block_size)                  \
        x(BCH_ERR_invalid_sb,           invalid_sb_members)                     \
        x(BCH_ERR_invalid_sb,           invalid_sb_disk_groups)                 \
        x(BCH_ERR_invalid_sb,           invalid_sb_replicas)                    \
+       x(BCH_ERR_invalid_sb,           invalid_replicas_entry)                 \
        x(BCH_ERR_invalid_sb,           invalid_sb_journal)                     \
        x(BCH_ERR_invalid_sb,           invalid_sb_journal_seq_blacklist)       \
        x(BCH_ERR_invalid_sb,           invalid_sb_crypt)                       \
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index a864de231b69e297e85491dfd285928152c467b8..f6c92df552702a7455baa39532bf3c6231ae69b4 100644
@@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
        return replicas;
 }
 
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
 {
-       struct bch_dev *ca;
-
        if (p->ptr.cached)
                return 0;
 
-       ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
-       return ca->mi.durability +
-               (p->has_ec
-                ? p->ec.redundancy
-                : 0);
+       return p->has_ec
+               ? p->ec.redundancy + 1
+               : ca->mi.durability;
 }
 
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
 {
-       struct bch_dev *ca;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
-       if (p->ptr.cached)
-               return 0;
+       return __extent_ptr_durability(ca, p);
+}
 
-       ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
        if (ca->mi.state == BCH_MEMBER_STATE_failed)
                return 0;
 
-       return ca->mi.durability +
-               (p->has_ec
-                ? p->ec.redundancy
-                : 0);
+       return __extent_ptr_durability(ca, p);
 }
 
 unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
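
/*
 * Note (not part of the patch): this refactor also changes the
 * erasure-coded case. Previously an EC pointer's durability was
 * ca->mi.durability + ec.redundancy; now it counts as
 * 1 + ec.redundancy regardless of the device's own durability, e.g.
 * mi.durability = 2 with redundancy = 1 drops from 3 to 2.
 */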
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index b833409c7865b766c79ffeaa4d7b10910be5095e..3281c4dd1d5252840fa0c13f1c7c4f720108ebdf 100644
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
        bio = &op->write.op.wbio.bio;
        bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
-       ret = bch2_data_update_init(trans, NULL, &op->write,
+       ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
                        writepoint_hashed((unsigned long) current),
                        opts,
                        (struct data_update_opts) {
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 4ec5d5d38abca8d85d8bc023f3f5eea1dc751415..c2a655235fb25e284dd698504334493d525a88f1 100644
@@ -548,6 +548,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
        struct jset_entry_data_usage *u =
                container_of(entry, struct jset_entry_data_usage, entry);
        unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+       struct printbuf err = PRINTBUF;
        int ret = 0;
 
        if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -556,10 +557,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
                                 journal_entry_data_usage_bad_size,
                                 "invalid journal entry usage: bad size")) {
                journal_entry_null_range(entry, vstruct_next(entry));
-               return ret;
+               goto out;
        }
 
+       if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+                                c, version, jset, entry,
+                                journal_entry_data_usage_bad_size,
+                                "invalid journal entry usage: %s", err.buf)) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               goto out;
+       }
+out:
 fsck_err:
+       printbuf_exit(&err);
        return ret;
 }
 
@@ -676,8 +686,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
                       le64_to_cpu(u->d[i].sectors),
                       le64_to_cpu(u->d[i].fragmented));
        }
-
-       prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec));
 }
 
 static int journal_entry_log_validate(struct bch_fs *c,
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index e6d081c0592c81bb1db26db6c3d08aafce8bc7d9..5340f2d0eebaa70e047417fd98169ff4c8df2110 100644
@@ -40,8 +40,8 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
                   u64_to_bucket(lru.offset).offset);
 }
 
-static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
-                         u64 dev_bucket, u64 time, bool set)
+static inline int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
+                                u64 dev_bucket, u64 time, bool set)
 {
        return time
                ? bch2_btree_bit_mod(trans, BTREE_ID_lru,
@@ -51,12 +51,12 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
 
 int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
 {
-       return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
+       return __bch2_lru_set(trans, lru_id, dev_bucket, time, false);
 }
 
 int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
 {
-       return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+       return __bch2_lru_set(trans, lru_id, dev_bucket, time, true);
 }
 
 int bch2_lru_change(struct btree_trans *trans,
@@ -66,8 +66,8 @@ int bch2_lru_change(struct btree_trans *trans,
        if (old_time == new_time)
                return 0;
 
-       return  bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
-               bch2_lru_set(trans, lru_id, dev_bucket, new_time);
+       return  __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?:
+               __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true);
 }
 
 static const char * const bch2_lru_types[] = {
@@ -77,10 +77,11 @@ static const char * const bch2_lru_types[] = {
        NULL
 };
 
-static int bch2_check_lru_key(struct btree_trans *trans,
-                             struct btree_iter *lru_iter,
-                             struct bkey_s_c lru_k,
-                             struct bpos *last_flushed_pos)
+/* Returns 1 if key has been deleted */
+int bch2_check_lru_key(struct btree_trans *trans,
+                      struct btree_iter *lru_iter,
+                      struct bkey_s_c lru_k,
+                      struct bpos *last_flushed_pos)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
@@ -89,7 +90,6 @@ static int bch2_check_lru_key(struct btree_trans *trans,
        const struct bch_alloc_v4 *a;
        struct printbuf buf1 = PRINTBUF;
        struct printbuf buf2 = PRINTBUF;
-       enum bch_lru_type type = lru_type(lru_k);
        struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
        u64 idx;
        int ret;
@@ -98,7 +98,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
                        lru_entry_to_invalid_bucket,
                        "lru key points to nonexistent device:bucket %llu:%llu",
                        alloc_pos.inode, alloc_pos.offset))
-               return bch2_btree_delete_at(trans, lru_iter, 0);
+               goto delete;
 
        k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
        ret = bkey_err(k);
@@ -107,6 +107,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 
        a = bch2_alloc_to_v4(k, &a_convert);
 
+       enum bch_lru_type type = lru_type(lru_k);
        switch (type) {
        case BCH_LRU_read:
                idx = alloc_lru_idx_read(*a);
@@ -114,6 +115,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
        case BCH_LRU_fragmentation:
                idx = a->fragmentation_lru;
                break;
+       default:
+               /* unknown LRU type, don't check: */
+               goto out;
        }
 
        if (lru_k.k->type != KEY_TYPE_set ||
@@ -125,16 +129,18 @@ static int bch2_check_lru_key(struct btree_trans *trans,
                        goto out;
                }
 
-               if (c->opts.reconstruct_alloc ||
+               if ((c->opts.reconstruct_alloc &&
+                    c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_lrus) ||
                    fsck_err(c, lru_entry_bad,
                             "incorrect lru entry: lru %s time %llu\n"
                             "  %s\n"
-                            "  for %s",
+                            "for\n"
+                            "  %s",
                             bch2_lru_types[type],
                             lru_pos_time(lru_k.k->p),
                             (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
                             (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
-                       ret = bch2_btree_delete_at(trans, lru_iter, 0);
+                       goto delete;
        }
 out:
 err:
@@ -143,6 +149,14 @@ fsck_err:
        printbuf_exit(&buf2);
        printbuf_exit(&buf1);
        return ret;
+delete:
+       ret =   bch2_btree_delete_at(trans, lru_iter, 0) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BCH_WATERMARK_btree|
+                                 BCH_TRANS_COMMIT_lazy_rw|
+                                 BCH_TRANS_COMMIT_no_enospc) ?:
+               1;
+       goto out;
 }
 
 int bch2_check_lrus(struct bch_fs *c)
@@ -150,15 +164,14 @@ int bch2_check_lrus(struct bch_fs *c)
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bpos last_flushed_pos = POS_MIN;
-       int ret = 0;
 
-       ret = bch2_trans_run(c,
-               for_each_btree_key_commit(trans, iter,
-                               BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
-                               NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
-                       bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
-       if (ret)
-               bch_err_fn(c, ret);
-       return ret;
+       int ret = bch2_trans_run(c,
+               for_each_btree_key2(trans, iter,
+                               BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ({
+                       int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
 
+                       ret2 < 0 ? ret2 : 0;
+               })));
+       bch_err_fn(c, ret);
+       return ret;
 }
diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h
index 429dca816df5c5049c85e31ea20eb1e92ea694cf..014dba7cdc63a9de54bedaecc14447490a5072a8 100644
@@ -64,6 +64,8 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
 int bch2_lru_set(struct btree_trans *, u16, u64, u64);
 int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
 
+int bch2_check_lru_key(struct btree_trans *, struct btree_iter *,
+                      struct bkey_s_c, struct bpos *);
 int bch2_check_lrus(struct bch_fs *);
 
 #endif /* _BCACHEFS_LRU_H */
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index cf36f2b0738f40284d099b9e654d1f78463eae71..c5518a86627642addb83299fad23a5604e6184e6 100644
@@ -173,6 +173,7 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
 static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
 {
        move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+       bch2_trans_unlock_long(ctxt->trans);
        closure_sync(&ctxt->cl);
 }
 
@@ -235,49 +236,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
        scnprintf(stats->name, sizeof(stats->name), "%s", name);
 }
 
-static int bch2_extent_drop_ptrs(struct btree_trans *trans,
-                                struct btree_iter *iter,
-                                struct bkey_s_c k,
-                                struct data_update_opts data_opts)
-{
-       struct bch_fs *c = trans->c;
-       struct bkey_i *n;
-       int ret;
-
-       n = bch2_bkey_make_mut_noupdate(trans, k);
-       ret = PTR_ERR_OR_ZERO(n);
-       if (ret)
-               return ret;
-
-       while (data_opts.kill_ptrs) {
-               unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
-               struct bch_extent_ptr *ptr;
-
-               bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
-               data_opts.kill_ptrs ^= 1U << drop;
-       }
-
-       /*
-        * If the new extent no longer has any pointers, bch2_extent_normalize()
-        * will do the appropriate thing with it (turning it into a
-        * KEY_TYPE_error key, or just a discard if it was a cached extent)
-        */
-       bch2_extent_normalize(c, bkey_i_to_s(n));
-
-       /*
-        * Since we're not inserting through an extent iterator
-        * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
-        * we aren't using the extent overwrite path to delete, we're
-        * just using the normal key deletion path:
-        */
-       if (bkey_deleted(&n->k))
-               n->k.size = 0;
-
-       return bch2_trans_relock(trans) ?:
-               bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-               bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
-}
-
 int bch2_move_extent(struct moving_context *ctxt,
                     struct move_bucket_in_flight *bucket_in_flight,
                     struct btree_iter *iter,
@@ -347,19 +305,11 @@ int bch2_move_extent(struct moving_context *ctxt,
        io->rbio.bio.bi_iter.bi_sector  = bkey_start_offset(k.k);
        io->rbio.bio.bi_end_io          = move_read_endio;
 
-       ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+       ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
                                    io_opts, data_opts, iter->btree_id, k);
-       if (ret && ret != -BCH_ERR_unwritten_extent_update)
+       if (ret)
                goto err_free_pages;
 
-       if (ret == -BCH_ERR_unwritten_extent_update) {
-               bch2_update_unwritten_extent(trans, &io->write);
-               move_free(io);
-               return 0;
-       }
-
-       BUG_ON(ret);
-
        io->write.op.end_io = move_write_done;
 
        if (ctxt->rate)
@@ -403,6 +353,9 @@ err_free_pages:
 err_free:
        kfree(io);
 err:
+       if (ret == -BCH_ERR_data_update_done)
+               return 0;
+
        this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
        trace_move_extent_alloc_mem_fail2(c, k);
        return ret;
@@ -506,22 +459,13 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
        do {
                delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
 
-
-               if (delay) {
-                       if (delay > HZ / 10)
-                               bch2_trans_unlock_long(ctxt->trans);
-                       else
-                               bch2_trans_unlock(ctxt->trans);
-                       set_current_state(TASK_INTERRUPTIBLE);
-               }
-
-               if (kthread_should_stop()) {
-                       __set_current_state(TASK_RUNNING);
+               if (kthread_should_stop())
                        return 1;
-               }
 
                if (delay)
-                       schedule_timeout(delay);
+                       move_ctxt_wait_event_timeout(ctxt,
+                                       freezing(current) || kthread_should_stop(),
+                                       delay);
 
                if (unlikely(freezing(current))) {
                        bch2_moving_ctxt_flush_all(ctxt);
@@ -729,7 +673,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
        }
 
        a = bch2_alloc_to_v4(k, &a_convert);
-       dirty_sectors = a->dirty_sectors;
+       dirty_sectors = bch2_bucket_sectors_dirty(*a);
        bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
        fragmentation = a->fragmentation_lru;
 
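Raw a->dirty_sectors reads become bch2_bucket_sectors_dirty() here and in movinggc.c below; presumably this is a small accessor added to buckets.h by this series, along the lines of (assumed shape, not the verbatim helper):

    static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
    {
            return a.dirty_sectors;
    }
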
index cedde6ee99d0c88ceb86525e4e8fb53e643385ef..531965674a31cb562ea5f7476caeecac6a8d12e1 100644 (file)
@@ -38,6 +38,25 @@ struct moving_context {
        wait_queue_head_t       wait;
 };
 
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout)                   \
+({                                                                             \
+       int _ret = 0;                                                           \
+       while (true) {                                                          \
+               bool cond_finished = false;                                     \
+               bch2_moving_ctxt_do_pending_writes(_ctxt);                      \
+                                                                               \
+               if (_cond)                                                      \
+                       break;                                                  \
+               bch2_trans_unlock_long((_ctxt)->trans);                         \
+               _ret = __wait_event_timeout((_ctxt)->wait,                      \
+                            bch2_moving_ctxt_next_pending_write(_ctxt) ||      \
+                            (cond_finished = (_cond)), _timeout);              \
+       if (_ret || cond_finished)                                      \
+                       break;                                                  \
+       }                                                                       \
+       _ret;                                                                   \
+})
+
 #define move_ctxt_wait_event(_ctxt, _cond)                             \
 do {                                                                   \
        bool cond_finished = false;                                     \
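
move_ctxt_wait_event_timeout() is the bounded counterpart of move_ctxt_wait_event(): each pass flushes pending writes, drops long-held btree locks, then sleeps on the context waitqueue until a pending write appears, the condition becomes true, or the timeout expires; it evaluates to __wait_event_timeout()'s result. bch2_move_ratelimit() above uses it in place of a bare schedule_timeout(), so a throttled mover keeps servicing completed reads while it waits:

    if (delay)
            move_ctxt_wait_event_timeout(ctxt,
                            freezing(current) || kthread_should_stop(),
                            delay);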
index e884324bd2fa3e742e17c46522ba47b921b8e21a..d079ba7aa65349bd8c6fbe73f65dee2872a249a0 100644 (file)
@@ -91,7 +91,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
 
        a = bch2_alloc_to_v4(k, &_a);
        b->k.gen        = a->gen;
-       b->sectors      = a->dirty_sectors;
+       b->sectors      = bch2_bucket_sectors_dirty(*a);
 
        ret = data_type_movable(a->data_type) &&
                a->fragmentation_lru &&
@@ -149,6 +149,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
        struct bkey_s_c k;
        size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
        size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
+       struct bpos last_flushed_pos = POS_MIN;
        int ret;
 
        move_buckets_wait(ctxt, buckets_in_flight, false);
@@ -165,11 +166,16 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
                                  lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
                                  lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
                                  0, k, ({
-               struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
-               int ret2 = 0;
+               int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
+               if (ret2) {
+                       ret2 = ret2 < 0 ? ret2 : 0;
+                       goto next;
+               }
 
                saw++;
 
+               struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
+
                if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
                        not_movable++;
                else if (bucket_in_flight(buckets_in_flight, b.k))
@@ -179,6 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
                        if (ret2 >= 0)
                                sectors += b.sectors;
                }
+next:
                ret2;
        }));
 
index db2139c0545d789c95297fd68ea9292a26a9cdaa..fefa4395fabb3ea2d1f36ff2241a028917ceddb4 100644 (file)
@@ -171,6 +171,20 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
                return bkey_s_c_null;
        }
 
+       if (trace_rebalance_extent_enabled()) {
+               struct printbuf buf = PRINTBUF;
+
+               prt_str(&buf, "target=");
+               bch2_target_to_text(&buf, c, r->target);
+               prt_str(&buf, " compression=");
+               prt_str(&buf, bch2_compression_opts[r->compression]);
+               prt_str(&buf, " ");
+               bch2_bkey_val_to_text(&buf, c, k);
+
+               trace_rebalance_extent(c, buf.buf);
+               printbuf_exit(&buf);
+       }
+
        return k;
 }
 
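The trace_<event>_enabled() guard keeps the printbuf allocation and key formatting entirely off the fast path when the tracepoint is disabled. The same pattern works for any of the string-payload events declared in trace.h; a sketch with a placeholder event name:

    if (trace_my_event_enabled()) {
            struct printbuf buf = PRINTBUF;

            bch2_bkey_val_to_text(&buf, c, k);
            trace_my_event(c, buf.buf);
            printbuf_exit(&buf);
    }
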
index a94f2b5ed055f747a27b1cca56ed4ac233e80345..5f4f76e67cdda46755c9e16eb5e8b1720af98f19 100644 (file)
@@ -302,8 +302,6 @@ static int journal_replay_entry_early(struct bch_fs *c,
                struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
                unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
 
-               ca->usage_base->buckets_ec              = le64_to_cpu(u->buckets_ec);
-
                for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
                        ca->usage_base->d[i].buckets    = le64_to_cpu(u->d[i].buckets);
                        ca->usage_base->d[i].sectors    = le64_to_cpu(u->d[i].sectors);
index 820f99898a16e8f89f99e2923201dcbf6c87c958..ccb776e045dc11e15bbc86ff11caa0c7259eac3a 100644 (file)
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
        prt_printf(out, "]");
 }
 
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+                                struct bch_sb *sb,
+                                struct printbuf *err)
+{
+       if (!r->nr_devs) {
+               prt_printf(err, "no devices in entry ");
+               goto bad;
+       }
+
+       if (r->nr_required > 1 &&
+           r->nr_required >= r->nr_devs) {
+               prt_printf(err, "bad nr_required in entry ");
+               goto bad;
+       }
+
+       for (unsigned i = 0; i < r->nr_devs; i++)
+               if (!bch2_dev_exists(sb, r->devs[i])) {
+                       prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+                       goto bad;
+               }
+
+       return 0;
+bad:
+       bch2_replicas_entry_to_text(err, r);
+       return -BCH_ERR_invalid_replicas_entry;
+}
+
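
bch2_replicas_entry_validate() consolidates the per-entry checks that bch2_cpu_replicas_validate() used to open-code (see the hunk below), so runtime paths can reject a malformed entry with the same logic. A minimal usage sketch, assuming c and sb are a mounted filesystem and its superblock:

    struct printbuf err = PRINTBUF;
    int ret = bch2_replicas_entry_validate(e, sb, &err);
    if (ret)
            bch_err(c, "%s", err.buf);  /* the offending entry is appended to err */
    printbuf_exit(&err);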
 void bch2_cpu_replicas_to_text(struct printbuf *out,
                               struct bch_replicas_cpu *r)
 {
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
 }
 
 static struct bch_replicas_cpu
-cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+cpu_replicas_add_entry(struct bch_fs *c,
+                      struct bch_replicas_cpu *old,
                       struct bch_replicas_entry_v1 *new_entry)
 {
        unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
                                        replicas_entry_bytes(new_entry)),
        };
 
+       for (i = 0; i < new_entry->nr_devs; i++)
+               BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
        BUG_ON(!new_entry->data_type);
        verify_replicas_entry(new_entry);
 
@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
        if (c->replicas_gc.entries &&
            !__replicas_has_entry(&c->replicas_gc, new_entry)) {
-               new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+               new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
                if (!new_gc.entries) {
                        ret = -BCH_ERR_ENOMEM_cpu_replicas;
                        goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
        }
 
        if (!__replicas_has_entry(&c->replicas, new_entry)) {
-               new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+               new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
                if (!new_r.entries) {
                        ret = -BCH_ERR_ENOMEM_cpu_replicas;
                        goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
        if (idx < 0) {
                struct bch_replicas_cpu n;
 
-               n = cpu_replicas_add_entry(&c->replicas, r);
+               n = cpu_replicas_add_entry(c, &c->replicas, r);
                if (!n.entries)
                        return -BCH_ERR_ENOMEM_cpu_replicas;
 
@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
                                      struct bch_sb *sb,
                                      struct printbuf *err)
 {
-       unsigned i, j;
+       unsigned i;
 
        sort_cmp_size(cpu_r->entries,
                      cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
                struct bch_replicas_entry_v1 *e =
                        cpu_replicas_entry(cpu_r, i);
 
-               if (e->data_type >= BCH_DATA_NR) {
-                       prt_printf(err, "invalid data type in entry ");
-                       bch2_replicas_entry_to_text(err, e);
-                       return -BCH_ERR_invalid_sb_replicas;
-               }
-
-               if (!e->nr_devs) {
-                       prt_printf(err, "no devices in entry ");
-                       bch2_replicas_entry_to_text(err, e);
-                       return -BCH_ERR_invalid_sb_replicas;
-               }
-
-               if (e->nr_required > 1 &&
-                   e->nr_required >= e->nr_devs) {
-                       prt_printf(err, "bad nr_required in entry ");
-                       bch2_replicas_entry_to_text(err, e);
-                       return -BCH_ERR_invalid_sb_replicas;
-               }
-
-               for (j = 0; j < e->nr_devs; j++)
-                       if (!bch2_dev_exists(sb, e->devs[j])) {
-                               prt_printf(err, "invalid device %u in entry ", e->devs[j]);
-                               bch2_replicas_entry_to_text(err, e);
-                               return -BCH_ERR_invalid_sb_replicas;
-                       }
+               int ret = bch2_replicas_entry_validate(e, sb, err);
+               if (ret)
+                       return ret;
 
                if (i + 1 < cpu_r->nr) {
                        struct bch_replicas_entry_v1 *n =
index b2bb12a9b5335fbb25a95d339ceecb0a28f06433..654a4b26d3a3c96e3ac0cecb9586de15828665f1 100644 (file)
@@ -9,6 +9,8 @@
 void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
 void bch2_replicas_entry_to_text(struct printbuf *,
                                 struct bch_replicas_entry_v1 *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
+                                struct bch_sb *, struct printbuf *);
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 static inline struct bch_replicas_entry_v1 *
index fedc9e102eddad07cbf4743ec1d9122c20e19798..8dc0e3db1f03ebaa88df4b2e336671a089879a1e 100644 (file)
@@ -256,7 +256,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
 
                u->entry.type = BCH_JSET_ENTRY_dev_usage;
                u->dev = cpu_to_le32(dev);
-               u->buckets_ec           = cpu_to_le64(ca->usage_base->buckets_ec);
 
                for (i = 0; i < BCH_DATA_NR; i++) {
                        u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
index bed0f857fe5b7627639ee24202dba1002910eee7..259af07f4624792062fff170ae69c28cd2e9aa8a 100644 (file)
@@ -259,6 +259,11 @@ static void member_to_text(struct printbuf *out,
                prt_printf(out, "(none)");
        prt_newline(out);
 
+       prt_str(out, "Durability:");
+       prt_tab(out);
+       prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m));
+       prt_newline(out);
+
        prt_printf(out, "Discard:");
        prt_tab(out);
        prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
index fa8534782183163841874f8d3f972530017b7735..512d5665786420dab9763995d3955441e3a91a3f 100644 (file)
@@ -658,7 +658,7 @@ reread:
        return 0;
 }
 
-int __bch2_read_super(const char *path, struct bch_opts *opts,
+static int __bch2_read_super(const char *path, struct bch_opts *opts,
                    struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
 {
        u64 offset = opt_get(*opts, sb);
index 91f757173ef0ae5cdbb96e0f2cef9570f23ab57e..552d55dd963f8525ee9ee127118ca2d1177847df 100644 (file)
@@ -270,6 +270,8 @@ void bch2_fs_read_only(struct bch_fs *c)
 
        BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
 
+       bch_verbose(c, "going read-only");
+
        /*
         * Block new foreground-end write operations from starting - any new
         * writes will return -EROFS:
@@ -297,13 +299,21 @@ void bch2_fs_read_only(struct bch_fs *c)
                   test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
                   test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
 
+       bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+       if (writes_disabled)
+               bch_verbose(c, "finished waiting for writes to stop");
+
        __bch2_fs_read_only(c);
 
        wait_event(bch2_read_only_wait,
                   test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
 
+       if (!writes_disabled)
+               bch_verbose(c, "finished waiting for writes to stop");
+
        clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
        clear_bit(BCH_FS_GOING_RO, &c->flags);
+       clear_bit(BCH_FS_RW, &c->flags);
 
        if (!bch2_journal_error(&c->journal) &&
            !test_bit(BCH_FS_ERROR, &c->flags) &&
@@ -319,9 +329,9 @@ void bch2_fs_read_only(struct bch_fs *c)
 
                bch_verbose(c, "marking filesystem clean");
                bch2_fs_mark_clean(c);
+       } else {
+               bch_verbose(c, "done going read-only, filesystem not clean");
        }
-
-       clear_bit(BCH_FS_RW, &c->flags);
 }
 
 static void bch2_fs_read_only_work(struct work_struct *work)
@@ -424,6 +434,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
 
+       set_bit(BCH_FS_RW, &c->flags);
+       set_bit(BCH_FS_WAS_RW, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+       percpu_ref_reinit(&c->writes);
+#else
+       for (i = 0; i < BCH_WRITE_REF_NR; i++) {
+               BUG_ON(atomic_long_read(&c->writes[i]));
+               atomic_long_inc(&c->writes[i]);
+       }
+#endif
+
        ret = bch2_gc_thread_start(c);
        if (ret) {
                bch_err(c, "error starting gc thread");
@@ -440,24 +462,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                        goto err;
        }
 
-#ifndef BCH_WRITE_REF_DEBUG
-       percpu_ref_reinit(&c->writes);
-#else
-       for (i = 0; i < BCH_WRITE_REF_NR; i++) {
-               BUG_ON(atomic_long_read(&c->writes[i]));
-               atomic_long_inc(&c->writes[i]);
-       }
-#endif
-       set_bit(BCH_FS_RW, &c->flags);
-       set_bit(BCH_FS_WAS_RW, &c->flags);
-
        bch2_do_discards(c);
        bch2_do_invalidates(c);
        bch2_do_stripe_deletes(c);
        bch2_do_pending_node_rewrites(c);
        return 0;
 err:
-       __bch2_fs_read_only(c);
+       if (test_bit(BCH_FS_RW, &c->flags))
+               bch2_fs_read_only(c);
+       else
+               __bch2_fs_read_only(c);
        return ret;
 }
 
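Because BCH_FS_RW and the writes ref are now taken before the gc, copygc and rebalance threads are started (the reordering above), a failure after that point must unwind through the full read-only path, which drains the writes ref; a failure before it can still use the lightweight teardown. The error path, annotated as a sketch:

    err:
            if (test_bit(BCH_FS_RW, &c->flags))
                    bch2_fs_read_only(c);       /* went RW: drain writes, full teardown */
            else
                    __bch2_fs_read_only(c);     /* never went RW: lightweight teardown */
            return ret;
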
index 264c46b456c2af470dfdc9d5745fe676efa0795d..7223418d3bb427d36921e957d59a584d7a9691e9 100644 (file)
@@ -258,15 +258,16 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
        struct btree_iter iter;
        struct bkey_s_c k;
        enum btree_id id;
-       u64 nr_uncompressed_extents = 0,
-           nr_compressed_extents = 0,
-           nr_incompressible_extents = 0,
-           uncompressed_sectors = 0,
-           incompressible_sectors = 0,
-           compressed_sectors_compressed = 0,
-           compressed_sectors_uncompressed = 0;
+       struct compression_type_stats {
+               u64             nr_extents;
+               u64             sectors_compressed;
+               u64             sectors_uncompressed;
+       } s[BCH_COMPRESSION_TYPE_NR];
+       u64 compressed_incompressible = 0;
        int ret = 0;
 
+       memset(s, 0, sizeof(s));
+
        if (!test_bit(BCH_FS_STARTED, &c->flags))
                return -EPERM;
 
@@ -279,36 +280,30 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
                for_each_btree_key(trans, iter, id, POS_MIN,
                                   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
                        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+                       struct bch_extent_crc_unpacked crc;
                        const union bch_extent_entry *entry;
-                       struct extent_ptr_decoded p;
-                       bool compressed = false, uncompressed = false, incompressible = false;
-
-                       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-                               switch (p.crc.compression_type) {
-                               case BCH_COMPRESSION_TYPE_none:
-                                       uncompressed = true;
-                                       uncompressed_sectors += k.k->size;
-                                       break;
-                               case BCH_COMPRESSION_TYPE_incompressible:
-                                       incompressible = true;
-                                       incompressible_sectors += k.k->size;
-                                       break;
-                               default:
-                                       compressed_sectors_compressed +=
-                                               p.crc.compressed_size;
-                                       compressed_sectors_uncompressed +=
-                                               p.crc.uncompressed_size;
-                                       compressed = true;
-                                       break;
+                       bool compressed = false, incompressible = false;
+
+                       bkey_for_each_crc(k.k, ptrs, crc, entry) {
+                               incompressible  |= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+                               compressed      |= crc_is_compressed(crc);
+
+                               if (crc_is_compressed(crc)) {
+                                       s[crc.compression_type].nr_extents++;
+                                       s[crc.compression_type].sectors_compressed += crc.compressed_size;
+                                       s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size;
                                }
                        }
 
-                       if (incompressible)
-                               nr_incompressible_extents++;
-                       else if (uncompressed)
-                               nr_uncompressed_extents++;
-                       else if (compressed)
-                               nr_compressed_extents++;
+                       compressed_incompressible += compressed && incompressible;
+
+                       if (!compressed) {
+                               unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0;
+
+                               s[t].nr_extents++;
+                               s[t].sectors_compressed += k.k->size;
+                               s[t].sectors_uncompressed += k.k->size;
+                       }
                }
                bch2_trans_iter_exit(trans, &iter);
        }
@@ -318,26 +313,45 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
        if (ret)
                return ret;
 
-       prt_printf(out, "uncompressed:\n");
-       prt_printf(out, "       nr extents:             %llu\n", nr_uncompressed_extents);
-       prt_printf(out, "       size:                   ");
-       prt_human_readable_u64(out, uncompressed_sectors << 9);
-       prt_printf(out, "\n");
+       prt_str(out, "type");
+       printbuf_tabstop_push(out, 12);
+       prt_tab(out);
 
-       prt_printf(out, "compressed:\n");
-       prt_printf(out, "       nr extents:             %llu\n", nr_compressed_extents);
-       prt_printf(out, "       compressed size:        ");
-       prt_human_readable_u64(out, compressed_sectors_compressed << 9);
-       prt_printf(out, "\n");
-       prt_printf(out, "       uncompressed size:      ");
-       prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
-       prt_printf(out, "\n");
+       prt_str(out, "compressed");
+       printbuf_tabstop_push(out, 16);
+       prt_tab_rjust(out);
+
+       prt_str(out, "uncompressed");
+       printbuf_tabstop_push(out, 16);
+       prt_tab_rjust(out);
+
+       prt_str(out, "average extent size");
+       printbuf_tabstop_push(out, 24);
+       prt_tab_rjust(out);
+       prt_newline(out);
+
+       for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
+               prt_str(out, bch2_compression_types[i]);
+               prt_tab(out);
+
+               prt_human_readable_u64(out, s[i].sectors_compressed << 9);
+               prt_tab_rjust(out);
+
+               prt_human_readable_u64(out, s[i].sectors_uncompressed << 9);
+               prt_tab_rjust(out);
+
+               prt_human_readable_u64(out, s[i].nr_extents
+                                      ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
+                                      : 0);
+               prt_tab_rjust(out);
+               prt_newline(out);
+       }
+
+       if (compressed_incompressible) {
+               prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible);
+               prt_newline(out);
+       }
 
-       prt_printf(out, "incompressible:\n");
-       prt_printf(out, "       nr extents:             %llu\n", nr_incompressible_extents);
-       prt_printf(out, "       size:                   ");
-       prt_human_readable_u64(out, incompressible_sectors << 9);
-       prt_printf(out, "\n");
        return 0;
 }
 
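The "average extent size" column is derived rather than stored: per compression type it is uncompressed bytes over extent count, converting sectors to bytes first (a sector is 512 bytes, hence the << 9). A worked example with hypothetical counts — 3 extents totalling 768 uncompressed sectors:

    u64 avg = s[i].nr_extents
            ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
            : 0;    /* 768 << 9 = 384K bytes; 384K / 3 extents = 128K average */
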
@@ -786,32 +800,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
        printbuf_tabstop_push(out, 16);
        printbuf_tabstop_push(out, 16);
 
-       prt_tab(out);
-       prt_str(out, "buckets");
-       prt_tab_rjust(out);
-       prt_str(out, "sectors");
-       prt_tab_rjust(out);
-       prt_str(out, "fragmented");
-       prt_tab_rjust(out);
-       prt_newline(out);
-
-       for (i = 0; i < BCH_DATA_NR; i++) {
-               prt_str(out, bch2_data_types[i]);
-               prt_tab(out);
-               prt_u64(out, stats.d[i].buckets);
-               prt_tab_rjust(out);
-               prt_u64(out, stats.d[i].sectors);
-               prt_tab_rjust(out);
-               prt_u64(out, stats.d[i].fragmented);
-               prt_tab_rjust(out);
-               prt_newline(out);
-       }
-
-       prt_str(out, "ec");
-       prt_tab(out);
-       prt_u64(out, stats.buckets_ec);
-       prt_tab_rjust(out);
-       prt_newline(out);
+       bch2_dev_usage_to_text(out, &stats);
 
        prt_newline(out);
 
index 7b24e7fe3b5332c984282d737c1088c6a31bc09e..4980cfdd13b355c8558adb83187b009c3cba2272 100644 (file)
@@ -32,19 +32,21 @@ DECLARE_EVENT_CLASS(bpos,
        TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
 );
 
-DECLARE_EVENT_CLASS(bkey,
-       TP_PROTO(struct bch_fs *c, const char *k),
-       TP_ARGS(c, k),
+DECLARE_EVENT_CLASS(str,
+       TP_PROTO(struct bch_fs *c, const char *str),
+       TP_ARGS(c, str),
 
        TP_STRUCT__entry(
-               __string(k,     k                               )
+               __field(dev_t,          dev                     )
+               __string(str,           str                     )
        ),
 
        TP_fast_assign(
-               __assign_str(k, k);
+               __entry->dev            = c->dev;
+               __assign_str(str, str);
        ),
 
-       TP_printk("%s", __get_str(k))
+       TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
 );
 
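The old bkey event class is generalized to str — any preformatted string, now prefixed with the emitting device's major:minor — so adding a tracepoint that takes a formatted message is a single stanza, as the rebalance_extent and data_update events at the bottom of this file show. A sketch with a placeholder event name:

    DEFINE_EVENT(str, my_new_event,
            TP_PROTO(struct bch_fs *c, const char *str),
            TP_ARGS(c, str)
    );
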
 DECLARE_EVENT_CLASS(btree_node,
@@ -736,22 +738,22 @@ TRACE_EVENT(bucket_evacuate,
                  __entry->dev_idx, __entry->bucket)
 );
 
-DEFINE_EVENT(bkey, move_extent,
+DEFINE_EVENT(str, move_extent,
        TP_PROTO(struct bch_fs *c, const char *k),
        TP_ARGS(c, k)
 );
 
-DEFINE_EVENT(bkey, move_extent_read,
+DEFINE_EVENT(str, move_extent_read,
        TP_PROTO(struct bch_fs *c, const char *k),
        TP_ARGS(c, k)
 );
 
-DEFINE_EVENT(bkey, move_extent_write,
+DEFINE_EVENT(str, move_extent_write,
        TP_PROTO(struct bch_fs *c, const char *k),
        TP_ARGS(c, k)
 );
 
-DEFINE_EVENT(bkey, move_extent_finish,
+DEFINE_EVENT(str, move_extent_finish,
        TP_PROTO(struct bch_fs *c, const char *k),
        TP_ARGS(c, k)
 );
@@ -773,7 +775,7 @@ TRACE_EVENT(move_extent_fail,
        TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
 );
 
-DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
+DEFINE_EVENT(str, move_extent_alloc_mem_fail,
        TP_PROTO(struct bch_fs *c, const char *k),
        TP_ARGS(c, k)
 );
@@ -1366,6 +1368,16 @@ TRACE_EVENT(write_buffer_flush_slowpath,
        TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
 );
 
+DEFINE_EVENT(str, rebalance_extent,
+       TP_PROTO(struct bch_fs *c, const char *str),
+       TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(str, data_update,
+       TP_PROTO(struct bch_fs *c, const char *str),
+       TP_ARGS(c, str)
+);
+
 #endif /* _TRACE_BCACHEFS_H */
 
 /* This part must be outside protection */