git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 2a6125decb43 bcachefs: bch_sb_field_downgrade
author Kent Overstreet <kent.overstreet@linux.dev>
Sun, 31 Dec 2023 03:54:04 +0000 (22:54 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Thu, 4 Jan 2024 01:31:37 +0000 (20:31 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
30 files changed:
.bcachefs_revision
include/linux/bitops.h
libbcachefs/acl.c
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/darray.c
libbcachefs/darray.h
libbcachefs/errcode.h
libbcachefs/error.c
libbcachefs/fs-io-direct.c
libbcachefs/fs-ioctl.c
libbcachefs/fs.c
libbcachefs/io_write.c
libbcachefs/printbuf.c
libbcachefs/printbuf.h
libbcachefs/recovery.c
libbcachefs/recovery.h
libbcachefs/recovery_types.h
libbcachefs/sb-clean.c
libbcachefs/sb-downgrade.c [new file with mode: 0644]
libbcachefs/sb-downgrade.h [new file with mode: 0644]
libbcachefs/sb-errors.c
libbcachefs/sb-errors.h
libbcachefs/sb-errors_types.h
libbcachefs/subvolume.c
libbcachefs/subvolume.h
libbcachefs/super-io.c
libbcachefs/super-io.h
libbcachefs/util.h
libbcachefs/xattr.c

index 197bcf3751dfe58181487b9ba22992d1762dce7e..595b9eff9254db89f237b8ee9b371779a8362aa5 100644 (file)
@@ -1 +1 @@
-5264e9f4d0c00922dee2b2635dedaa3438a78e3f
+2a6125decb436ddc5e022c2428f64cf68dc974de
index 62a3f4040a73a6ea812f7b72bd691a4f38143de4..758476b1699cc34516d3fd3e2772c96a2a9a9901 100644 (file)
@@ -16,6 +16,7 @@
 
 #define BIT_MASK(nr)           (1UL << ((nr) % BITS_PER_LONG))
 #define BIT_WORD(nr)           ((nr) / BITS_PER_LONG)
+#define BITS_PER_TYPE(type)    (sizeof(type) * BITS_PER_BYTE)
 #define BITS_PER_BYTE          8
 #define BITS_TO_LONGS(nr)      DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #define BITS_TO_U64(nr)                DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
index f3809897f00a7d5c98c7f33f36bc2fd587939dcc..3640f417cce118b06e43ae4c8b38bb275b0097fc 100644 (file)
@@ -366,7 +366,8 @@ retry:
        bch2_trans_begin(trans);
        acl = _acl;
 
-       ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
+       ret   = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
+               bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
                              BTREE_ITER_INTENT);
        if (ret)
                goto btree_err;
index 3926e9e3ba96fdd7fc9e6b206edd3a0c4209a1c6..840f605eff1f8dc693b17eacc1d7145224a843cf 100644 (file)
@@ -796,6 +796,7 @@ struct bch_fs {
                unsigned        nsec_per_time_unit;
                u64             features;
                u64             compat;
+               unsigned long   errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
        }                       sb;
 
 
index 2105198daf3be3a0bc61cf8fa7d5880e37b0d74b..e7a2d25dfe049e87e004b6c11906fc2229cfb78f 100644 (file)
@@ -1214,19 +1214,21 @@ struct bch_sb_field {
 };
 
 #define BCH_SB_FIELDS()                                \
-       x(journal,      0)                      \
-       x(members_v1,   1)                      \
-       x(crypt,        2)                      \
-       x(replicas_v0,  3)                      \
-       x(quota,        4)                      \
-       x(disk_groups,  5)                      \
-       x(clean,        6)                      \
-       x(replicas,     7)                      \
-       x(journal_seq_blacklist, 8)             \
-       x(journal_v2,   9)                      \
-       x(counters,     10)                     \
-       x(members_v2,   11)                     \
-       x(errors,       12)
+       x(journal,                      0)      \
+       x(members_v1,                   1)      \
+       x(crypt,                        2)      \
+       x(replicas_v0,                  3)      \
+       x(quota,                        4)      \
+       x(disk_groups,                  5)      \
+       x(clean,                        6)      \
+       x(replicas,                     7)      \
+       x(journal_seq_blacklist,        8)      \
+       x(journal_v2,                   9)      \
+       x(counters,                     10)     \
+       x(members_v2,                   11)     \
+       x(errors,                       12)     \
+       x(ext,                          13)     \
+       x(downgrade,                    14)
 
 enum bch_sb_field_type {
 #define x(f, nr)       BCH_SB_FIELD_##f = nr,
@@ -1641,6 +1643,24 @@ struct bch_sb_field_errors {
 LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID,    struct bch_sb_field_error_entry, v,  0, 16);
 LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,    struct bch_sb_field_error_entry, v, 16, 64);
 
+struct bch_sb_field_ext { /* superblock field "ext": x(ext, 13) in BCH_SB_FIELDS() */
+       struct bch_sb_field     field; /* embedded struct bch_sb_field, first member as in the other field structs */
+       __le64                  recovery_passes_required[2]; /* little-endian bitmask; word 0 is updated with bch2_recovery_passes_to_stable() output, i.e. BCH_RECOVERY_PASS_STABLE_* bit positions */
+       __le64                  errors_silent[8]; /* 8 x 64 = 512 bits; presumably a bitmap indexed by error ID - confirm against the code that reads it */
+};
+
+struct bch_sb_field_downgrade_entry { /* element type of bch_sb_field_downgrade.entries[]; variable length because of errors[] */
+       __le16                  version; /* presumably the metadata version this entry applies to - confirm in sb-downgrade.c */
+       __le64                  recovery_passes[2]; /* little-endian; presumably a recovery-pass bitmask like bch_sb_field_ext.recovery_passes_required - confirm */
+       __le16                  nr_errors; /* number of elements in errors[] (see __counted_by below) */
+       __le16                  errors[] __counted_by(nr_errors); /* flexible array, must stay the last member */
+} __packed __aligned(2); /* packed, so the 64-bit words directly follow the 16-bit version; overall alignment is 2 bytes */
+
+struct bch_sb_field_downgrade { /* superblock field "downgrade": x(downgrade, 14) in BCH_SB_FIELDS() */
+       struct bch_sb_field     field; /* embedded struct bch_sb_field, first member as in the other field structs */
+       struct bch_sb_field_downgrade_entry entries[]; /* flexible array; each entry is itself variable length (errors[]), so entries[n] indexing is not valid for n > 0 */
+};
+
 /* Superblock: */
 
 /*
@@ -1654,6 +1674,11 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,      struct bch_sb_field_error_entry, v, 16, 64);
 
 #define RECOVERY_PASS_ALL_FSCK         (1ULL << 63)
 
+/*
+ * field 1:            version name
+ * field 2:            BCH_VERSION(major, minor)
+ * field 3:            recovery passes required on upgrade
+ */
 #define BCH_METADATA_VERSIONS()                                                \
        x(bkey_renumber,                BCH_VERSION(0, 10),             \
          RECOVERY_PASS_ALL_FSCK)                                       \
@@ -1707,7 +1732,9 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,       struct bch_sb_field_error_entry, v, 16, 64);
        x(rebalance_work,               BCH_VERSION(1,  3),             \
          BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))            \
        x(member_seq,                   BCH_VERSION(1,  4),             \
-         0)
+         0)                                                            \
+       x(disk_accounting_v2,           BCH_VERSION(1,  5),             \
+         BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
index 4c900c8532688fd7be902585ce531a822577804f..ac35b8b705ae1c076e780af570bd824d87c28ab2 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/slab.h>
 #include "darray.h"
 
-int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, gfp_t gfp)
+int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
 {
        if (new_size > d->size) {
                new_size = roundup_pow_of_two(new_size);
@@ -14,7 +14,8 @@ int __bch2_darray_resize(darray_void *d, size_t element_size, size_t new_size, g
                        return -ENOMEM;
 
                memcpy(data, d->data, d->size * element_size);
-               kvfree(d->data);
+               if (d->data != d->preallocated)
+                       kvfree(d->data);
                d->data = data;
                d->size = new_size;
        }
index e49be3ad858a9efd8c5683a23b65a328a6ea6bc3..d867ee620bc1f041e0a3c67fb8eba3ad28560285 100644 (file)
 
 #include <linux/slab.h>
 
-#define DARRAY(type)                                                   \
+#define DARRAY_PREALLOCATED(_type, _nr)                                        \
 struct {                                                               \
        size_t nr, size;                                                \
-       type *data;                                                     \
+       _type *data;                                                    \
+       _type preallocated[_nr];                                        \
 }
 
-typedef DARRAY(void) darray_void;
-typedef DARRAY(char *) darray_str;
+#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
 
-int __bch2_darray_resize(darray_void *, size_t, size_t, gfp_t);
+typedef DARRAY(char)   darray_char;
+typedef DARRAY(char *) darray_str;
 
-static inline int __darray_resize(darray_void *d, size_t element_size,
+int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
+
+static inline int __darray_resize(darray_char *d, size_t element_size,
                                  size_t new_size, gfp_t gfp)
 {
        return unlikely(new_size > d->size)
@@ -30,18 +33,18 @@ static inline int __darray_resize(darray_void *d, size_t element_size,
 }
 
 #define darray_resize_gfp(_d, _new_size, _gfp)                         \
-       __darray_resize((darray_void *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp)
+       unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))
 
 #define darray_resize(_d, _new_size)                                   \
        darray_resize_gfp(_d, _new_size, GFP_KERNEL)
 
-static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
+static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
 {
        return __darray_resize(d, t_size, d->nr + more, gfp);
 }
 
 #define darray_make_room_gfp(_d, _more, _gfp)                          \
-       __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
+       __darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
 
 #define darray_make_room(_d, _more)                                    \
        darray_make_room_gfp(_d, _more, GFP_KERNEL)
@@ -90,13 +93,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
 
 #define darray_init(_d)                                                        \
 do {                                                                   \
-       (_d)->data = NULL;                                              \
-       (_d)->nr = (_d)->size = 0;                                      \
+       (_d)->nr = 0;                                                   \
+       (_d)->size = ARRAY_SIZE((_d)->preallocated);                    \
+       (_d)->data = (_d)->size ? (_d)->preallocated : NULL;            \
 } while (0)
 
 #define darray_exit(_d)                                                        \
 do {                                                                   \
-       kvfree((_d)->data);                                             \
+       if (!ARRAY_SIZE((_d)->preallocated) ||                          \
+           (_d)->data != (_d)->preallocated)                           \
+               kvfree((_d)->data);                                     \
        darray_init(_d);                                                \
 } while (0)
 
index f827db54438c2d037eb762b4c7962f07abce0955..8c40c2067a0471e2dde6c3dcbcdeb709565732a7 100644 (file)
@@ -94,6 +94,7 @@
        x(ENOSPC,                       ENOSPC_sb_members)                      \
        x(ENOSPC,                       ENOSPC_sb_members_v2)                   \
        x(ENOSPC,                       ENOSPC_sb_crypt)                        \
+       x(ENOSPC,                       ENOSPC_sb_downgrade)                    \
        x(ENOSPC,                       ENOSPC_btree_slot)                      \
        x(ENOSPC,                       ENOSPC_snapshot_tree)                   \
        x(ENOENT,                       ENOENT_bkey_type_mismatch)              \
        x(BCH_ERR_invalid_sb,           invalid_sb_quota)                       \
        x(BCH_ERR_invalid_sb,           invalid_sb_errors)                      \
        x(BCH_ERR_invalid_sb,           invalid_sb_opt_compression)             \
+       x(BCH_ERR_invalid_sb,           invalid_sb_ext)                         \
+       x(BCH_ERR_invalid_sb,           invalid_sb_downgrade)                   \
        x(BCH_ERR_invalid,              invalid_bkey)                           \
        x(BCH_ERR_operation_blocked,    nocow_lock_blocked)                     \
        x(EIO,                          btree_node_read_err)                    \
index 655e3ba9bfd2c09f221011ae16c38ba9f101e6ff..aa4f7f4925f6855c486221b0c26415a4bddc8aed 100644 (file)
@@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c,
        struct printbuf buf = PRINTBUF, *out = &buf;
        int ret = -BCH_ERR_fsck_ignore;
 
+       if (test_bit(err, c->sb.errors_silent))
+               return -BCH_ERR_fsck_fix;
+
        bch2_sb_error_count(c, err);
 
        va_start(args, fmt);
index 0ae16f63e64b55305ce8708382a7cde2b451a868..fdd57c5785c9cebf609959fb753ee30e55e85b92 100644 (file)
@@ -213,11 +213,11 @@ struct dio_write {
        struct address_space            *mapping;
        struct bch_inode_info           *inode;
        struct mm_struct                *mm;
+       const struct iovec              *iov;
        unsigned                        loop:1,
                                        extending:1,
                                        sync:1,
-                                       flush:1,
-                                       free_iov:1;
+                                       flush:1;
        struct quota_res                quota_res;
        u64                             written;
 
@@ -309,12 +309,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
                return -1;
 
        if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
-               iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
+               dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
                                    GFP_KERNEL);
                if (unlikely(!iov))
                        return -ENOMEM;
-
-               dio->free_iov = true;
        }
 
        memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
@@ -378,8 +376,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
 
        bch2_pagecache_block_put(inode);
 
-       if (dio->free_iov)
-               kfree(dio->iter.__iov);
+       kfree(dio->iov);
 
        ret = dio->op.error ?: ((long) dio->written << 9);
        bio_put(&dio->op.wbio.bio);
@@ -623,11 +620,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        dio->mapping            = mapping;
        dio->inode              = inode;
        dio->mm                 = current->mm;
+       dio->iov                = NULL;
        dio->loop               = false;
        dio->extending          = extending;
        dio->sync               = is_sync_kiocb(req) || extending;
        dio->flush              = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
-       dio->free_iov           = false;
        dio->quota_res.sectors  = 0;
        dio->written            = 0;
        dio->iter               = *iter;
index 8098a3a299d1c0e3fc1deea083b7cee1621ef44e..e0a19a73c8e1a6dae41b0f9d0f7f96226e07260e 100644 (file)
@@ -100,7 +100,8 @@ static int bch2_ioc_setflags(struct bch_fs *c,
        }
 
        mutex_lock(&inode->ei_update_lock);
-       ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
+       ret   = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+               bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
                               ATTR_CTIME);
        mutex_unlock(&inode->ei_update_lock);
 
@@ -183,13 +184,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
        }
 
        mutex_lock(&inode->ei_update_lock);
-       ret = bch2_set_projid(c, inode, fa.fsx_projid);
-       if (ret)
-               goto err_unlock;
-
-       ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
+       ret   = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+               bch2_set_projid(c, inode, fa.fsx_projid) ?:
+               bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
                               ATTR_CTIME);
-err_unlock:
        mutex_unlock(&inode->ei_update_lock);
 err:
        inode_unlock(&inode->v);
index 34cb22a9c05d6d8346b217b1836171f06c52d04a..da11757682c0967950e9e9ea7d1e4d862937d0ce 100644 (file)
@@ -258,7 +258,8 @@ __bch2_create(struct mnt_idmap *idmap,
 retry:
        bch2_trans_begin(trans);
 
-       ret   = bch2_create_trans(trans,
+       ret   = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
+               bch2_create_trans(trans,
                                  inode_inum(dir), &dir_u, &inode_u,
                                  !(flags & BCH_CREATE_TMPFILE)
                                  ? &dentry->d_name : NULL,
@@ -430,7 +431,9 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
 
        lockdep_assert_held(&inode->v.i_rwsem);
 
-       ret = __bch2_link(c, inode, dir, dentry);
+       ret   = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+               bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+               __bch2_link(c, inode, dir, dentry);
        if (unlikely(ret))
                return ret;
 
@@ -481,7 +484,11 @@ err:
 
 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
 {
-       return __bch2_unlink(vdir, dentry, false);
+       struct bch_inode_info *dir= to_bch_ei(vdir);
+       struct bch_fs *c = dir->v.i_sb->s_fs_info;
+
+       return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+               __bch2_unlink(vdir, dentry, false);
 }
 
 static int bch2_symlink(struct mnt_idmap *idmap,
@@ -562,6 +569,11 @@ static int bch2_rename2(struct mnt_idmap *idmap,
                         src_inode,
                         dst_inode);
 
+       ret   = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
+               bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
+       if (ret)
+               goto err;
+
        if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
                ret = bch2_fs_quota_transfer(c, src_inode,
                                             dst_dir->ei_qid,
@@ -783,11 +795,13 @@ static int bch2_setattr(struct mnt_idmap *idmap,
                        struct dentry *dentry, struct iattr *iattr)
 {
        struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
        int ret;
 
        lockdep_assert_held(&inode->v.i_rwsem);
 
-       ret = setattr_prepare(idmap, dentry, iattr);
+       ret   = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+               setattr_prepare(idmap, dentry, iattr);
        if (ret)
                return ret;
 
@@ -1008,12 +1022,26 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
        return bch2_err_class(ret);
 }
 
+/*
+ * ->open() for regular files: an open that requests write access
+ * (O_WRONLY or O_RDWR) is first checked with bch2_subvol_is_ro() against the
+ * inode's subvolume, and a nonzero result is returned as the error.  All
+ * other opens, and writable opens that pass the check, go to
+ * generic_file_open().
+ */
+static int bch2_open(struct inode *vinode, struct file *file)
+{
+       struct bch_inode_info *inode;
+       struct bch_fs *c;
+
+       /* No write access requested: nothing to check. */
+       if (!(file->f_flags & (O_WRONLY|O_RDWR)))
+               return generic_file_open(vinode, file);
+
+       inode = to_bch_ei(vinode);
+       c = inode->v.i_sb->s_fs_info;
+
+       return bch2_subvol_is_ro(c, inode->ei_subvol) ?:
+               generic_file_open(vinode, file);
+}
+
 static const struct file_operations bch_file_operations = {
+       .open           = bch2_open,
        .llseek         = bch2_llseek,
        .read_iter      = bch2_read_iter,
        .write_iter     = bch2_write_iter,
        .mmap           = bch2_mmap,
-       .open           = generic_file_open,
        .fsync          = bch2_fsync,
        .splice_read    = filemap_splice_read,
        .splice_write   = iter_file_splice_write,
index c5961bac19f0087772b2dbc01b386ea173c6b09f..33c0e783d54697b50c490309726b49eacb410189 100644 (file)
@@ -1202,24 +1202,27 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done)
        bch2_write_done(cl);
 }
 
+struct bucket_to_lock { /* one bucket touched by a nocow write; element type of the `buckets` darray in bch2_nocow_write() */
+       struct bpos             b; /* bucket position, filled from PTR_BUCKET_POS(c, ptr) */
+       unsigned                gen; /* ptr->gen at lookup time; later compared with the bucket's current gen (gen_after) to detect a stale pointer */
+       struct nocow_lock_bucket *l; /* result of bucket_nocow_lock() for this bucket, prefetched before locking */
+};
+
 static void bch2_nocow_write(struct bch_write_op *op)
 {
        struct bch_fs *c = op->c;
        struct btree_trans *trans;
        struct btree_iter iter;
        struct bkey_s_c k;
-       struct {
-               struct bpos     b;
-               unsigned        gen;
-               struct nocow_lock_bucket *l;
-       } buckets[BCH_REPLICAS_MAX];
-       unsigned nr_buckets = 0;
+       DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
        u32 snapshot;
-       int ret, i;
+       struct bucket_to_lock *stale_at;
+       int ret;
 
        if (op->flags & BCH_WRITE_MOVE)
                return;
 
+       darray_init(&buckets);
        trans = bch2_trans_get(c);
 retry:
        bch2_trans_begin(trans);
@@ -1234,7 +1237,7 @@ retry:
        while (1) {
                struct bio *bio = &op->wbio.bio;
 
-               nr_buckets = 0;
+               buckets.nr = 0;
 
                k = bch2_btree_iter_peek_slot(&iter);
                ret = bkey_err(k);
@@ -1247,26 +1250,26 @@ retry:
                        break;
 
                if (bch2_keylist_realloc(&op->insert_keys,
-                                       op->inline_keys,
-                                       ARRAY_SIZE(op->inline_keys),
-                                       k.k->u64s))
+                                        op->inline_keys,
+                                        ARRAY_SIZE(op->inline_keys),
+                                        k.k->u64s))
                        break;
 
                /* Get iorefs before dropping btree locks: */
                struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
                bkey_for_each_ptr(ptrs, ptr) {
-                       buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
-                       buckets[nr_buckets].gen = ptr->gen;
-                       buckets[nr_buckets].l =
-                               bucket_nocow_lock(&c->nocow_locks,
-                                                 bucket_to_u64(buckets[nr_buckets].b));
-
-                       prefetch(buckets[nr_buckets].l);
+                       struct bpos b = PTR_BUCKET_POS(c, ptr);
+                       struct nocow_lock_bucket *l =
+                               bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
+                       prefetch(l);
 
                        if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
                                goto err_get_ioref;
 
-                       nr_buckets++;
+                       /* XXX allocating memory with btree locks held - rare */
+                       darray_push_gfp(&buckets, ((struct bucket_to_lock) {
+                                                  .b = b, .gen = ptr->gen, .l = l,
+                                                  }), GFP_KERNEL|__GFP_NOFAIL);
 
                        if (ptr->unwritten)
                                op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
@@ -1280,21 +1283,21 @@ retry:
                if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
                        bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
 
-               for (i = 0; i < nr_buckets; i++) {
-                       struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
-                       struct nocow_lock_bucket *l = buckets[i].l;
-                       bool stale;
+               darray_for_each(buckets, i) {
+                       struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);
 
-                       __bch2_bucket_nocow_lock(&c->nocow_locks, l,
-                                                bucket_to_u64(buckets[i].b),
+                       __bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
+                                                bucket_to_u64(i->b),
                                                 BUCKET_NOCOW_LOCK_UPDATE);
 
                        rcu_read_lock();
-                       stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
+                       bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
                        rcu_read_unlock();
 
-                       if (unlikely(stale))
+                       if (unlikely(stale)) {
+                               stale_at = i;
                                goto err_bucket_stale;
+                       }
                }
 
                bio = &op->wbio.bio;
@@ -1330,15 +1333,14 @@ err:
 
        if (ret) {
                bch_err_inum_offset_ratelimited(c,
-                               op->pos.inode,
-                               op->pos.offset << 9,
-                               "%s: btree lookup error %s",
-                               __func__, bch2_err_str(ret));
+                       op->pos.inode, op->pos.offset << 9,
+                       "%s: btree lookup error %s", __func__, bch2_err_str(ret));
                op->error = ret;
                op->flags |= BCH_WRITE_DONE;
        }
 
        bch2_trans_put(trans);
+       darray_exit(&buckets);
 
        /* fallback to cow write path? */
        if (!(op->flags & BCH_WRITE_DONE)) {
@@ -1358,24 +1360,21 @@ err:
        }
        return;
 err_get_ioref:
-       for (i = 0; i < nr_buckets; i++)
-               percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
+       darray_for_each(buckets, i)
+               percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);
 
        /* Fall back to COW path: */
        goto out;
 err_bucket_stale:
-       while (i >= 0) {
-               bch2_bucket_nocow_unlock(&c->nocow_locks,
-                                        buckets[i].b,
-                                        BUCKET_NOCOW_LOCK_UPDATE);
-               --i;
+       darray_for_each(buckets, i) {
+               bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE);
+               if (i == stale_at)
+                       break;
        }
-       for (i = 0; i < nr_buckets; i++)
-               percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
 
        /* We can retry this: */
        ret = -BCH_ERR_transaction_restart;
-       goto out;
+       goto err_get_ioref;
 }
 
 static void __bch2_write(struct bch_write_op *op)
index 5e653eb81d54f8fdfcca37038eeaf5a1febdb8e7..187b0377bd40522ebb35235c64723f13bfaa5742 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2022 Kent Overstreet */
 
 #include <linux/err.h>
+#include <linux/bitmap.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -423,3 +424,24 @@ void bch2_prt_bitflags(struct printbuf *out,
                flags ^= BIT_ULL(bit);
        }
 }
+
+/*
+ * Print the names of all bits set in the bitmap @v, separated by commas.
+ *
+ * @out:  printbuf to append to
+ * @list: bit names, indexed by bit number; may be terminated early by a
+ *        NULL entry
+ * @v:    bitmap to print
+ * @nr:   number of bits of @v to consider
+ *
+ * If @list has a NULL entry before index @nr, only the bits below that entry
+ * (the ones that have a name) are printed.
+ */
+void bch2_prt_bitflags_vector(struct printbuf *out,
+                             const char * const list[],
+                             unsigned long *v, unsigned nr)
+{
+       bool first = true;
+       unsigned i;
+
+       for (i = 0; i < nr; i++)
+               if (!list[i]) {
+                       /*
+                        * list[i] is the terminator, so names exist for bits
+                        * [0, i).  Clamp to i, not i - 1: that would drop the
+                        * last named bit and, for an empty list, wrap nr to
+                        * UINT_MAX and scan far past the end of @v.
+                        */
+                       nr = i;
+                       break;
+               }
+
+       for_each_set_bit(i, v, nr) {
+               if (!first)
+                       bch2_prt_printf(out, ",");
+               first = false;
+               bch2_prt_printf(out, "%s", list[i]);
+       }
+}
index 2191423d9f22895f9943b80c2134f150eeb26fad..9a4a56c409371570e26b1279850322ce1f2f3820 100644 (file)
@@ -124,6 +124,8 @@ void bch2_prt_units_u64(struct printbuf *, u64);
 void bch2_prt_units_s64(struct printbuf *, s64);
 void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
 void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
+void bch2_prt_bitflags_vector(struct printbuf *, const char * const[],
+                             unsigned long *, unsigned);
 
 /* Initializer for a heap allocated printbuf: */
 #define PRINTBUF ((struct printbuf) { .heap_allocated = true })
index 5933b02e49e0e5f83f4ecc0f66cf8403aa9a992b..3e49209db2dec37f9cd97aa6fdfd0fda61f479ca 100644 (file)
@@ -27,6 +27,7 @@
 #include "recovery.h"
 #include "replicas.h"
 #include "sb-clean.h"
+#include "sb-downgrade.h"
 #include "snapshot.h"
 #include "subvolume.h"
 #include "super-io.h"
@@ -503,7 +504,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
 }
 
 const char * const bch2_recovery_passes[] = {
-#define x(_fn, _when)  #_fn,
+#define x(_fn, ...)    #_fn,
        BCH_RECOVERY_PASSES()
 #undef x
        NULL
@@ -539,18 +540,47 @@ struct recovery_pass_fn {
 };
 
 static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _when)  { .fn = bch2_##_fn, .when = _when },
+#define x(_fn, _id, _when)     { .fn = bch2_##_fn, .when = _when },
        BCH_RECOVERY_PASSES()
 #undef x
 };
 
-static void check_version_upgrade(struct bch_fs *c)
+/*
+ * Convert a recovery-pass bitmask from BCH_RECOVERY_PASS_* bit positions to
+ * BCH_RECOVERY_PASS_STABLE_* bit positions.  Bits of @v at or above the
+ * number of table entries are dropped.
+ */
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+       /* Indexed by BCH_RECOVERY_PASS_*, yields BCH_RECOVERY_PASS_STABLE_* */
+       static const u8 to_stable[] = {
+#define x(n, id, ...)  [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+       BCH_RECOVERY_PASSES()
+#undef x
+       };
+       u64 stable = 0;
+       unsigned bit;
+
+       for (bit = 0; bit < ARRAY_SIZE(to_stable); bit++) {
+               if (!(v & BIT_ULL(bit)))
+                       continue;
+               stable |= BIT_ULL(to_stable[bit]);
+       }
+
+       return stable;
+}
+
+/*
+ * Convert a recovery-pass bitmask from BCH_RECOVERY_PASS_STABLE_* bit
+ * positions back to BCH_RECOVERY_PASS_* bit positions.  Bits of @v at or
+ * above the number of table entries are dropped.
+ */
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+       /* Indexed by BCH_RECOVERY_PASS_STABLE_*, yields BCH_RECOVERY_PASS_* */
+       static const u8 from_stable[] = {
+#define x(n, id, ...)  [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+       BCH_RECOVERY_PASSES()
+#undef x
+       };
+       u64 passes = 0;
+       unsigned bit;
+
+       for (bit = 0; bit < ARRAY_SIZE(from_stable); bit++) {
+               if (!(v & BIT_ULL(bit)))
+                       continue;
+               passes |= BIT_ULL(from_stable[bit]);
+       }
+
+       return passes;
+}
+
+static u64 check_version_upgrade(struct bch_fs *c)
 {
        unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
        unsigned latest_version = bcachefs_metadata_version_current;
        unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
        unsigned new_version = 0;
-       u64 recovery_passes;
 
        if (old_version < bcachefs_metadata_required_upgrade_below) {
                if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@@ -594,27 +624,25 @@ static void check_version_upgrade(struct bch_fs *c)
                bch2_version_to_text(&buf, new_version);
                prt_newline(&buf);
 
-               recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
+               u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
                if (recovery_passes) {
-                       if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
-                               prt_str(&buf, "fsck required");
-                       else {
-                               prt_str(&buf, "running recovery passes: ");
-                               prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
-                       }
-
-                       c->recovery_passes_explicit |= recovery_passes;
-                       c->opts.fix_errors = FSCK_FIX_yes;
+                       prt_str(&buf, "  running recovery passes: ");
+                       prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
                }
 
                bch_info(c, "%s", buf.buf);
 
-               mutex_lock(&c->sb_lock);
                bch2_sb_upgrade(c, new_version);
-               mutex_unlock(&c->sb_lock);
 
                printbuf_exit(&buf);
+
+               struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+               ext->recovery_passes_required[0] |=
+                       cpu_to_le64(bch2_recovery_passes_to_stable(recovery_passes));
+               return true;
        }
+
+       return false;
 }
 
 u64 bch2_fsck_recovery_passes(void)
@@ -709,7 +737,6 @@ int bch2_fs_recovery(struct bch_fs *c)
        struct bch_sb_field_clean *clean = NULL;
        struct jset *last_journal_entry = NULL;
        u64 last_seq = 0, blacklist_seq, journal_seq;
-       bool write_sb = false;
        int ret = 0;
 
        if (c->sb.clean) {
@@ -737,15 +764,81 @@ int bch2_fs_recovery(struct bch_fs *c)
                goto err;
        }
 
-       if (!(c->opts.nochanges && c->opts.norecovery))
-               check_version_upgrade(c);
-
        if (c->opts.fsck && c->opts.norecovery) {
                bch_err(c, "cannot select both norecovery and fsck");
                ret = -EINVAL;
                goto err;
        }
 
+       if (!(c->opts.nochanges && c->opts.norecovery)) {
+               mutex_lock(&c->sb_lock);
+               bool write_sb = false;
+
+               struct bch_sb_field_ext *ext =
+                       bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
+               if (!ext) {
+                       ret = -BCH_ERR_ENOSPC_sb;
+                       mutex_unlock(&c->sb_lock);
+                       goto err;
+               }
+
+               if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
+                       ext->recovery_passes_required[0] |=
+                               cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
+                       write_sb = true;
+               }
+
+               u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+               if (sb_passes) {
+                       struct printbuf buf = PRINTBUF;
+                       prt_str(&buf, "superblock requires following recovery passes to be run:\n  ");
+                       prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
+                       bch_info(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       c->recovery_passes_explicit |= sb_passes;
+               }
+
+               if (bcachefs_metadata_version_current < c->sb.version) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "Version downgrade required:\n");
+
+                       u64 passes = ext->recovery_passes_required[0];
+                       ret = bch2_sb_set_downgrade(c,
+                                       BCH_VERSION_MINOR(bcachefs_metadata_version_current),
+                                       BCH_VERSION_MINOR(c->sb.version));
+                       if (ret) {
+                               mutex_unlock(&c->sb_lock);
+                               goto err;
+                       }
+
+                       passes = ext->recovery_passes_required[0] & ~passes;
+                       if (passes) {
+                               prt_str(&buf, "  running recovery passes: ");
+                               prt_bitflags(&buf, bch2_recovery_passes,
+                                            bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
+                       }
+
+                       bch_info(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+
+                       bch2_sb_maybe_downgrade(c);
+                       write_sb = true;
+               }
+
+               if (check_version_upgrade(c))
+                       write_sb = true;
+
+               if (write_sb)
+                       bch2_write_super(c);
+
+               c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+               mutex_unlock(&c->sb_lock);
+       }
+
+       if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+
        ret = bch2_blacklist_table_initialize(c);
        if (ret) {
                bch_err(c, "error initializing blacklist table");
@@ -882,11 +975,6 @@ use_clean:
        if (ret)
                goto err;
 
-       if (c->opts.fsck &&
-           (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
-            BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)))
-               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
-
        ret = bch2_run_recovery_passes(c);
        if (ret)
                goto err;
@@ -925,16 +1013,30 @@ use_clean:
        }
 
        mutex_lock(&c->sb_lock);
+       bool write_sb = false;
+
        if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
                SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
                write_sb = true;
        }
 
-       if (!test_bit(BCH_FS_error, &c->flags)) {
+       if (!test_bit(BCH_FS_error, &c->flags) &&
+           !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) {
                c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
                write_sb = true;
        }
 
+       if (!test_bit(BCH_FS_error, &c->flags)) {
+               struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+               if (ext &&
+                   (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
+                    !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
+                       memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
+                       memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+                       write_sb = true;
+               }
+       }
+
        if (c->opts.fsck &&
            !test_bit(BCH_FS_error, &c->flags) &&
            !test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
index 175a21a0bf26dfdfc759b1cbbb907ea4a47626fe..4e9d24719b2e85c356fa88a0bd3923c3a2ff30cc 100644 (file)
@@ -4,6 +4,9 @@
 
 extern const char * const bch2_recovery_passes[];
 
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
 /*
  * For when we need to rewind recovery passes and run a pass we skipped:
  */
index 2d139864e27fe145ffd57615719f97cfd74645d4..fa0c8efd2a1b42450535474079b791aa2e6e9938 100644 (file)
@@ -8,45 +8,57 @@
 #define PASS_ALWAYS            BIT(3)
 #define PASS_ONLINE            BIT(4)
 
+/*
+ * Passes may be reordered, but the second field is a persistent identifier and
+ * must never change:
+ */
 #define BCH_RECOVERY_PASSES()                                                  \
-       x(alloc_read,                           PASS_ALWAYS)                    \
-       x(stripes_read,                         PASS_ALWAYS)                    \
-       x(initialize_subvolumes,                0)                              \
-       x(snapshots_read,                       PASS_ALWAYS)                    \
-       x(check_topology,                       0)                              \
-       x(check_allocations,                    PASS_FSCK)                      \
-       x(trans_mark_dev_sbs,                   PASS_ALWAYS|PASS_SILENT)        \
-       x(fs_journal_alloc,                     PASS_ALWAYS|PASS_SILENT)        \
-       x(set_may_go_rw,                        PASS_ALWAYS|PASS_SILENT)        \
-       x(journal_replay,                       PASS_ALWAYS)                    \
-       x(check_alloc_info,                     PASS_ONLINE|PASS_FSCK)          \
-       x(check_lrus,                           PASS_ONLINE|PASS_FSCK)          \
-       x(check_btree_backpointers,             PASS_ONLINE|PASS_FSCK)          \
-       x(check_backpointers_to_extents,        PASS_ONLINE|PASS_FSCK)          \
-       x(check_extents_to_backpointers,        PASS_ONLINE|PASS_FSCK)          \
-       x(check_alloc_to_lru_refs,              PASS_ONLINE|PASS_FSCK)          \
-       x(fs_freespace_init,                    PASS_ALWAYS|PASS_SILENT)        \
-       x(bucket_gens_init,                     0)                              \
-       x(check_snapshot_trees,                 PASS_ONLINE|PASS_FSCK)          \
-       x(check_snapshots,                      PASS_ONLINE|PASS_FSCK)          \
-       x(check_subvols,                        PASS_ONLINE|PASS_FSCK)          \
-       x(delete_dead_snapshots,                PASS_ONLINE|PASS_FSCK)          \
-       x(fs_upgrade_for_subvolumes,            0)                              \
-       x(resume_logged_ops,                    PASS_ALWAYS)                    \
-       x(check_inodes,                         PASS_FSCK)                      \
-       x(check_extents,                        PASS_FSCK)                      \
-       x(check_indirect_extents,               PASS_FSCK)                      \
-       x(check_dirents,                        PASS_FSCK)                      \
-       x(check_xattrs,                         PASS_FSCK)                      \
-       x(check_root,                           PASS_ONLINE|PASS_FSCK)          \
-       x(check_directory_structure,            PASS_ONLINE|PASS_FSCK)          \
-       x(check_nlinks,                         PASS_FSCK)                      \
-       x(delete_dead_inodes,                   PASS_FSCK|PASS_UNCLEAN)         \
-       x(fix_reflink_p,                        0)                              \
-       x(set_fs_needs_rebalance,               0)                              \
+       x(alloc_read,                            0, PASS_ALWAYS)                \
+       x(stripes_read,                          1, PASS_ALWAYS)                \
+       x(initialize_subvolumes,                 2, 0)                          \
+       x(snapshots_read,                        3, PASS_ALWAYS)                \
+       x(check_topology,                        4, 0)                          \
+       x(check_allocations,                     5, PASS_FSCK)                  \
+       x(trans_mark_dev_sbs,                    6, PASS_ALWAYS|PASS_SILENT)    \
+       x(fs_journal_alloc,                      7, PASS_ALWAYS|PASS_SILENT)    \
+       x(set_may_go_rw,                         8, PASS_ALWAYS|PASS_SILENT)    \
+       x(journal_replay,                        9, PASS_ALWAYS)                \
+       x(check_alloc_info,                     10, PASS_ONLINE|PASS_FSCK)      \
+       x(check_lrus,                           11, PASS_ONLINE|PASS_FSCK)      \
+       x(check_btree_backpointers,             12, PASS_ONLINE|PASS_FSCK)      \
+       x(check_backpointers_to_extents,        13, PASS_ONLINE|PASS_FSCK)      \
+       x(check_extents_to_backpointers,        14, PASS_ONLINE|PASS_FSCK)      \
+       x(check_alloc_to_lru_refs,              15, PASS_ONLINE|PASS_FSCK)      \
+       x(fs_freespace_init,                    16, PASS_ALWAYS|PASS_SILENT)    \
+       x(bucket_gens_init,                     17, 0)                          \
+       x(check_snapshot_trees,                 18, PASS_ONLINE|PASS_FSCK)      \
+       x(check_snapshots,                      19, PASS_ONLINE|PASS_FSCK)      \
+       x(check_subvols,                        20, PASS_ONLINE|PASS_FSCK)      \
+       x(delete_dead_snapshots,                21, PASS_ONLINE|PASS_FSCK)      \
+       x(fs_upgrade_for_subvolumes,            22, 0)                          \
+       x(resume_logged_ops,                    23, PASS_ALWAYS)                \
+       x(check_inodes,                         24, PASS_FSCK)                  \
+       x(check_extents,                        25, PASS_FSCK)                  \
+       x(check_indirect_extents,               26, PASS_FSCK)                  \
+       x(check_dirents,                        27, PASS_FSCK)                  \
+       x(check_xattrs,                         28, PASS_FSCK)                  \
+       x(check_root,                           29, PASS_ONLINE|PASS_FSCK)      \
+       x(check_directory_structure,            30, PASS_ONLINE|PASS_FSCK)      \
+       x(check_nlinks,                         31, PASS_FSCK)                  \
+       x(delete_dead_inodes,                   32, PASS_FSCK|PASS_UNCLEAN)     \
+       x(fix_reflink_p,                        33, 0)                          \
+       x(set_fs_needs_rebalance,               34, 0)                          \
 
+/* We normally enumerate recovery passes in the order we run them: */
 enum bch_recovery_pass {
-#define x(n, when)     BCH_RECOVERY_PASS_##n,
+#define x(n, id, when) BCH_RECOVERY_PASS_##n,
+       BCH_RECOVERY_PASSES()
+#undef x
+};
+
+/* But we also need stable identifiers that can be used in the superblock */
+enum bch_recovery_pass_stable {
+#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id,
        BCH_RECOVERY_PASSES()
 #undef x
 };
index ca14f86fb2a9954af5cbaf9b161010cb971061d3..9632f36f5f318134065cfdbae613b422cce98f6a 100644 (file)
@@ -328,8 +328,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
 
        mutex_lock(&c->sb_lock);
        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-
-       bch2_sb_maybe_downgrade(c);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
 
        ret = bch2_write_super(c);
diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c
new file mode 100644 (file)
index 0000000..d2a92fb
--- /dev/null
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Superblock section that contains a list of recovery passes to run when
+ * downgrading past a given version
+ */
+
+#include "bcachefs.h"
+#include "darray.h"
+#include "recovery.h"
+#include "sb-downgrade.h"
+#include "sb-errors.h"
+#include "super-io.h"
+
+/*
+ * Downgrade table:
+ * When downgrading past certain versions, we need to run certain recovery
+ * passes and fix certain errors:
+ *
+ * x(version, recovery_passes, errors...)
+ *
+ * version is the suffix of a bcachefs_metadata_version_* constant,
+ * recovery_passes is a mask built from BIT_ULL(BCH_RECOVERY_PASS_*), and the
+ * remaining arguments are BCH_FSCK_ERR_* ids.
+ */
+
+#define DOWNGRADE_TABLE()                                      \
+       x(disk_accounting_v2,                                   \
+         BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info),          \
+         BCH_FSCK_ERR_dev_usage_buckets_wrong)
+
+/* In-memory form of one DOWNGRADE_TABLE() row, as stored in downgrade_table[] */
+struct downgrade_entry {
+       /* mask of recovery passes, taken from the row's second argument */
+       u64             recovery_passes;
+       /* bcachefs_metadata_version_* value the row applies to */
+       u16             version;
+       /* number of elements in errors[] */
+       u16             nr_errors;
+       /* error ids listed in the row's trailing arguments */
+       const u16       *errors;
+};
+
+/*
+ * One error-id array per table row. The row's version must be pasted on both
+ * sides (downgrade_##ver##_errors): "ver_##errors" would paste the literal
+ * identifier "ver_" rather than the macro parameter, giving every row the same
+ * array name.
+ */
+#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
+DOWNGRADE_TABLE()
+#undef x
+
+/* The compiled-in downgrade table: one downgrade_entry per DOWNGRADE_TABLE() row */
+static const struct downgrade_entry downgrade_table[] = {
+#define x(ver, passes, ...) {                                  \
+       .recovery_passes        = passes,                       \
+       .version                = bcachefs_metadata_version_##ver,\
+       .nr_errors              = ARRAY_SIZE(downgrade_##ver##_errors),\
+       .errors                 = downgrade_##ver##_errors,     \
+},
+DOWNGRADE_TABLE()
+#undef x
+};
+
+/*
+ * Entries are variable length: the following entry starts immediately after
+ * the last element of this entry's errors[] array.
+ */
+static inline const struct bch_sb_field_downgrade_entry *
+downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
+{
+       unsigned nr = le16_to_cpu(e->nr_errors);
+
+       return (void *) (e->errors + nr);
+}
+
+/*
+ * Iterate over the entries of downgrade section @_d, declaring @_i as a const
+ * pointer to the current entry.
+ *
+ * An entry is only visited if it lies entirely within the section:
+ *  - its fixed-size header must fit (&errors[0] may equal the section end,
+ *    which is the case for a final entry with no errors),
+ *  - and so must all nr_errors elements of its errors[] array.
+ *
+ * Iteration stops at the first entry that fails either test, so trailing
+ * padding and truncated entries are never dereferenced by the loop body.
+ */
+#define for_each_downgrade_entry(_d, _i)                                               \
+       for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;             \
+            (void *) _i        < vstruct_end(&(_d)->field) &&                          \
+            (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&                    \
+            (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);          \
+            _i = downgrade_entry_next_c(_i))
+
+/*
+ * Size, in u64s, of a downgrade section holding @nr entries. Only the
+ * fixed-size part of each entry is counted, and the division truncates.
+ */
+static inline unsigned bch2_sb_field_downgrade_u64s(unsigned nr)
+{
+       size_t bytes = sizeof(struct bch_sb_field_downgrade);
+
+       bytes += sizeof(struct bch_sb_field_downgrade_entry) * nr;
+       return bytes / sizeof(u64);
+}
+
+/*
+ * Validation hook for the downgrade section: every entry must carry the same
+ * major version as the superblock.
+ *
+ * Returns 0 if all entries pass; otherwise prints a message to @err and
+ * returns -BCH_ERR_invalid_sb_downgrade.
+ */
+static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
+                                     struct printbuf *err)
+{
+       struct bch_sb_field_downgrade *d = field_to_type(f, downgrade);
+       unsigned sb_major = BCH_VERSION_MAJOR(le16_to_cpu(sb->version));
+
+       for_each_downgrade_entry(d, entry) {
+               unsigned entry_major = BCH_VERSION_MAJOR(le16_to_cpu(entry->version));
+
+               if (entry_major == sb_major)
+                       continue;
+
+               prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
+                          entry_major, sb_major);
+               return -BCH_ERR_invalid_sb_downgrade;
+       }
+
+       return 0;
+}
+
+/*
+ * Print the downgrade section to @out: for each entry, its version, the
+ * recovery passes it requires (decoded from their stable ids) and a
+ * comma-separated list of error names. Error ids at or above BCH_SB_ERR_MAX
+ * are printed as "(unknown)".
+ */
+static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
+                                     struct bch_sb_field *f)
+{
+       struct bch_sb_field_downgrade *d = field_to_type(f, downgrade);
+
+       /* Make sure there is a tabstop for the values to line up on */
+       if (out->nr_tabstops <= 1)
+               printbuf_tabstop_push(out, 16);
+
+       for_each_downgrade_entry(d, entry) {
+               unsigned nr_errors = le16_to_cpu(entry->nr_errors);
+
+               prt_str(out, "version:");
+               prt_tab(out);
+               bch2_version_to_text(out, le16_to_cpu(entry->version));
+               prt_newline(out);
+
+               prt_str(out, "recovery passes:");
+               prt_tab(out);
+               prt_bitflags(out, bch2_recovery_passes,
+                            bch2_recovery_passes_from_stable(le64_to_cpu(entry->recovery_passes[0])));
+               prt_newline(out);
+
+               prt_str(out, "errors:");
+               prt_tab(out);
+               for (unsigned j = 0; j < nr_errors; j++) {
+                       unsigned err_id = le16_to_cpu(entry->errors[j]);
+                       const char *name;
+
+                       /* separator goes before every name except the first */
+                       if (j)
+                               prt_char(out, ',');
+
+                       if (err_id < BCH_SB_ERR_MAX)
+                               name = bch2_sb_error_strs[err_id];
+                       else
+                               name = "(unknown)";
+                       prt_str(out, name);
+               }
+               prt_newline(out);
+       }
+}
+
+/* Superblock field ops for the downgrade section: validation and printing */
+const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
+       .validate       = bch2_sb_downgrade_validate,
+       .to_text        = bch2_sb_downgrade_to_text,
+};
+
+/*
+ * Rebuild the superblock downgrade section from the compiled-in
+ * downgrade_table.
+ *
+ * Only table rows whose major version matches the superblock's are emitted.
+ * An existing section that is larger than the one we would write is left
+ * untouched.
+ *
+ * Returns 0 on success, the darray_make_room() error if the temporary table
+ * can't be grown, or -BCH_ERR_ENOSPC_sb_downgrade if the section can't be
+ * resized.
+ */
+int bch2_sb_downgrade_update(struct bch_fs *c)
+{
+       darray_char table = {};
+       int ret = 0;
+
+       /* Serialize the matching rows into a temporary byte array: */
+       for (const struct downgrade_entry *src = downgrade_table;
+            src < downgrade_table + ARRAY_SIZE(downgrade_table);
+            src++) {
+               if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+                       continue;
+
+               struct bch_sb_field_downgrade_entry *dst;
+               unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
+
+               ret = darray_make_room(&table, bytes);
+               if (ret)
+                       goto out;
+
+               dst = (void *) &darray_top(table);
+               dst->version = cpu_to_le16(src->version);
+               /*
+                * The table is written in terms of BCH_RECOVERY_PASS_* bit
+                * numbers; the superblock stores stable ids, which is what
+                * readers decode with bch2_recovery_passes_from_stable():
+                */
+               dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
+               dst->recovery_passes[1] = 0;
+               dst->nr_errors          = cpu_to_le16(src->nr_errors);
+               for (unsigned i = 0; i < src->nr_errors; i++)
+                       dst->errors[i] = cpu_to_le16(src->errors[i]);
+
+               table.nr += bytes;
+       }
+
+       struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+
+       /* Section size, rounded up to whole u64s: */
+       unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
+
+       /* Keep an existing section that is bigger than ours */
+       if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
+               goto out;
+
+       d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
+       if (!d) {
+               ret = -BCH_ERR_ENOSPC_sb_downgrade;
+               goto out;
+       }
+
+       memcpy(d->entries, table.data, table.nr);
+       /* Zero the bytes between the last entry and the u64 boundary */
+       memset_u64s_tail(d->entries, 0, table.nr);
+out:
+       darray_exit(&table);
+       return ret;
+}
+
+/*
+ * Apply the superblock's downgrade entries for a downgrade from minor version
+ * @old_minor to @new_minor.
+ *
+ * For every entry whose minor version lies in (@new_minor, @old_minor], its
+ * recovery passes are OR'd into the ext section's recovery_passes_required,
+ * and its errors are marked silent both in c->sb.errors_silent and in the ext
+ * section's errors_silent bitmap.
+ *
+ * The ext section is dereferenced without a NULL check, so it must already
+ * exist whenever a downgrade section is present.
+ *
+ * Returns 0, including when there is no downgrade section.
+ */
+int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+{
+       struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
+       struct bch_sb_field_ext *ext;
+
+       if (!d)
+               return 0;
+
+       ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       for_each_downgrade_entry(d, entry) {
+               unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(entry->version));
+
+               /* Skip entries outside the range being downgraded across */
+               if (minor <= new_minor || minor > old_minor)
+                       continue;
+
+               /* Both sides are little endian on disk, so OR them directly */
+               ext->recovery_passes_required[0] |= entry->recovery_passes[0];
+               ext->recovery_passes_required[1] |= entry->recovery_passes[1];
+
+               for (unsigned j = 0; j < le16_to_cpu(entry->nr_errors); j++) {
+                       unsigned err_id = le16_to_cpu(entry->errors[j]);
+
+                       if (err_id < BCH_SB_ERR_MAX)
+                               __set_bit(err_id, c->sb.errors_silent);
+                       if (err_id < sizeof(ext->errors_silent) * 8)
+                               ext->errors_silent[err_id / 64] |= cpu_to_le64(BIT_ULL(err_id % 64));
+               }
+       }
+
+       return 0;
+}
diff --git a/libbcachefs/sb-downgrade.h b/libbcachefs/sb-downgrade.h
new file mode 100644 (file)
index 0000000..0703ad7
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_H
+#define _BCACHEFS_SB_DOWNGRADE_H
+
+/* Superblock field ops (validate, to_text) for the downgrade section */
+extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
+
+/* Rebuild the superblock downgrade section from the compiled-in table */
+int bch2_sb_downgrade_update(struct bch_fs *);
+/* Apply downgrade entries; the unsigned arguments are (new_minor, old_minor) */
+int bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_H */
index f0930ab7f036eb30fe5d40708f4b82a1e68907f2..5f5bcae391fb9fcde0d306870adbb6779e06cda1 100644 (file)
@@ -4,7 +4,7 @@
 #include "sb-errors.h"
 #include "super-io.h"
 
-static const char * const bch2_sb_error_strs[] = {
+const char * const bch2_sb_error_strs[] = {
 #define x(t, n, ...) [n] = #t,
        BCH_SB_ERRS()
        NULL
@@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id
 
 static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
 {
-       return e
-               ? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0])
-               : 0;
+       return bch2_sb_field_nr_entries(e);
 }
 
 static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)
index 92289ce62cbffdcb948d58d5189b8dbac60eda82..8889001e7db4bd595192d338ea0eb24c115550b1 100644 (file)
@@ -4,259 +4,7 @@
 
 #include "sb-errors_types.h"
 
-#define BCH_SB_ERRS()                                                  \
-       x(clean_but_journal_not_empty,                          0)      \
-       x(dirty_but_no_journal_entries,                         1)      \
-       x(dirty_but_no_journal_entries_post_drop_nonflushes,    2)      \
-       x(sb_clean_journal_seq_mismatch,                        3)      \
-       x(sb_clean_btree_root_mismatch,                         4)      \
-       x(sb_clean_missing,                                     5)      \
-       x(jset_unsupported_version,                             6)      \
-       x(jset_unknown_csum,                                    7)      \
-       x(jset_last_seq_newer_than_seq,                         8)      \
-       x(jset_past_bucket_end,                                 9)      \
-       x(jset_seq_blacklisted,                                 10)     \
-       x(journal_entries_missing,                              11)     \
-       x(journal_entry_replicas_not_marked,                    12)     \
-       x(journal_entry_past_jset_end,                          13)     \
-       x(journal_entry_replicas_data_mismatch,                 14)     \
-       x(journal_entry_bkey_u64s_0,                            15)     \
-       x(journal_entry_bkey_past_end,                          16)     \
-       x(journal_entry_bkey_bad_format,                        17)     \
-       x(journal_entry_bkey_invalid,                           18)     \
-       x(journal_entry_btree_root_bad_size,                    19)     \
-       x(journal_entry_blacklist_bad_size,                     20)     \
-       x(journal_entry_blacklist_v2_bad_size,                  21)     \
-       x(journal_entry_blacklist_v2_start_past_end,            22)     \
-       x(journal_entry_usage_bad_size,                         23)     \
-       x(journal_entry_data_usage_bad_size,                    24)     \
-       x(journal_entry_clock_bad_size,                         25)     \
-       x(journal_entry_clock_bad_rw,                           26)     \
-       x(journal_entry_dev_usage_bad_size,                     27)     \
-       x(journal_entry_dev_usage_bad_dev,                      28)     \
-       x(journal_entry_dev_usage_bad_pad,                      29)     \
-       x(btree_node_unreadable,                                30)     \
-       x(btree_node_fault_injected,                            31)     \
-       x(btree_node_bad_magic,                                 32)     \
-       x(btree_node_bad_seq,                                   33)     \
-       x(btree_node_unsupported_version,                       34)     \
-       x(btree_node_bset_older_than_sb_min,                    35)     \
-       x(btree_node_bset_newer_than_sb,                        36)     \
-       x(btree_node_data_missing,                              37)     \
-       x(btree_node_bset_after_end,                            38)     \
-       x(btree_node_replicas_sectors_written_mismatch,         39)     \
-       x(btree_node_replicas_data_mismatch,                    40)     \
-       x(bset_unknown_csum,                                    41)     \
-       x(bset_bad_csum,                                        42)     \
-       x(bset_past_end_of_btree_node,                          43)     \
-       x(bset_wrong_sector_offset,                             44)     \
-       x(bset_empty,                                           45)     \
-       x(bset_bad_seq,                                         46)     \
-       x(bset_blacklisted_journal_seq,                         47)     \
-       x(first_bset_blacklisted_journal_seq,                   48)     \
-       x(btree_node_bad_btree,                                 49)     \
-       x(btree_node_bad_level,                                 50)     \
-       x(btree_node_bad_min_key,                               51)     \
-       x(btree_node_bad_max_key,                               52)     \
-       x(btree_node_bad_format,                                53)     \
-       x(btree_node_bkey_past_bset_end,                        54)     \
-       x(btree_node_bkey_bad_format,                           55)     \
-       x(btree_node_bad_bkey,                                  56)     \
-       x(btree_node_bkey_out_of_order,                         57)     \
-       x(btree_root_bkey_invalid,                              58)     \
-       x(btree_root_read_error,                                59)     \
-       x(btree_root_bad_min_key,                               60)     \
-       x(btree_root_bad_max_key,                               61)     \
-       x(btree_node_read_error,                                62)     \
-       x(btree_node_topology_bad_min_key,                      63)     \
-       x(btree_node_topology_bad_max_key,                      64)     \
-       x(btree_node_topology_overwritten_by_prev_node,         65)     \
-       x(btree_node_topology_overwritten_by_next_node,         66)     \
-       x(btree_node_topology_interior_node_empty,              67)     \
-       x(fs_usage_hidden_wrong,                                68)     \
-       x(fs_usage_btree_wrong,                                 69)     \
-       x(fs_usage_data_wrong,                                  70)     \
-       x(fs_usage_cached_wrong,                                71)     \
-       x(fs_usage_reserved_wrong,                              72)     \
-       x(fs_usage_persistent_reserved_wrong,                   73)     \
-       x(fs_usage_nr_inodes_wrong,                             74)     \
-       x(fs_usage_replicas_wrong,                              75)     \
-       x(dev_usage_buckets_wrong,                              76)     \
-       x(dev_usage_sectors_wrong,                              77)     \
-       x(dev_usage_fragmented_wrong,                           78)     \
-       x(dev_usage_buckets_ec_wrong,                           79)     \
-       x(bkey_version_in_future,                               80)     \
-       x(bkey_u64s_too_small,                                  81)     \
-       x(bkey_invalid_type_for_btree,                          82)     \
-       x(bkey_extent_size_zero,                                83)     \
-       x(bkey_extent_size_greater_than_offset,                 84)     \
-       x(bkey_size_nonzero,                                    85)     \
-       x(bkey_snapshot_nonzero,                                86)     \
-       x(bkey_snapshot_zero,                                   87)     \
-       x(bkey_at_pos_max,                                      88)     \
-       x(bkey_before_start_of_btree_node,                      89)     \
-       x(bkey_after_end_of_btree_node,                         90)     \
-       x(bkey_val_size_nonzero,                                91)     \
-       x(bkey_val_size_too_small,                              92)     \
-       x(alloc_v1_val_size_bad,                                93)     \
-       x(alloc_v2_unpack_error,                                94)     \
-       x(alloc_v3_unpack_error,                                95)     \
-       x(alloc_v4_val_size_bad,                                96)     \
-       x(alloc_v4_backpointers_start_bad,                      97)     \
-       x(alloc_key_data_type_bad,                              98)     \
-       x(alloc_key_empty_but_have_data,                        99)     \
-       x(alloc_key_dirty_sectors_0,                            100)    \
-       x(alloc_key_data_type_inconsistency,                    101)    \
-       x(alloc_key_to_missing_dev_bucket,                      102)    \
-       x(alloc_key_cached_inconsistency,                       103)    \
-       x(alloc_key_cached_but_read_time_zero,                  104)    \
-       x(alloc_key_to_missing_lru_entry,                       105)    \
-       x(alloc_key_data_type_wrong,                            106)    \
-       x(alloc_key_gen_wrong,                                  107)    \
-       x(alloc_key_dirty_sectors_wrong,                        108)    \
-       x(alloc_key_cached_sectors_wrong,                       109)    \
-       x(alloc_key_stripe_wrong,                               110)    \
-       x(alloc_key_stripe_redundancy_wrong,                    111)    \
-       x(bucket_sector_count_overflow,                         112)    \
-       x(bucket_metadata_type_mismatch,                        113)    \
-       x(need_discard_key_wrong,                               114)    \
-       x(freespace_key_wrong,                                  115)    \
-       x(freespace_hole_missing,                               116)    \
-       x(bucket_gens_val_size_bad,                             117)    \
-       x(bucket_gens_key_wrong,                                118)    \
-       x(bucket_gens_hole_wrong,                               119)    \
-       x(bucket_gens_to_invalid_dev,                           120)    \
-       x(bucket_gens_to_invalid_buckets,                       121)    \
-       x(bucket_gens_nonzero_for_invalid_buckets,              122)    \
-       x(need_discard_freespace_key_to_invalid_dev_bucket,     123)    \
-       x(need_discard_freespace_key_bad,                       124)    \
-       x(backpointer_pos_wrong,                                125)    \
-       x(backpointer_to_missing_device,                        126)    \
-       x(backpointer_to_missing_alloc,                         127)    \
-       x(backpointer_to_missing_ptr,                           128)    \
-       x(lru_entry_at_time_0,                                  129)    \
-       x(lru_entry_to_invalid_bucket,                          130)    \
-       x(lru_entry_bad,                                        131)    \
-       x(btree_ptr_val_too_big,                                132)    \
-       x(btree_ptr_v2_val_too_big,                             133)    \
-       x(btree_ptr_has_non_ptr,                                134)    \
-       x(extent_ptrs_invalid_entry,                            135)    \
-       x(extent_ptrs_no_ptrs,                                  136)    \
-       x(extent_ptrs_too_many_ptrs,                            137)    \
-       x(extent_ptrs_redundant_crc,                            138)    \
-       x(extent_ptrs_redundant_stripe,                         139)    \
-       x(extent_ptrs_unwritten,                                140)    \
-       x(extent_ptrs_written_and_unwritten,                    141)    \
-       x(ptr_to_invalid_device,                                142)    \
-       x(ptr_to_duplicate_device,                              143)    \
-       x(ptr_after_last_bucket,                                144)    \
-       x(ptr_before_first_bucket,                              145)    \
-       x(ptr_spans_multiple_buckets,                           146)    \
-       x(ptr_to_missing_backpointer,                           147)    \
-       x(ptr_to_missing_alloc_key,                             148)    \
-       x(ptr_to_missing_replicas_entry,                        149)    \
-       x(ptr_to_missing_stripe,                                150)    \
-       x(ptr_to_incorrect_stripe,                              151)    \
-       x(ptr_gen_newer_than_bucket_gen,                        152)    \
-       x(ptr_too_stale,                                        153)    \
-       x(stale_dirty_ptr,                                      154)    \
-       x(ptr_bucket_data_type_mismatch,                        155)    \
-       x(ptr_cached_and_erasure_coded,                         156)    \
-       x(ptr_crc_uncompressed_size_too_small,                  157)    \
-       x(ptr_crc_csum_type_unknown,                            158)    \
-       x(ptr_crc_compression_type_unknown,                     159)    \
-       x(ptr_crc_redundant,                                    160)    \
-       x(ptr_crc_uncompressed_size_too_big,                    161)    \
-       x(ptr_crc_nonce_mismatch,                               162)    \
-       x(ptr_stripe_redundant,                                 163)    \
-       x(reservation_key_nr_replicas_invalid,                  164)    \
-       x(reflink_v_refcount_wrong,                             165)    \
-       x(reflink_p_to_missing_reflink_v,                       166)    \
-       x(stripe_pos_bad,                                       167)    \
-       x(stripe_val_size_bad,                                  168)    \
-       x(stripe_sector_count_wrong,                            169)    \
-       x(snapshot_tree_pos_bad,                                170)    \
-       x(snapshot_tree_to_missing_snapshot,                    171)    \
-       x(snapshot_tree_to_missing_subvol,                      172)    \
-       x(snapshot_tree_to_wrong_subvol,                        173)    \
-       x(snapshot_tree_to_snapshot_subvol,                     174)    \
-       x(snapshot_pos_bad,                                     175)    \
-       x(snapshot_parent_bad,                                  176)    \
-       x(snapshot_children_not_normalized,                     177)    \
-       x(snapshot_child_duplicate,                             178)    \
-       x(snapshot_child_bad,                                   179)    \
-       x(snapshot_skiplist_not_normalized,                     180)    \
-       x(snapshot_skiplist_bad,                                181)    \
-       x(snapshot_should_not_have_subvol,                      182)    \
-       x(snapshot_to_bad_snapshot_tree,                        183)    \
-       x(snapshot_bad_depth,                                   184)    \
-       x(snapshot_bad_skiplist,                                185)    \
-       x(subvol_pos_bad,                                       186)    \
-       x(subvol_not_master_and_not_snapshot,                   187)    \
-       x(subvol_to_missing_root,                               188)    \
-       x(subvol_root_wrong_bi_subvol,                          189)    \
-       x(bkey_in_missing_snapshot,                             190)    \
-       x(inode_pos_inode_nonzero,                              191)    \
-       x(inode_pos_blockdev_range,                             192)    \
-       x(inode_unpack_error,                                   193)    \
-       x(inode_str_hash_invalid,                               194)    \
-       x(inode_v3_fields_start_bad,                            195)    \
-       x(inode_snapshot_mismatch,                              196)    \
-       x(inode_unlinked_but_clean,                             197)    \
-       x(inode_unlinked_but_nlink_nonzero,                     198)    \
-       x(inode_checksum_type_invalid,                          199)    \
-       x(inode_compression_type_invalid,                       200)    \
-       x(inode_subvol_root_but_not_dir,                        201)    \
-       x(inode_i_size_dirty_but_clean,                         202)    \
-       x(inode_i_sectors_dirty_but_clean,                      203)    \
-       x(inode_i_sectors_wrong,                                204)    \
-       x(inode_dir_wrong_nlink,                                205)    \
-       x(inode_dir_multiple_links,                             206)    \
-       x(inode_multiple_links_but_nlink_0,                     207)    \
-       x(inode_wrong_backpointer,                              208)    \
-       x(inode_wrong_nlink,                                    209)    \
-       x(inode_unreachable,                                    210)    \
-       x(deleted_inode_but_clean,                              211)    \
-       x(deleted_inode_missing,                                212)    \
-       x(deleted_inode_is_dir,                                 213)    \
-       x(deleted_inode_not_unlinked,                           214)    \
-       x(extent_overlapping,                                   215)    \
-       x(extent_in_missing_inode,                              216)    \
-       x(extent_in_non_reg_inode,                              217)    \
-       x(extent_past_end_of_inode,                             218)    \
-       x(dirent_empty_name,                                    219)    \
-       x(dirent_val_too_big,                                   220)    \
-       x(dirent_name_too_long,                                 221)    \
-       x(dirent_name_embedded_nul,                             222)    \
-       x(dirent_name_dot_or_dotdot,                            223)    \
-       x(dirent_name_has_slash,                                224)    \
-       x(dirent_d_type_wrong,                                  225)    \
-       x(dirent_d_parent_subvol_wrong,                         226)    \
-       x(dirent_in_missing_dir_inode,                          227)    \
-       x(dirent_in_non_dir_inode,                              228)    \
-       x(dirent_to_missing_inode,                              229)    \
-       x(dirent_to_missing_subvol,                             230)    \
-       x(dirent_to_itself,                                     231)    \
-       x(quota_type_invalid,                                   232)    \
-       x(xattr_val_size_too_small,                             233)    \
-       x(xattr_val_size_too_big,                               234)    \
-       x(xattr_invalid_type,                                   235)    \
-       x(xattr_name_invalid_chars,                             236)    \
-       x(xattr_in_missing_inode,                               237)    \
-       x(root_subvol_missing,                                  238)    \
-       x(root_dir_missing,                                     239)    \
-       x(root_inode_not_dir,                                   240)    \
-       x(dir_loop,                                             241)    \
-       x(hash_table_key_duplicate,                             242)    \
-       x(hash_table_key_wrong_offset,                          243)    \
-       x(unlinked_inode_not_on_deleted_list,                   244)
-
-enum bch_sb_error_id {
-#define x(t, n) BCH_FSCK_ERR_##t = n,
-       BCH_SB_ERRS()
-#undef x
-       BCH_SB_ERR_MAX
-};
+extern const char * const bch2_sb_error_strs[];
 
 extern const struct bch_sb_field_ops bch_sb_field_ops_errors;
 
index b1c099843a396a85e42f391eba5fe2c72199581f..e7be1f9bdaabb39190e9d598683ae5fc171a14b6 100644 (file)
@@ -4,6 +4,260 @@
 
 #include "darray.h"
 
+#define BCH_SB_ERRS()                                                  \
+       x(clean_but_journal_not_empty,                          0)      \
+       x(dirty_but_no_journal_entries,                         1)      \
+       x(dirty_but_no_journal_entries_post_drop_nonflushes,    2)      \
+       x(sb_clean_journal_seq_mismatch,                        3)      \
+       x(sb_clean_btree_root_mismatch,                         4)      \
+       x(sb_clean_missing,                                     5)      \
+       x(jset_unsupported_version,                             6)      \
+       x(jset_unknown_csum,                                    7)      \
+       x(jset_last_seq_newer_than_seq,                         8)      \
+       x(jset_past_bucket_end,                                 9)      \
+       x(jset_seq_blacklisted,                                 10)     \
+       x(journal_entries_missing,                              11)     \
+       x(journal_entry_replicas_not_marked,                    12)     \
+       x(journal_entry_past_jset_end,                          13)     \
+       x(journal_entry_replicas_data_mismatch,                 14)     \
+       x(journal_entry_bkey_u64s_0,                            15)     \
+       x(journal_entry_bkey_past_end,                          16)     \
+       x(journal_entry_bkey_bad_format,                        17)     \
+       x(journal_entry_bkey_invalid,                           18)     \
+       x(journal_entry_btree_root_bad_size,                    19)     \
+       x(journal_entry_blacklist_bad_size,                     20)     \
+       x(journal_entry_blacklist_v2_bad_size,                  21)     \
+       x(journal_entry_blacklist_v2_start_past_end,            22)     \
+       x(journal_entry_usage_bad_size,                         23)     \
+       x(journal_entry_data_usage_bad_size,                    24)     \
+       x(journal_entry_clock_bad_size,                         25)     \
+       x(journal_entry_clock_bad_rw,                           26)     \
+       x(journal_entry_dev_usage_bad_size,                     27)     \
+       x(journal_entry_dev_usage_bad_dev,                      28)     \
+       x(journal_entry_dev_usage_bad_pad,                      29)     \
+       x(btree_node_unreadable,                                30)     \
+       x(btree_node_fault_injected,                            31)     \
+       x(btree_node_bad_magic,                                 32)     \
+       x(btree_node_bad_seq,                                   33)     \
+       x(btree_node_unsupported_version,                       34)     \
+       x(btree_node_bset_older_than_sb_min,                    35)     \
+       x(btree_node_bset_newer_than_sb,                        36)     \
+       x(btree_node_data_missing,                              37)     \
+       x(btree_node_bset_after_end,                            38)     \
+       x(btree_node_replicas_sectors_written_mismatch,         39)     \
+       x(btree_node_replicas_data_mismatch,                    40)     \
+       x(bset_unknown_csum,                                    41)     \
+       x(bset_bad_csum,                                        42)     \
+       x(bset_past_end_of_btree_node,                          43)     \
+       x(bset_wrong_sector_offset,                             44)     \
+       x(bset_empty,                                           45)     \
+       x(bset_bad_seq,                                         46)     \
+       x(bset_blacklisted_journal_seq,                         47)     \
+       x(first_bset_blacklisted_journal_seq,                   48)     \
+       x(btree_node_bad_btree,                                 49)     \
+       x(btree_node_bad_level,                                 50)     \
+       x(btree_node_bad_min_key,                               51)     \
+       x(btree_node_bad_max_key,                               52)     \
+       x(btree_node_bad_format,                                53)     \
+       x(btree_node_bkey_past_bset_end,                        54)     \
+       x(btree_node_bkey_bad_format,                           55)     \
+       x(btree_node_bad_bkey,                                  56)     \
+       x(btree_node_bkey_out_of_order,                         57)     \
+       x(btree_root_bkey_invalid,                              58)     \
+       x(btree_root_read_error,                                59)     \
+       x(btree_root_bad_min_key,                               60)     \
+       x(btree_root_bad_max_key,                               61)     \
+       x(btree_node_read_error,                                62)     \
+       x(btree_node_topology_bad_min_key,                      63)     \
+       x(btree_node_topology_bad_max_key,                      64)     \
+       x(btree_node_topology_overwritten_by_prev_node,         65)     \
+       x(btree_node_topology_overwritten_by_next_node,         66)     \
+       x(btree_node_topology_interior_node_empty,              67)     \
+       x(fs_usage_hidden_wrong,                                68)     \
+       x(fs_usage_btree_wrong,                                 69)     \
+       x(fs_usage_data_wrong,                                  70)     \
+       x(fs_usage_cached_wrong,                                71)     \
+       x(fs_usage_reserved_wrong,                              72)     \
+       x(fs_usage_persistent_reserved_wrong,                   73)     \
+       x(fs_usage_nr_inodes_wrong,                             74)     \
+       x(fs_usage_replicas_wrong,                              75)     \
+       x(dev_usage_buckets_wrong,                              76)     \
+       x(dev_usage_sectors_wrong,                              77)     \
+       x(dev_usage_fragmented_wrong,                           78)     \
+       x(dev_usage_buckets_ec_wrong,                           79)     \
+       x(bkey_version_in_future,                               80)     \
+       x(bkey_u64s_too_small,                                  81)     \
+       x(bkey_invalid_type_for_btree,                          82)     \
+       x(bkey_extent_size_zero,                                83)     \
+       x(bkey_extent_size_greater_than_offset,                 84)     \
+       x(bkey_size_nonzero,                                    85)     \
+       x(bkey_snapshot_nonzero,                                86)     \
+       x(bkey_snapshot_zero,                                   87)     \
+       x(bkey_at_pos_max,                                      88)     \
+       x(bkey_before_start_of_btree_node,                      89)     \
+       x(bkey_after_end_of_btree_node,                         90)     \
+       x(bkey_val_size_nonzero,                                91)     \
+       x(bkey_val_size_too_small,                              92)     \
+       x(alloc_v1_val_size_bad,                                93)     \
+       x(alloc_v2_unpack_error,                                94)     \
+       x(alloc_v3_unpack_error,                                95)     \
+       x(alloc_v4_val_size_bad,                                96)     \
+       x(alloc_v4_backpointers_start_bad,                      97)     \
+       x(alloc_key_data_type_bad,                              98)     \
+       x(alloc_key_empty_but_have_data,                        99)     \
+       x(alloc_key_dirty_sectors_0,                            100)    \
+       x(alloc_key_data_type_inconsistency,                    101)    \
+       x(alloc_key_to_missing_dev_bucket,                      102)    \
+       x(alloc_key_cached_inconsistency,                       103)    \
+       x(alloc_key_cached_but_read_time_zero,                  104)    \
+       x(alloc_key_to_missing_lru_entry,                       105)    \
+       x(alloc_key_data_type_wrong,                            106)    \
+       x(alloc_key_gen_wrong,                                  107)    \
+       x(alloc_key_dirty_sectors_wrong,                        108)    \
+       x(alloc_key_cached_sectors_wrong,                       109)    \
+       x(alloc_key_stripe_wrong,                               110)    \
+       x(alloc_key_stripe_redundancy_wrong,                    111)    \
+       x(bucket_sector_count_overflow,                         112)    \
+       x(bucket_metadata_type_mismatch,                        113)    \
+       x(need_discard_key_wrong,                               114)    \
+       x(freespace_key_wrong,                                  115)    \
+       x(freespace_hole_missing,                               116)    \
+       x(bucket_gens_val_size_bad,                             117)    \
+       x(bucket_gens_key_wrong,                                118)    \
+       x(bucket_gens_hole_wrong,                               119)    \
+       x(bucket_gens_to_invalid_dev,                           120)    \
+       x(bucket_gens_to_invalid_buckets,                       121)    \
+       x(bucket_gens_nonzero_for_invalid_buckets,              122)    \
+       x(need_discard_freespace_key_to_invalid_dev_bucket,     123)    \
+       x(need_discard_freespace_key_bad,                       124)    \
+       x(backpointer_pos_wrong,                                125)    \
+       x(backpointer_to_missing_device,                        126)    \
+       x(backpointer_to_missing_alloc,                         127)    \
+       x(backpointer_to_missing_ptr,                           128)    \
+       x(lru_entry_at_time_0,                                  129)    \
+       x(lru_entry_to_invalid_bucket,                          130)    \
+       x(lru_entry_bad,                                        131)    \
+       x(btree_ptr_val_too_big,                                132)    \
+       x(btree_ptr_v2_val_too_big,                             133)    \
+       x(btree_ptr_has_non_ptr,                                134)    \
+       x(extent_ptrs_invalid_entry,                            135)    \
+       x(extent_ptrs_no_ptrs,                                  136)    \
+       x(extent_ptrs_too_many_ptrs,                            137)    \
+       x(extent_ptrs_redundant_crc,                            138)    \
+       x(extent_ptrs_redundant_stripe,                         139)    \
+       x(extent_ptrs_unwritten,                                140)    \
+       x(extent_ptrs_written_and_unwritten,                    141)    \
+       x(ptr_to_invalid_device,                                142)    \
+       x(ptr_to_duplicate_device,                              143)    \
+       x(ptr_after_last_bucket,                                144)    \
+       x(ptr_before_first_bucket,                              145)    \
+       x(ptr_spans_multiple_buckets,                           146)    \
+       x(ptr_to_missing_backpointer,                           147)    \
+       x(ptr_to_missing_alloc_key,                             148)    \
+       x(ptr_to_missing_replicas_entry,                        149)    \
+       x(ptr_to_missing_stripe,                                150)    \
+       x(ptr_to_incorrect_stripe,                              151)    \
+       x(ptr_gen_newer_than_bucket_gen,                        152)    \
+       x(ptr_too_stale,                                        153)    \
+       x(stale_dirty_ptr,                                      154)    \
+       x(ptr_bucket_data_type_mismatch,                        155)    \
+       x(ptr_cached_and_erasure_coded,                         156)    \
+       x(ptr_crc_uncompressed_size_too_small,                  157)    \
+       x(ptr_crc_csum_type_unknown,                            158)    \
+       x(ptr_crc_compression_type_unknown,                     159)    \
+       x(ptr_crc_redundant,                                    160)    \
+       x(ptr_crc_uncompressed_size_too_big,                    161)    \
+       x(ptr_crc_nonce_mismatch,                               162)    \
+       x(ptr_stripe_redundant,                                 163)    \
+       x(reservation_key_nr_replicas_invalid,                  164)    \
+       x(reflink_v_refcount_wrong,                             165)    \
+       x(reflink_p_to_missing_reflink_v,                       166)    \
+       x(stripe_pos_bad,                                       167)    \
+       x(stripe_val_size_bad,                                  168)    \
+       x(stripe_sector_count_wrong,                            169)    \
+       x(snapshot_tree_pos_bad,                                170)    \
+       x(snapshot_tree_to_missing_snapshot,                    171)    \
+       x(snapshot_tree_to_missing_subvol,                      172)    \
+       x(snapshot_tree_to_wrong_subvol,                        173)    \
+       x(snapshot_tree_to_snapshot_subvol,                     174)    \
+       x(snapshot_pos_bad,                                     175)    \
+       x(snapshot_parent_bad,                                  176)    \
+       x(snapshot_children_not_normalized,                     177)    \
+       x(snapshot_child_duplicate,                             178)    \
+       x(snapshot_child_bad,                                   179)    \
+       x(snapshot_skiplist_not_normalized,                     180)    \
+       x(snapshot_skiplist_bad,                                181)    \
+       x(snapshot_should_not_have_subvol,                      182)    \
+       x(snapshot_to_bad_snapshot_tree,                        183)    \
+       x(snapshot_bad_depth,                                   184)    \
+       x(snapshot_bad_skiplist,                                185)    \
+       x(subvol_pos_bad,                                       186)    \
+       x(subvol_not_master_and_not_snapshot,                   187)    \
+       x(subvol_to_missing_root,                               188)    \
+       x(subvol_root_wrong_bi_subvol,                          189)    \
+       x(bkey_in_missing_snapshot,                             190)    \
+       x(inode_pos_inode_nonzero,                              191)    \
+       x(inode_pos_blockdev_range,                             192)    \
+       x(inode_unpack_error,                                   193)    \
+       x(inode_str_hash_invalid,                               194)    \
+       x(inode_v3_fields_start_bad,                            195)    \
+       x(inode_snapshot_mismatch,                              196)    \
+       x(inode_unlinked_but_clean,                             197)    \
+       x(inode_unlinked_but_nlink_nonzero,                     198)    \
+       x(inode_checksum_type_invalid,                          199)    \
+       x(inode_compression_type_invalid,                       200)    \
+       x(inode_subvol_root_but_not_dir,                        201)    \
+       x(inode_i_size_dirty_but_clean,                         202)    \
+       x(inode_i_sectors_dirty_but_clean,                      203)    \
+       x(inode_i_sectors_wrong,                                204)    \
+       x(inode_dir_wrong_nlink,                                205)    \
+       x(inode_dir_multiple_links,                             206)    \
+       x(inode_multiple_links_but_nlink_0,                     207)    \
+       x(inode_wrong_backpointer,                              208)    \
+       x(inode_wrong_nlink,                                    209)    \
+       x(inode_unreachable,                                    210)    \
+       x(deleted_inode_but_clean,                              211)    \
+       x(deleted_inode_missing,                                212)    \
+       x(deleted_inode_is_dir,                                 213)    \
+       x(deleted_inode_not_unlinked,                           214)    \
+       x(extent_overlapping,                                   215)    \
+       x(extent_in_missing_inode,                              216)    \
+       x(extent_in_non_reg_inode,                              217)    \
+       x(extent_past_end_of_inode,                             218)    \
+       x(dirent_empty_name,                                    219)    \
+       x(dirent_val_too_big,                                   220)    \
+       x(dirent_name_too_long,                                 221)    \
+       x(dirent_name_embedded_nul,                             222)    \
+       x(dirent_name_dot_or_dotdot,                            223)    \
+       x(dirent_name_has_slash,                                224)    \
+       x(dirent_d_type_wrong,                                  225)    \
+       x(dirent_d_parent_subvol_wrong,                         226)    \
+       x(dirent_in_missing_dir_inode,                          227)    \
+       x(dirent_in_non_dir_inode,                              228)    \
+       x(dirent_to_missing_inode,                              229)    \
+       x(dirent_to_missing_subvol,                             230)    \
+       x(dirent_to_itself,                                     231)    \
+       x(quota_type_invalid,                                   232)    \
+       x(xattr_val_size_too_small,                             233)    \
+       x(xattr_val_size_too_big,                               234)    \
+       x(xattr_invalid_type,                                   235)    \
+       x(xattr_name_invalid_chars,                             236)    \
+       x(xattr_in_missing_inode,                               237)    \
+       x(root_subvol_missing,                                  238)    \
+       x(root_dir_missing,                                     239)    \
+       x(root_inode_not_dir,                                   240)    \
+       x(dir_loop,                                             241)    \
+       x(hash_table_key_duplicate,                             242)    \
+       x(hash_table_key_wrong_offset,                          243)    \
+       x(unlinked_inode_not_on_deleted_list,                   244)
+
+/*
+ * Error IDs generated from the BCH_SB_ERRS() x-macro list: every x(t, n)
+ * entry in that list expands to an enumerator BCH_FSCK_ERR_<t> = n.
+ * BCH_SB_ERR_MAX is declared after the last entry, so it is one greater
+ * than the last id in the list.
+ */
+enum bch_sb_error_id {
+#define x(t, n) BCH_FSCK_ERR_##t = n,
+       BCH_SB_ERRS()
+#undef x
+       BCH_SB_ERR_MAX
+};
+
 struct bch_sb_error_entry_cpu {
        u64                     id:16,
                                nr:48;
index 9e7164c2363b9d7a933ecd2d1b72b3587981bf6e..7c67c28d3ef88ff32d1805257faf37ebc79f0d2d 100644 (file)
@@ -138,6 +138,24 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
        return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
 }
 
+/*
+ * Check, inside transaction @trans, whether subvolume @subvol is read-only.
+ *
+ * Returns the error from the subvolume lookup if that fails, -EROFS if the
+ * subvolume has its RO flag set, and 0 otherwise.
+ */
+int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
+{
+       struct bch_subvolume sv;
+       int err;
+
+       err = bch2_subvolume_get_inlined(trans, subvol, true, 0, &sv);
+       if (err)
+               return err;
+
+       return BCH_SUBVOLUME_RO(&sv) ? -EROFS : 0;
+}
+
+/*
+ * Convenience wrapper for callers that only have the filesystem @c: runs
+ * bch2_subvol_is_ro_trans() for @subvol through bch2_trans_do() and returns
+ * what bch2_trans_do() yields.
+ *
+ * NOTE(review): `trans` is not declared in this function; presumably the
+ * bch2_trans_do() macro provides it - confirm against its definition.
+ */
+int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
+{
+       return bch2_trans_do(c, NULL, NULL, 0,
+               bch2_subvol_is_ro_trans(trans, subvol));
+}
+
 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
                             struct bch_subvolume *subvol)
 {
index a1003d30ab0a0c613b644c54fba09964d9ec4b29..a6f56f66e27cb7699402f089ef89a2f1355077c4 100644 (file)
@@ -23,6 +23,9 @@ int bch2_subvolume_get(struct btree_trans *, unsigned,
                       bool, int, struct bch_subvolume *);
 int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
 
+int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
+int bch2_subvol_is_ro(struct bch_fs *, u32);
+
 int bch2_delete_dead_snapshots(struct bch_fs *);
 void bch2_delete_dead_snapshots_async(struct bch_fs *);
 
index c80a993ec9820f8361f36716bd601100af85ae04..7cbf496dcf99b046cc0e2add0548bdea389d6460 100644 (file)
@@ -13,6 +13,7 @@
 #include "replicas.h"
 #include "quota.h"
 #include "sb-clean.h"
+#include "sb-downgrade.h"
 #include "sb-errors.h"
 #include "sb-members.h"
 #include "super-io.h"
@@ -260,6 +261,17 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
        return f;
 }
 
+/*
+ * Look up superblock field @type in @sb and make sure it is at least @u64s
+ * u64s long.
+ *
+ * If the field already exists and is large enough it is returned unchanged;
+ * otherwise the result of bch2_sb_field_resize_id() for the requested size
+ * is returned.
+ */
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
+                                                 enum bch_sb_field_type type,
+                                                 unsigned u64s)
+{
+       struct bch_sb_field *field = bch2_sb_field_get_id(sb->sb, type);
+
+       if (field && le32_to_cpu(field->u64s) >= u64s)
+               return field;
+
+       return bch2_sb_field_resize_id(sb, type, u64s);
+}
+
 /* Superblock validate: */
 
 static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
@@ -479,6 +491,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
 
 /* device open: */
 
+/*
+ * Convert one little-endian unsigned long to CPU byte order, choosing the
+ * 64-bit or the 32-bit conversion according to sizeof(unsigned long).
+ */
+static unsigned long le_ulong_to_cpu(unsigned long v)
+{
+       if (sizeof(unsigned long) == 8)
+               return le64_to_cpu(v);
+
+       return le32_to_cpu(v);
+}
+
+/*
+ * Copy a little-endian bit vector of @nr bits from @src into @dst,
+ * converting it one unsigned long word at a time to CPU byte order.
+ *
+ * @nr must be a whole number of longs' worth of bits; anything else trips
+ * the BUG_ON() below.
+ */
+static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
+{
+       BUG_ON(nr & (BITS_PER_TYPE(long) - 1));
+
+       unsigned nr_words = BITS_TO_LONGS(nr);
+       unsigned word = 0;
+
+       while (word < nr_words) {
+               dst[word] = le_ulong_to_cpu(src[word]);
+               word++;
+       }
+}
+
 static void bch2_sb_update(struct bch_fs *c)
 {
        struct bch_sb *src = c->disk_sb.sb;
@@ -505,6 +532,13 @@ static void bch2_sb_update(struct bch_fs *c)
        c->sb.features          = le64_to_cpu(src->features[0]);
        c->sb.compat            = le64_to_cpu(src->compat[0]);
 
+       memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
+
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
+       if (ext)
+               le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
+                                   sizeof(c->sb.errors_silent) * 8);
+
        for_each_member_device(c, ca) {
                struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
                ca->mi = bch2_mi_to_cpu(&m);
@@ -925,6 +959,7 @@ int bch2_write_super(struct bch_fs *c)
        bch2_sb_members_from_cpu(c);
        bch2_sb_members_cpy_v2_v1(&c->disk_sb);
        bch2_sb_errors_from_cpu(c);
+       bch2_sb_downgrade_update(c);
 
        for_each_online_member(c, ca)
                bch2_sb_from_fs(c, ca);
@@ -1081,10 +1116,54 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
 {
        lockdep_assert_held(&c->sb_lock);
 
+       if (BCH_VERSION_MAJOR(new_version) >
+           BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+               bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
+
        c->disk_sb.sb->version = cpu_to_le16(new_version);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
 }
 
+/*
+ * Validate the "ext" superblock field @f: it must be at least 88 bytes.
+ *
+ * Returns 0 when the field is large enough; otherwise writes a message to
+ * @err and returns -BCH_ERR_invalid_sb_ext.  @sb is not used.
+ */
+static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
+                               struct printbuf *err)
+{
+       /* smallest acceptable size of the field, in bytes */
+       const unsigned min_bytes = 88;
+
+       if (vstruct_bytes(f) >= min_bytes)
+               return 0;
+
+       prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), min_bytes);
+       return -BCH_ERR_invalid_sb_ext;
+}
+
+/*
+ * Print the "ext" superblock field @f to @out: first the required recovery
+ * passes, then the set of errors that are to be fixed silently.
+ *
+ * The second line needs a temporary CPU-endian copy of the on-disk bit
+ * vector; if that allocation fails the line is left out.  @sb is not used.
+ */
+static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
+                               struct bch_sb_field *f)
+{
+       struct bch_sb_field_ext *e = field_to_type(f, ext);
+       const size_t nr_error_bits = sizeof(e->errors_silent) * 8;
+
+       prt_printf(out, "Recovery passes required:");
+       prt_tab(out);
+       prt_bitflags(out, bch2_recovery_passes,
+                    bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
+       prt_newline(out);
+
+       /* scratch buffer for the byte-swapped copy of e->errors_silent */
+       unsigned long *silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
+       if (!silent)
+               return;
+
+       le_bitvector_to_cpu(silent, (void *) e->errors_silent, nr_error_bits);
+
+       prt_printf(out, "Errors to silently fix:");
+       prt_tab(out);
+       prt_bitflags_vector(out, bch2_sb_error_strs, silent, nr_error_bits);
+       prt_newline(out);
+
+       kfree(silent);
+}
+
+/* Operations for the "ext" superblock field: validation and text output. */
+static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
+       .validate       = bch2_sb_ext_validate,
+       .to_text        = bch2_sb_ext_to_text,
+};
+
 static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
 #define x(f, nr)                                       \
        [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
index e6f40a05431933e146c68376c4c87eacce38402e..1a8c2088c5c56ea93e9065a40e8041d8ba69deba 100644 (file)
@@ -40,6 +40,16 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *,
 #define bch2_sb_field_resize(_sb, _name, _u64s)                                \
        field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
 
+/*
+ * bch2_sb_field_get_minsize() is the typed wrapper around
+ * bch2_sb_field_get_minsize_id(): it expands _name to BCH_SB_FIELD_<_name>
+ * and converts the returned generic field pointer with field_to_type().
+ * NOTE(review): presumably returns the field, grown if needed to at least
+ * _u64s u64s -- confirm against the definition of the _id function.
+ */
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *,
+                                       enum bch_sb_field_type, unsigned);
+#define bch2_sb_field_get_minsize(_sb, _name, _u64s)                           \
+       field_to_type(bch2_sb_field_get_minsize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
+
+/*
+ * Number of entries[] elements held by superblock field _f, or 0 when _f is
+ * NULL: the field's byte size minus the fixed part of the struct, divided by
+ * the size of one entry.
+ *
+ * _f is parenthesized at every use and the whole expansion is wrapped, so
+ * that expression arguments and surrounding operators bind as intended.
+ * _f is still evaluated more than once: do not pass an expression with
+ * side effects.
+ */
+#define bch2_sb_field_nr_entries(_f)                                   \
+       ((_f) ? ((bch2_sb_field_bytes(&(_f)->field) - sizeof(*(_f))) /  \
+                sizeof((_f)->entries[0]))                              \
+             : 0)
+
 void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
 
 extern const char * const bch2_sb_fields[];
index 4525fb51e34f981a969d5e38ef97db08ac668f81..4290e0a53b7563a4e6d912ce17083a947b6a3f4b 100644 (file)
@@ -243,6 +243,7 @@ do {                                                                        \
 #define prt_units_s64(...)             bch2_prt_units_s64(__VA_ARGS__)
 #define prt_string_option(...)         bch2_prt_string_option(__VA_ARGS__)
 #define prt_bitflags(...)              bch2_prt_bitflags(__VA_ARGS__)
+#define prt_bitflags_vector(...)       bch2_prt_bitflags_vector(__VA_ARGS__)
 
 void bch2_pr_time_units(struct printbuf *, u64);
 void bch2_prt_datetime(struct printbuf *, time64_t);
index 79d982674c180307f5d5a4da42fabaa480878573..5a1858fb9879afd1c70c3d5a64883315090d6dbe 100644 (file)
@@ -176,7 +176,8 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
        struct btree_iter inode_iter = { NULL };
        int ret;
 
-       ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
+       ret   = bch2_subvol_is_ro_trans(trans, inum.subvol) ?:
+               bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
        if (ret)
                return ret;