]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/super-io.c
Update bcachefs sources to 50847e296b34 bcachefs: Check subvol <-> inode pointers...
[bcachefs-tools-debian] / libbcachefs / super-io.c
index 136c01403c95465c23488504c0f2e92798bd10e2..d60c7d27a0477cb0de116675671d5c888d8f1c86 100644 (file)
@@ -2,7 +2,6 @@
 
 #include "bcachefs.h"
 #include "checksum.h"
-#include "counters.h"
 #include "disk_groups.h"
 #include "ec.h"
 #include "error.h"
@@ -13,6 +12,8 @@
 #include "replicas.h"
 #include "quota.h"
 #include "sb-clean.h"
+#include "sb-counters.h"
+#include "sb-downgrade.h"
 #include "sb-errors.h"
 #include "sb-members.h"
 #include "super-io.h"
@@ -29,14 +30,12 @@ static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
 struct bch2_metadata_version {
        u16             version;
        const char      *name;
-       u64             recovery_passes;
 };
 
 static const struct bch2_metadata_version bch2_metadata_versions[] = {
-#define x(n, v, _recovery_passes) {            \
+#define x(n, v) {              \
        .version = v,                           \
        .name = #n,                             \
-       .recovery_passes = _recovery_passes,    \
 },
        BCH_METADATA_VERSIONS()
 #undef x
@@ -69,24 +68,6 @@ unsigned bch2_latest_compatible_version(unsigned v)
        return v;
 }
 
-u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
-                                unsigned old_version,
-                                unsigned new_version)
-{
-       u64 ret = 0;
-
-       for (const struct bch2_metadata_version *i = bch2_metadata_versions;
-            i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
-            i++)
-               if (i->version > old_version && i->version <= new_version) {
-                       if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK)
-                               ret |= bch2_fsck_recovery_passes();
-                       ret |= i->recovery_passes;
-               }
-
-       return ret &= ~RECOVERY_PASS_ALL_FSCK;
-}
-
 const char * const bch2_sb_fields[] = {
 #define x(name, nr)    #name,
        BCH_SB_FIELDS()
@@ -100,8 +81,6 @@ static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
 struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb,
                                      enum bch_sb_field_type type)
 {
-       struct bch_sb_field *f;
-
        /* XXX: need locking around superblock to access optional fields */
 
        vstruct_for_each(sb, f)
@@ -163,8 +142,8 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
 void bch2_free_super(struct bch_sb_handle *sb)
 {
        kfree(sb->bio);
-       if (!IS_ERR_OR_NULL(sb->bdev))
-               blkdev_put(sb->bdev, sb->holder);
+       if (!IS_ERR_OR_NULL(sb->bdev_handle))
+               bdev_release(sb->bdev_handle);
        kfree(sb->holder);
        kfree(sb->sb_name);
 
@@ -191,8 +170,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
                u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
 
                if (new_bytes > max_bytes) {
-                       pr_err("%pg: superblock too big: want %zu but have %llu",
-                              sb->bdev, new_bytes, max_bytes);
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_bdevname(&buf, sb->bdev);
+                       prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
+                       pr_err("%s", buf.buf);
+                       printbuf_exit(&buf);
                        return -BCH_ERR_ENOSPC_sb;
                }
        }
@@ -240,14 +223,12 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
 
        if (sb->fs_sb) {
                struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);
-               struct bch_dev *ca;
-               unsigned i;
 
                lockdep_assert_held(&c->sb_lock);
 
                /* XXX: we're not checking that offline device have enough space */
 
-               for_each_online_member(ca, c, i) {
+               for_each_online_member(c, ca) {
                        struct bch_sb_handle *dev_sb = &ca->disk_sb;
 
                        if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
@@ -264,6 +245,17 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
        return f;
 }
 
+struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
+                                                 enum bch_sb_field_type type,
+                                                 unsigned u64s)
+{
+       struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
+
+       if (!f || le32_to_cpu(f->u64s) < u64s)
+               f = bch2_sb_field_resize_id(sb, type, u64s);
+       return f;
+}
+
 /* Superblock validate: */
 
 static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
@@ -356,7 +348,6 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
                            int rw)
 {
        struct bch_sb *sb = disk_sb->sb;
-       struct bch_sb_field *f;
        struct bch_sb_field_members_v1 *mi;
        enum bch_opt_id opt_id;
        u16 block_size;
@@ -484,11 +475,24 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
 
 /* device open: */
 
+static unsigned long le_ulong_to_cpu(unsigned long v)
+{
+       return sizeof(unsigned long) == 8
+               ? le64_to_cpu(v)
+               : le32_to_cpu(v);
+}
+
+static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
+{
+       BUG_ON(nr & (BITS_PER_TYPE(long) - 1));
+
+       for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
+               dst[i] = le_ulong_to_cpu(src[i]);
+}
+
 static void bch2_sb_update(struct bch_fs *c)
 {
        struct bch_sb *src = c->disk_sb.sb;
-       struct bch_dev *ca;
-       unsigned i;
 
        lockdep_assert_held(&c->sb_lock);
 
@@ -512,8 +516,15 @@ static void bch2_sb_update(struct bch_fs *c)
        c->sb.features          = le64_to_cpu(src->features[0]);
        c->sb.compat            = le64_to_cpu(src->compat[0]);
 
-       for_each_member_device(ca, c, i) {
-               struct bch_member m = bch2_sb_member_get(src, i);
+       memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
+
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
+       if (ext)
+               le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
+                                   sizeof(c->sb.errors_silent) * 8);
+
+       for_each_member_device(c, ca) {
+               struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
                ca->mi = bch2_mi_to_cpu(&m);
        }
 }
@@ -537,6 +548,7 @@ static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
        dst->time_base_lo       = src->time_base_lo;
        dst->time_base_hi       = src->time_base_hi;
        dst->time_precision     = src->time_precision;
+       dst->write_time         = src->write_time;
 
        memcpy(dst->flags,      src->flags,     sizeof(dst->flags));
        memcpy(dst->features,   src->features,  sizeof(dst->features));
@@ -600,7 +612,6 @@ int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
 
 static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
 {
-       struct bch_csum csum;
        size_t bytes;
        int ret;
 reread:
@@ -616,7 +627,9 @@ reread:
 
        if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
            !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
-               prt_printf(err, "Not a bcachefs superblock");
+               prt_str(err, "Not a bcachefs superblock (got magic ");
+               pr_uuid(err, sb->sb->magic.b);
+               prt_str(err, ")");
                return -BCH_ERR_invalid_sb_magic;
        }
 
@@ -639,17 +652,16 @@ reread:
                goto reread;
        }
 
-       if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR) {
+       enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
+       if (csum_type >= BCH_CSUM_NR) {
                prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
                return -BCH_ERR_invalid_sb_csum_type;
        }
 
        /* XXX: verify MACs */
-       csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
-                           null_nonce(), sb->sb);
-
+       struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
        if (bch2_crc_cmp(csum, sb->sb->csum)) {
-               prt_printf(err, "bad checksum");
+               bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
                return -BCH_ERR_invalid_sb_csum;
        }
 
@@ -692,21 +704,22 @@ retry:
        if (!opt_get(*opts, nochanges))
                sb->mode |= BLK_OPEN_WRITE;
 
-       sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
-       if (IS_ERR(sb->bdev) &&
-           PTR_ERR(sb->bdev) == -EACCES &&
+       sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
+       if (IS_ERR(sb->bdev_handle) &&
+           PTR_ERR(sb->bdev_handle) == -EACCES &&
            opt_get(*opts, read_only)) {
                sb->mode &= ~BLK_OPEN_WRITE;
 
-               sb->bdev = blkdev_get_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
-               if (!IS_ERR(sb->bdev))
+               sb->bdev_handle = bdev_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
+               if (!IS_ERR(sb->bdev_handle))
                        opt_set(*opts, nochanges, true);
        }
 
-       if (IS_ERR(sb->bdev)) {
-               ret = PTR_ERR(sb->bdev);
+       if (IS_ERR(sb->bdev_handle)) {
+               ret = PTR_ERR(sb->bdev_handle);
                goto out;
        }
+       sb->bdev = sb->bdev_handle->bdev;
 
        ret = bch2_sb_realloc(sb, 0);
        if (ret) {
@@ -892,9 +905,8 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
 int bch2_write_super(struct bch_fs *c)
 {
        struct closure *cl = &c->sb_write;
-       struct bch_dev *ca;
        struct printbuf err = PRINTBUF;
-       unsigned i, sb = 0, nr_wrote;
+       unsigned sb = 0, nr_wrote;
        struct bch_devs_mask sb_written;
        bool wrote, can_mount_without_written, can_mount_with_written;
        unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
@@ -916,6 +928,11 @@ int bch2_write_super(struct bch_fs *c)
 
        le64_add_cpu(&c->disk_sb.sb->seq, 1);
 
+       struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+       for_each_online_member(c, ca)
+               __bch2_members_v2_get_mut(mi, ca->dev_idx)->seq = c->disk_sb.sb->seq;
+       c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());
+
        if (test_bit(BCH_FS_error, &c->flags))
                SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
        if (test_bit(BCH_FS_topology_error, &c->flags))
@@ -927,11 +944,12 @@ int bch2_write_super(struct bch_fs *c)
        bch2_sb_members_from_cpu(c);
        bch2_sb_members_cpy_v2_v1(&c->disk_sb);
        bch2_sb_errors_from_cpu(c);
+       bch2_sb_downgrade_update(c);
 
-       for_each_online_member(ca, c, i)
+       for_each_online_member(c, ca)
                bch2_sb_from_fs(c, ca);
 
-       for_each_online_member(ca, c, i) {
+       for_each_online_member(c, ca) {
                printbuf_reset(&err);
 
                ret = bch2_sb_validate(&ca->disk_sb, &err, WRITE);
@@ -952,16 +970,28 @@ int bch2_write_super(struct bch_fs *c)
        if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
                goto out;
 
-       for_each_online_member(ca, c, i) {
+       if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
+               struct printbuf buf = PRINTBUF;
+               prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
+               bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
+               prt_str(&buf, " > ");
+               bch2_version_to_text(&buf, bcachefs_metadata_version_current);
+               prt_str(&buf, ")");
+               bch2_fs_fatal_error(c, "%s", buf.buf);
+               printbuf_exit(&buf);
+               return -BCH_ERR_sb_not_downgraded;
+       }
+
+       for_each_online_member(c, ca) {
                __set_bit(ca->dev_idx, sb_written.d);
                ca->sb_write_error = 0;
        }
 
-       for_each_online_member(ca, c, i)
+       for_each_online_member(c, ca)
                read_back_super(c, ca);
        closure_sync(cl);
 
-       for_each_online_member(ca, c, i) {
+       for_each_online_member(c, ca) {
                if (ca->sb_write_error)
                        continue;
 
@@ -988,7 +1018,7 @@ int bch2_write_super(struct bch_fs *c)
 
        do {
                wrote = false;
-               for_each_online_member(ca, c, i)
+               for_each_online_member(c, ca)
                        if (!ca->sb_write_error &&
                            sb < ca->disk_sb.sb->layout.nr_superblocks) {
                                write_one_super(c, ca, sb);
@@ -998,7 +1028,7 @@ int bch2_write_super(struct bch_fs *c)
                sb++;
        } while (wrote);
 
-       for_each_online_member(ca, c, i) {
+       for_each_online_member(c, ca) {
                if (ca->sb_write_error)
                        __clear_bit(ca->dev_idx, sb_written.d);
                else
@@ -1010,7 +1040,7 @@ int bch2_write_super(struct bch_fs *c)
        can_mount_with_written =
                bch2_have_enough_devs(c, sb_written, degraded_flags, false);
 
-       for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
+       for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
                sb_written.d[i] = ~sb_written.d[i];
 
        can_mount_without_written =
@@ -1050,31 +1080,87 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
 }
 
 /* Downgrade if superblock is at a higher version than currently supported: */
-void bch2_sb_maybe_downgrade(struct bch_fs *c)
+bool bch2_check_version_downgrade(struct bch_fs *c)
 {
+       bool ret = bcachefs_metadata_version_current < c->sb.version;
+
        lockdep_assert_held(&c->sb_lock);
 
        /*
         * Downgrade, if superblock is at a higher version than currently
         * supported:
+        *
+        * c->sb will be checked before we write the superblock, so update it as
+        * well:
         */
-       if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
+       if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) {
                SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
-       if (c->sb.version > bcachefs_metadata_version_current)
+               c->sb.version_upgrade_complete = bcachefs_metadata_version_current;
+       }
+       if (c->sb.version > bcachefs_metadata_version_current) {
                c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
-       if (c->sb.version_min > bcachefs_metadata_version_current)
+               c->sb.version = bcachefs_metadata_version_current;
+       }
+       if (c->sb.version_min > bcachefs_metadata_version_current) {
                c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
+               c->sb.version_min = bcachefs_metadata_version_current;
+       }
        c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+       return ret;
 }
 
 void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
 {
        lockdep_assert_held(&c->sb_lock);
 
+       if (BCH_VERSION_MAJOR(new_version) >
+           BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
+               bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
+
        c->disk_sb.sb->version = cpu_to_le16(new_version);
        c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
 }
 
+static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
+                               struct printbuf *err)
+{
+       if (vstruct_bytes(f) < 88) {
+               prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
+               return -BCH_ERR_invalid_sb_ext;
+       }
+
+       return 0;
+}
+
+static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
+                               struct bch_sb_field *f)
+{
+       struct bch_sb_field_ext *e = field_to_type(f, ext);
+
+       prt_printf(out, "Recovery passes required:");
+       prt_tab(out);
+       prt_bitflags(out, bch2_recovery_passes,
+                    bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
+       prt_newline(out);
+
+       unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
+       if (errors_silent) {
+               le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
+
+               prt_printf(out, "Errors to silently fix:");
+               prt_tab(out);
+               prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
+               prt_newline(out);
+
+               kfree(errors_silent);
+       }
+}
+
+static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
+       .validate       = bch2_sb_ext_validate,
+       .to_text        = bch2_sb_ext_to_text,
+};
+
 static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
 #define x(f, nr)                                       \
        [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
@@ -1111,8 +1197,8 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
        return ret;
 }
 
-void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
-                          struct bch_sb_field *f)
+void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+                            struct bch_sb_field *f)
 {
        unsigned type = le32_to_cpu(f->type);
        const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
@@ -1120,6 +1206,15 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
        if (!out->nr_tabstops)
                printbuf_tabstop_push(out, 32);
 
+       if (ops->to_text)
+               ops->to_text(out, sb, f);
+}
+
+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+                          struct bch_sb_field *f)
+{
+       unsigned type = le32_to_cpu(f->type);
+
        if (type < BCH_SB_FIELD_NR)
                prt_printf(out, "%s", bch2_sb_fields[type]);
        else
@@ -1128,11 +1223,7 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
        prt_printf(out, " (size %zu):", vstruct_bytes(f));
        prt_newline(out);
 
-       if (ops->to_text) {
-               printbuf_indent_add(out, 2);
-               ops->to_text(out, sb, f);
-               printbuf_indent_sub(out, 2);
-       }
+       __bch2_sb_field_to_text(out, sb, f);
 }
 
 void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
@@ -1161,7 +1252,6 @@ void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
 void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
                     bool print_layout, unsigned fields)
 {
-       struct bch_sb_field *f;
        u64 fields_have = 0;
        unsigned nr_devices = 0;
 
@@ -1181,6 +1271,11 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
        pr_uuid(out, sb->uuid.b);
        prt_newline(out);
 
+       prt_printf(out, "Magic number:");
+       prt_tab(out);
+       pr_uuid(out, sb->magic.b);
+       prt_newline(out);
+
        prt_str(out, "Device index:");
        prt_tab(out);
        prt_printf(out, "%u", sb->dev_idx);
@@ -1219,9 +1314,16 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
        prt_printf(out, "%llu", le64_to_cpu(sb->seq));
        prt_newline(out);
 
+       prt_printf(out, "Time of last write:");
+       prt_tab(out);
+       bch2_prt_datetime(out, le64_to_cpu(sb->write_time));
+       prt_newline(out);
+
        prt_printf(out, "Superblock size:");
        prt_tab(out);
-       prt_printf(out, "%zu", vstruct_bytes(sb));
+       prt_units_u64(out, vstruct_bytes(sb));
+       prt_str(out, "/");
+       prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits);
        prt_newline(out);
 
        prt_printf(out, "Clean:");