]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/bcachefs_format.h
Update bcachefs sources to b9bd69421f73 bcachefs: x-macro-ify inode flags enum
[bcachefs-tools-debian] / libbcachefs / bcachefs_format.h
index 91a6624e29abfb51b3c1ac7c7ab157d2defe7a40..7a1c244071f9254dfdf0c3eca51b79100955bd76 100644 (file)
 #include <linux/uuid.h>
 #include "vstructs.h"
 
+#ifdef __KERNEL__
+typedef uuid_t __uuid_t;
+#endif
+
 #define BITMASK(name, type, field, offset, end)                                \
-static const unsigned  name##_OFFSET = offset;                         \
-static const unsigned  name##_BITS = (end - offset);                   \
+static const __maybe_unused unsigned   name##_OFFSET = offset;         \
+static const __maybe_unused unsigned   name##_BITS = (end - offset);   \
                                                                        \
 static inline __u64 name(const type *k)                                        \
 {                                                                      \
@@ -94,9 +98,9 @@ static inline void SET_##name(type *k, __u64 v)                               \
 }
 
 #define LE_BITMASK(_bits, name, type, field, offset, end)              \
-static const unsigned  name##_OFFSET = offset;                         \
-static const unsigned  name##_BITS = (end - offset);                   \
-static const __u##_bits        name##_MAX = (1ULL << (end - offset)) - 1;      \
+static const __maybe_unused unsigned   name##_OFFSET = offset;         \
+static const __maybe_unused unsigned   name##_BITS = (end - offset);   \
+static const __maybe_unused __u##_bits name##_MAX = (1ULL << (end - offset)) - 1;\
                                                                        \
 static inline __u64 name(const type *k)                                        \
 {                                                                      \
@@ -250,6 +254,11 @@ struct bkey_packed {
        __u8            pad[sizeof(struct bkey) - 3];
 } __packed __aligned(8);
 
+typedef struct {
+       __le64                  lo;
+       __le64                  hi;
+} bch_le128;
+
 #define BKEY_U64s                      (sizeof(struct bkey) / sizeof(__u64))
 #define BKEY_U64s_MAX                  U8_MAX
 #define BKEY_VAL_U64s_MAX              (BKEY_U64s_MAX - BKEY_U64s)
@@ -290,16 +299,8 @@ enum bch_bkey_fields {
 struct bkey_i {
        __u64                   _data[0];
 
-       union {
-       struct {
-               /* Size of combined key and value, in u64s */
-               __u8            u64s;
-       };
-       struct {
-               struct bkey     k;
-               struct bch_val  v;
-       };
-       };
+       struct bkey     k;
+       struct bch_val  v;
 };
 
 #define KEY(_inode, _offset, _size)                                    \
@@ -318,7 +319,7 @@ static inline void bkey_init(struct bkey *k)
 #define bkey_bytes(_k)         ((_k)->u64s * sizeof(__u64))
 
 #define __BKEY_PADDED(key, pad)                                        \
-       struct { struct bkey_i key; __u64 key ## _pad[pad]; }
+       struct bkey_i key; __u64 key ## _pad[pad]
 
 /*
  * - DELETED keys are used internally to mark keys that should be ignored but
@@ -368,7 +369,10 @@ static inline void bkey_init(struct bkey *k)
        x(alloc_v4,             27)                     \
        x(backpointer,          28)                     \
        x(inode_v3,             29)                     \
-       x(bucket_gens,          30)
+       x(bucket_gens,          30)                     \
+       x(snapshot_tree,        31)                     \
+       x(logged_op_truncate,   32)                     \
+       x(logged_op_finsert,    33)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -486,8 +490,9 @@ struct bch_csum {
        x(crc32,                1)              \
        x(crc64,                2)              \
        x(crc128,               3)              \
-       x(stripe_ptr,           4)
-#define BCH_EXTENT_ENTRY_MAX   5
+       x(stripe_ptr,           4)              \
+       x(rebalance,            5)
+#define BCH_EXTENT_ENTRY_MAX   6
 
 enum bch_extent_entry_type {
 #define x(f, n) BCH_EXTENT_ENTRY_##f = n,
@@ -608,16 +613,16 @@ struct bch_extent_stripe_ptr {
 #endif
 };
 
-struct bch_extent_reservation {
+struct bch_extent_rebalance {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
        __u64                   type:6,
-                               unused:22,
-                               replicas:4,
-                               generation:32;
+                               unused:34,
+                               compression:8, /* enum bch_compression_opt */
+                               target:16;
 #elif defined (__BIG_ENDIAN_BITFIELD)
-       __u64                   generation:32,
-                               replicas:4,
-                               unused:22,
+       __u64                   target:16,
+                               compression:8,
+                               unused:34,
                                type:6;
 #endif
 };
@@ -678,7 +683,7 @@ struct bch_reservation {
 /* Maximum size (in u64s) a single pointer could be: */
 #define BKEY_EXTENT_PTR_U64s_MAX\
        ((sizeof(struct bch_extent_crc128) +                    \
-         sizeof(struct bch_extent_ptr)) / sizeof(u64))
+         sizeof(struct bch_extent_ptr)) / sizeof(__u64))
 
 /* Maximum possible size of an entire extent value: */
 #define BKEY_EXTENT_VAL_U64s_MAX                               \
@@ -690,7 +695,7 @@ struct bch_reservation {
 /* Btree pointers don't carry around checksums: */
 #define BKEY_BTREE_PTR_VAL_U64s_MAX                            \
        ((sizeof(struct bch_btree_ptr_v2) +                     \
-         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
+         sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
 #define BKEY_BTREE_PTR_U64s_MAX                                        \
        (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
 
@@ -706,7 +711,7 @@ struct bch_inode {
        __le64                  bi_hash_seed;
        __le32                  bi_flags;
        __le16                  bi_mode;
-       __u8                    fields[0];
+       __u8                    fields[];
 } __packed __aligned(8);
 
 struct bch_inode_v2 {
@@ -716,7 +721,7 @@ struct bch_inode_v2 {
        __le64                  bi_hash_seed;
        __le64                  bi_flags;
        __le16                  bi_mode;
-       __u8                    fields[0];
+       __u8                    fields[];
 } __packed __aligned(8);
 
 struct bch_inode_v3 {
@@ -728,11 +733,11 @@ struct bch_inode_v3 {
        __le64                  bi_sectors;
        __le64                  bi_size;
        __le64                  bi_version;
-       __u8                    fields[0];
+       __u8                    fields[];
 } __packed __aligned(8);
 
 #define INODEv3_FIELDS_START_INITIAL   6
-#define INODEv3_FIELDS_START_CUR       (offsetof(struct bch_inode_v3, fields) / sizeof(u64))
+#define INODEv3_FIELDS_START_CUR       (offsetof(struct bch_inode_v3, fields) / sizeof(__u64))
 
 struct bch_inode_generation {
        struct bch_val          v;
@@ -819,34 +824,30 @@ enum inode_opt_id {
        Inode_opt_nr,
 };
 
-enum {
-       /*
-        * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
-        * flags)
-        */
-       __BCH_INODE_SYNC                = 0,
-       __BCH_INODE_IMMUTABLE           = 1,
-       __BCH_INODE_APPEND              = 2,
-       __BCH_INODE_NODUMP              = 3,
-       __BCH_INODE_NOATIME             = 4,
-
-       __BCH_INODE_I_SIZE_DIRTY        = 5,
-       __BCH_INODE_I_SECTORS_DIRTY     = 6,
-       __BCH_INODE_UNLINKED            = 7,
-       __BCH_INODE_BACKPTR_UNTRUSTED   = 8,
-
-       /* bits 20+ reserved for packed fields below: */
-};
-
-#define BCH_INODE_SYNC         (1 << __BCH_INODE_SYNC)
-#define BCH_INODE_IMMUTABLE    (1 << __BCH_INODE_IMMUTABLE)
-#define BCH_INODE_APPEND       (1 << __BCH_INODE_APPEND)
-#define BCH_INODE_NODUMP       (1 << __BCH_INODE_NODUMP)
-#define BCH_INODE_NOATIME      (1 << __BCH_INODE_NOATIME)
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
-#define BCH_INODE_UNLINKED     (1 << __BCH_INODE_UNLINKED)
-#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED)
+#define BCH_INODE_FLAGS()                      \
+       x(sync,                         0)      \
+       x(immutable,                    1)      \
+       x(append,                       2)      \
+       x(nodump,                       3)      \
+       x(noatime,                      4)      \
+       x(i_size_dirty,                 5)      \
+       x(i_sectors_dirty,              6)      \
+       x(unlinked,                     7)      \
+       x(backptr_untrusted,            8)
+
+/* bits 20+ reserved for packed fields below: */
+
+enum bch_inode_flags {
+#define x(t, n)        BCH_INODE_##t = 1U << n,
+       BCH_INODE_FLAGS()
+#undef x
+};
+
+enum __bch_inode_flags {
+#define x(t, n)        __BCH_INODE_##t = n,
+       BCH_INODE_FLAGS()
+#undef x
+};
 
 LE32_BITMASK(INODE_STR_HASH,   struct bch_inode, bi_flags, 20, 24);
 LE32_BITMASK(INODE_NR_FIELDS,  struct bch_inode, bi_flags, 24, 31);
@@ -899,9 +900,7 @@ struct bch_dirent {
 #define DT_SUBVOL      16
 #define BCH_DT_MAX     17
 
-#define BCH_NAME_MAX   ((unsigned) (U8_MAX * sizeof(u64) -             \
-                        sizeof(struct bkey) -                          \
-                        offsetof(struct bch_dirent, d_name)))
+#define BCH_NAME_MAX   512
 
 /* Xattrs */
 
@@ -988,10 +987,11 @@ struct bch_alloc_v4 {
        __u64                   io_time[2];
        __u32                   stripe;
        __u32                   nr_external_backpointers;
+       __u64                   fragmentation_lru;
 } __packed __aligned(8);
 
 #define BCH_ALLOC_V4_U64s_V0   6
-#define BCH_ALLOC_V4_U64s      (sizeof(struct bch_alloc_v4) / sizeof(u64))
+#define BCH_ALLOC_V4_U64s      (sizeof(struct bch_alloc_v4) / sizeof(__u64))
 
 BITMASK(BCH_ALLOC_V4_NEED_DISCARD,     struct bch_alloc_v4, flags,  0,  1)
 BITMASK(BCH_ALLOC_V4_NEED_INC_GEN,     struct bch_alloc_v4, flags,  1,  2)
@@ -1081,20 +1081,20 @@ struct bch_reflink_v {
        struct bch_val          v;
        __le64                  refcount;
        union bch_extent_entry  start[0];
-       __u64                   _data[0];
+       __u64                   _data[];
 } __packed __aligned(8);
 
 struct bch_indirect_inline_data {
        struct bch_val          v;
        __le64                  refcount;
-       u8                      data[0];
+       u8                      data[];
 };
 
 /* Inline data */
 
 struct bch_inline_data {
        struct bch_val          v;
-       u8                      data[0];
+       u8                      data[];
 };
 
 /* Subvolumes: */
@@ -1108,6 +1108,14 @@ struct bch_subvolume {
        __le32                  flags;
        __le32                  snapshot;
        __le64                  inode;
+       /*
+        * Snapshot subvolumes form a tree, separate from the snapshot nodes
+        * tree - if this subvolume is a snapshot, this is the ID of the
+        * subvolume it was created from:
+        */
+       __le32                  parent;
+       __le32                  pad;
+       bch_le128               otime;
 };
 
 LE32_BITMASK(BCH_SUBVOLUME_RO,         struct bch_subvolume, flags,  0,  1)
@@ -1126,7 +1134,10 @@ struct bch_snapshot {
        __le32                  parent;
        __le32                  children[2];
        __le32                  subvol;
-       __le32                  pad;
+       /* corresponds to a bch_snapshot_tree in BTREE_ID_snapshot_trees */
+       __le32                  tree;
+       __le32                  depth;
+       __le32                  skip[3];
 };
 
 LE32_BITMASK(BCH_SNAPSHOT_DELETED,     struct bch_snapshot, flags,  0,  1)
@@ -1134,6 +1145,19 @@ LE32_BITMASK(BCH_SNAPSHOT_DELETED,       struct bch_snapshot, flags,  0,  1)
 /* True if a subvolume points to this snapshot node: */
 LE32_BITMASK(BCH_SNAPSHOT_SUBVOL,      struct bch_snapshot, flags,  1,  2)
 
+/*
+ * Snapshot trees:
+ *
+ * The snapshot_trees btree gives us a persistent identifier for each tree of
+ * bch_snapshot nodes, and allows us to record and easily find the root/master
+ * subvolume that other snapshots were created from:
+ */
+struct bch_snapshot_tree {
+       struct bch_val          v;
+       __le32                  master_subvol;
+       __le32                  root_snapshot;
+};
+
 /* LRU btree: */
 
 struct bch_lru {
@@ -1143,6 +1167,33 @@ struct bch_lru {
 
 #define LRU_ID_STRIPES         (1U << 16)
 
+/* Logged operations btree: */
+
+struct bch_logged_op_truncate {
+       struct bch_val          v;
+       __le32                  subvol;
+       __le32                  pad;
+       __le64                  inum;
+       __le64                  new_i_size;
+};
+
+enum logged_op_finsert_state {
+       LOGGED_OP_FINSERT_start,
+       LOGGED_OP_FINSERT_shift_extents,
+       LOGGED_OP_FINSERT_finish,
+};
+
+struct bch_logged_op_finsert {
+       struct bch_val          v;
+       __u8                    state;          /* presumably enum logged_op_finsert_state -- TODO confirm */
+       __u8                    pad[3];         /* state + pad occupy 4 bytes */
+       __le32                  subvol;
+       __le64                  inum;
+       __le64                  dst_offset;
+       __le64                  src_offset;
+       __le64                  pos;
+};
+
 /* Optional/variable size superblock sections: */
 
 struct bch_sb_field {
@@ -1153,7 +1204,7 @@ struct bch_sb_field {
 
 #define BCH_SB_FIELDS()                                \
        x(journal,      0)                      \
-       x(members,      1)                      \
+       x(members_v1,   1)                      \
        x(crypt,        2)                      \
        x(replicas_v0,  3)                      \
        x(quota,        4)                      \
@@ -1162,7 +1213,9 @@ struct bch_sb_field {
        x(replicas,     7)                      \
        x(journal_seq_blacklist, 8)             \
        x(journal_v2,   9)                      \
-       x(counters,     10)
+       x(counters,     10)                     \
+       x(members_v2,   11)                     \
+       x(errors,       12)
 
 enum bch_sb_field_type {
 #define x(f, nr)       BCH_SB_FIELD_##f = nr,
@@ -1183,7 +1236,7 @@ enum bch_sb_field_type {
 
 struct bch_sb_field_journal {
        struct bch_sb_field     field;
-       __le64                  buckets[0];
+       __le64                  buckets[];
 };
 
 struct bch_sb_field_journal_v2 {
@@ -1192,32 +1245,63 @@ struct bch_sb_field_journal_v2 {
        struct bch_sb_field_journal_v2_entry {
                __le64          start;
                __le64          nr;
-       }                       d[0];
+       }                       d[];
 };
 
-/* BCH_SB_FIELD_members: */
+/* BCH_SB_FIELD_members_v1: */
 
 #define BCH_MIN_NR_NBUCKETS    (1 << 6)
 
+#define BCH_IOPS_MEASUREMENTS()                        \
+       x(seqread,      0)                      \
+       x(seqwrite,     1)                      \
+       x(randread,     2)                      \
+       x(randwrite,    3)
+
+enum bch_iops_measurement {
+#define x(t, n) BCH_IOPS_##t = n,
+       BCH_IOPS_MEASUREMENTS()
+#undef x
+       BCH_IOPS_NR
+};
+
+#define BCH_MEMBER_ERROR_TYPES()               \
+       x(read,         0)                      \
+       x(write,        1)                      \
+       x(checksum,     2)
+
+enum bch_member_error_type {
+#define x(t, n) BCH_MEMBER_ERROR_##t = n,
+       BCH_MEMBER_ERROR_TYPES()
+#undef x
+       BCH_MEMBER_ERROR_NR
+};
+
 struct bch_member {
-       uuid_le                 uuid;
+       __uuid_t                uuid;
        __le64                  nbuckets;       /* device size */
        __le16                  first_bucket;   /* index of first bucket used */
        __le16                  bucket_size;    /* sectors */
        __le32                  pad;
        __le64                  last_mount;     /* time_t */
 
-       __le64                  flags[2];
+       __le64                  flags;
+       __le32                  iops[4];
+       __le64                  errors[BCH_MEMBER_ERROR_NR];
+       __le64                  errors_at_reset[BCH_MEMBER_ERROR_NR];
+       __le64                  errors_reset_time;
 };
 
-LE64_BITMASK(BCH_MEMBER_STATE,         struct bch_member, flags[0],  0,  4)
+#define BCH_MEMBER_V1_BYTES    56
+
+LE64_BITMASK(BCH_MEMBER_STATE,         struct bch_member, flags,  0,  4)
 /* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
-LE64_BITMASK(BCH_MEMBER_DISCARD,       struct bch_member, flags[0], 14, 15)
-LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,  struct bch_member, flags[0], 15, 20)
-LE64_BITMASK(BCH_MEMBER_GROUP,         struct bch_member, flags[0], 20, 28)
-LE64_BITMASK(BCH_MEMBER_DURABILITY,    struct bch_member, flags[0], 28, 30)
+LE64_BITMASK(BCH_MEMBER_DISCARD,       struct bch_member, flags, 14, 15)
+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,  struct bch_member, flags, 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP,         struct bch_member, flags, 20, 28)
+LE64_BITMASK(BCH_MEMBER_DURABILITY,    struct bch_member, flags, 28, 30)
 LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
-                                       struct bch_member, flags[0], 30, 31)
+                                       struct bch_member, flags, 30, 31)
 
 #if 0
 LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS,        struct bch_member, flags[1], 0,  20);
@@ -1237,9 +1321,16 @@ enum bch_member_state {
        BCH_MEMBER_STATE_NR
 };
 
-struct bch_sb_field_members {
+struct bch_sb_field_members_v1 {
        struct bch_sb_field     field;
-       struct bch_member       members[0];
+       struct bch_member       _members[]; //Members are now variable size
+};
+
+struct bch_sb_field_members_v2 {
+       struct bch_sb_field     field;
+       __le16                  member_bytes;   /* size of a single member entry */
+       u8                      pad[6];         /* member_bytes + pad occupy 8 bytes */
+       struct bch_member       _members[];
+};
 
 /* BCH_SB_FIELD_crypt: */
@@ -1253,10 +1344,10 @@ struct bch_key {
 };
 
 #define BCH_KEY_MAGIC                                  \
-       (((u64) 'b' <<  0)|((u64) 'c' <<  8)|           \
-        ((u64) 'h' << 16)|((u64) '*' << 24)|           \
-        ((u64) '*' << 32)|((u64) 'k' << 40)|           \
-        ((u64) 'e' << 48)|((u64) 'y' << 56))
+       (((__u64) 'b' <<  0)|((__u64) 'c' <<  8)|               \
+        ((__u64) 'h' << 16)|((__u64) '*' << 24)|               \
+        ((__u64) '*' << 32)|((__u64) 'k' << 40)|               \
+        ((__u64) 'e' << 48)|((__u64) 'y' << 56))
 
 struct bch_encrypted_key {
        __le64                  magic;
@@ -1357,7 +1448,7 @@ struct bch_replicas_entry {
 
 struct bch_sb_field_replicas {
        struct bch_sb_field     field;
-       struct bch_replicas_entry entries[0];
+       struct bch_replicas_entry entries[];
 } __packed __aligned(8);
 
 /* BCH_SB_FIELD_quota: */
@@ -1392,7 +1483,7 @@ LE64_BITMASK(BCH_GROUP_PARENT,            struct bch_disk_group, flags[0], 6, 24)
 
 struct bch_sb_field_disk_groups {
        struct bch_sb_field     field;
-       struct bch_disk_group   entries[0];
+       struct bch_disk_group   entries[];
 } __packed __aligned(8);
 
 /* BCH_SB_FIELD_counters */
@@ -1436,7 +1527,7 @@ struct bch_sb_field_disk_groups {
        x(move_extent_read,                             35)     \
        x(move_extent_write,                            36)     \
        x(move_extent_finish,                           37)     \
-       x(move_extent_race,                             38)     \
+       x(move_extent_fail,                             38)     \
        x(move_extent_alloc_mem_fail,                   39)     \
        x(copygc,                                       40)     \
        x(copygc_wait,                                  41)     \
@@ -1472,7 +1563,9 @@ struct bch_sb_field_disk_groups {
        x(trans_traverse_all,                           71)     \
        x(transaction_commit,                           72)     \
        x(write_super,                                  73)     \
-       x(trans_restart_would_deadlock_recursion_limit, 74)
+       x(trans_restart_would_deadlock_recursion_limit, 74)     \
+       x(trans_restart_write_buffer_flush,             75)     \
+       x(trans_restart_split_race,                     76)
 
 enum bch_persistent_counters {
 #define x(t, n, ...) BCH_COUNTER_##t,
@@ -1483,7 +1576,7 @@ enum bch_persistent_counters {
 
 struct bch_sb_field_counters {
        struct bch_sb_field     field;
-       __le64                  d[0];
+       __le64                  d[];
 };
 
 /*
@@ -1497,10 +1590,8 @@ struct jset_entry {
        __u8                    type; /* designates what this jset holds */
        __u8                    pad[3];
 
-       union {
-               struct bkey_i   start[0];
-               __u64           _data[0];
-       };
+       struct bkey_i           start[0];
+       __u64                   _data[];
 };
 
 struct bch_sb_field_clean {
@@ -1511,10 +1602,8 @@ struct bch_sb_field_clean {
        __le16                  _write_clock;
        __le64                  journal_seq;
 
-       union {
-               struct jset_entry start[0];
-               __u64           _data[0];
-       };
+       struct jset_entry       start[0];
+       __u64                   _data[];
 };
 
 struct journal_seq_blacklist_entry {
@@ -1524,13 +1613,20 @@ struct journal_seq_blacklist_entry {
 
 struct bch_sb_field_journal_seq_blacklist {
        struct bch_sb_field     field;
+       struct journal_seq_blacklist_entry start[];
+};
 
-       union {
-               struct journal_seq_blacklist_entry start[0];
-               __u64           _data[0];
-       };
+struct bch_sb_field_errors {
+       struct bch_sb_field     field;
+       struct bch_sb_field_error_entry {
+               __le64          v;
+               __le64          last_error_time;
+       }                       entries[];
 };
 
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID,    struct bch_sb_field_error_entry, v,  0, 16);
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,    struct bch_sb_field_error_entry, v, 16, 64);
+
 /* Superblock: */
 
 /*
@@ -1538,42 +1634,83 @@ struct bch_sb_field_journal_seq_blacklist {
  * One common version number for all on disk data structures - superblock, btree
  * nodes, journal entries
  */
-#define BCH_JSET_VERSION_OLD                   2
-#define BCH_BSET_VERSION_OLD                   3
-
-#define BCH_METADATA_VERSIONS()                                \
-       x(bkey_renumber,                10)             \
-       x(inode_btree_change,           11)             \
-       x(snapshot,                     12)             \
-       x(inode_backpointers,           13)             \
-       x(btree_ptr_sectors_written,    14)             \
-       x(snapshot_2,                   15)             \
-       x(reflink_p_fix,                16)             \
-       x(subvol_dirent,                17)             \
-       x(inode_v2,                     18)             \
-       x(freespace,                    19)             \
-       x(alloc_v4,                     20)             \
-       x(new_data_types,               21)             \
-       x(backpointers,                 22)             \
-       x(inode_v3,                     23)             \
-       x(unwritten_extents,            24)             \
-       x(bucket_gens,                  25)
+#define BCH_VERSION_MAJOR(_v)          ((__u16) ((_v) >> 10))  /* bits 10 and up of _v */
+#define BCH_VERSION_MINOR(_v)          ((__u16) ((_v) & ~(~0U << 10))) /* low 10 bits of _v */
+#define BCH_VERSION(_major, _minor)    (((_major) << 10)|(_minor) << 0)        /* _minor is OR'd in unmasked */
+
+#define RECOVERY_PASS_ALL_FSCK         (1ULL << 63)
+
+#define BCH_METADATA_VERSIONS()                                                \
+       x(bkey_renumber,                BCH_VERSION(0, 10),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(inode_btree_change,           BCH_VERSION(0, 11),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(snapshot,                     BCH_VERSION(0, 12),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(inode_backpointers,           BCH_VERSION(0, 13),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(btree_ptr_sectors_written,    BCH_VERSION(0, 14),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(snapshot_2,                   BCH_VERSION(0, 15),             \
+         BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)|         \
+         BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)|             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(reflink_p_fix,                BCH_VERSION(0, 16),             \
+         BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p))                     \
+       x(subvol_dirent,                BCH_VERSION(0, 17),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(inode_v2,                     BCH_VERSION(0, 18),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(freespace,                    BCH_VERSION(0, 19),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(alloc_v4,                     BCH_VERSION(0, 20),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(new_data_types,               BCH_VERSION(0, 21),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(backpointers,                 BCH_VERSION(0, 22),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(inode_v3,                     BCH_VERSION(0, 23),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(unwritten_extents,            BCH_VERSION(0, 24),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(bucket_gens,                  BCH_VERSION(0, 25),             \
+         BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|                  \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(lru_v2,                       BCH_VERSION(0, 26),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(fragmentation_lru,            BCH_VERSION(0, 27),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(no_bps_in_alloc_keys,         BCH_VERSION(0, 28),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(snapshot_trees,               BCH_VERSION(0, 29),             \
+         RECOVERY_PASS_ALL_FSCK)                                       \
+       x(major_minor,                  BCH_VERSION(1,  0),             \
+         0)                                                            \
+       x(snapshot_skiplists,           BCH_VERSION(1,  1),             \
+         BIT_ULL(BCH_RECOVERY_PASS_check_snapshots))                   \
+       x(deleted_inodes,               BCH_VERSION(1,  2),             \
+         BIT_ULL(BCH_RECOVERY_PASS_check_inodes))                      \
+       x(rebalance_work,               BCH_VERSION(1,  3),             \
+         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
-#define x(t, n)        bcachefs_metadata_version_##t = n,
+#define x(t, n, upgrade_passes)        bcachefs_metadata_version_##t = n,
        BCH_METADATA_VERSIONS()
 #undef x
        bcachefs_metadata_version_max
 };
 
+static const __maybe_unused
+unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_rebalance_work;
+
 #define bcachefs_metadata_version_current      (bcachefs_metadata_version_max - 1)
 
 #define BCH_SB_SECTOR                  8
 #define BCH_SB_MEMBERS_MAX             64 /* XXX kill */
 
 struct bch_sb_layout {
-       uuid_le                 magic;  /* bcachefs superblock UUID */
+       __uuid_t                magic;  /* bcachefs superblock UUID */
        __u8                    layout_type;
        __u8                    sb_max_size_bits; /* base 2 of 512 byte sectors */
        __u8                    nr_superblocks;
@@ -1604,9 +1741,9 @@ struct bch_sb {
        __le16                  version;
        __le16                  version_min;
        __le16                  pad[2];
-       uuid_le                 magic;
-       uuid_le                 uuid;
-       uuid_le                 user_uuid;
+       __uuid_t                magic;
+       __uuid_t                uuid;
+       __uuid_t                user_uuid;
        __u8                    label[BCH_SB_LABEL_SIZE];
        __le64                  offset;
        __le64                  seq;
@@ -1626,10 +1763,8 @@ struct bch_sb {
 
        struct bch_sb_layout    layout;
 
-       union {
-               struct bch_sb_field start[0];
-               __le64          _data[0];
-       };
+       struct bch_sb_field     start[0];
+       __le64                  _data[];
 } __packed __aligned(8);
 
 /*
@@ -1673,7 +1808,7 @@ LE64_BITMASK(BCH_SB_HAS_TOPOLOGY_ERRORS,struct bch_sb, flags[0], 61, 62);
 LE64_BITMASK(BCH_SB_BIG_ENDIAN,                struct bch_sb, flags[0], 62, 63);
 
 LE64_BITMASK(BCH_SB_STR_HASH_TYPE,     struct bch_sb, flags[1],  0,  4);
-LE64_BITMASK(BCH_SB_COMPRESSION_TYPE,  struct bch_sb, flags[1],  4,  8);
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_LO,struct bch_sb, flags[1],  4,  8);
 LE64_BITMASK(BCH_SB_INODE_32BIT,       struct bch_sb, flags[1],  8,  9);
 
 LE64_BITMASK(BCH_SB_128_BIT_MACS,      struct bch_sb, flags[1],  9, 10);
@@ -1693,7 +1828,7 @@ LE64_BITMASK(BCH_SB_PROMOTE_TARGET,       struct bch_sb, flags[1], 28, 40);
 LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
 LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
 
-LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO,
                                        struct bch_sb, flags[2],  0,  4);
 LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES,  struct bch_sb, flags[2],  4, 64);
 
@@ -1704,9 +1839,40 @@ LE64_BITMASK(BCH_SB_INODES_USE_KEY_CACHE,struct bch_sb, flags[3], 29, 30);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DELAY,struct bch_sb, flags[3], 30, 62);
 LE64_BITMASK(BCH_SB_JOURNAL_FLUSH_DISABLED,struct bch_sb, flags[3], 62, 63);
 LE64_BITMASK(BCH_SB_JOURNAL_RECLAIM_DELAY,struct bch_sb, flags[4], 0, 32);
-/* Obsolete, always enabled: */
 LE64_BITMASK(BCH_SB_JOURNAL_TRANSACTION_NAMES,struct bch_sb, flags[4], 32, 33);
 LE64_BITMASK(BCH_SB_NOCOW,             struct bch_sb, flags[4], 33, 34);
+LE64_BITMASK(BCH_SB_WRITE_BUFFER_SIZE, struct bch_sb, flags[4], 34, 54);
+LE64_BITMASK(BCH_SB_VERSION_UPGRADE,   struct bch_sb, flags[4], 54, 56);
+
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE_HI,struct bch_sb, flags[4], 56, 60);
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
+                                       struct bch_sb, flags[4], 60, 64);
+
+LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
+                                       struct bch_sb, flags[5],  0, 16);
+
+static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
+{      /* _LO field is the low part; _HI field is OR'd in shifted up by 4 */
+       return BCH_SB_COMPRESSION_TYPE_LO(sb) | (BCH_SB_COMPRESSION_TYPE_HI(sb) << 4);
+}
+
+static inline void SET_BCH_SB_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
+{      /* value is stored split: v goes to the _LO setter, v >> 4 to the _HI setter */
+       SET_BCH_SB_COMPRESSION_TYPE_LO(sb, v);
+       SET_BCH_SB_COMPRESSION_TYPE_HI(sb, v >> 4);
+}
+
+static inline __u64 BCH_SB_BACKGROUND_COMPRESSION_TYPE(const struct bch_sb *sb)
+{      /* _LO field is the low part; _HI field is OR'd in shifted up by 4 */
+       return BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb) |
+               (BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb) << 4);
+}
+
+static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u64 v)
+{      /* value is stored split: v goes to the _LO setter, v >> 4 to the _HI setter */
+       SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_LO(sb, v);
+       SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI(sb, v >> 4);
+}
 
 /*
  * Features:
@@ -1774,6 +1940,17 @@ enum bch_sb_compat {
 
 /* options: */
 
+#define BCH_VERSION_UPGRADE_OPTS()     /* x(name, value) list */ \
+       x(compatible,           0)      \
+       x(incompatible,         1)      \
+       x(none,                 2)
+/* One BCH_VERSION_UPGRADE_<name> = <value> enumerator is generated per x() entry. */
+enum bch_version_upgrade_opts {
+#define x(t, n) BCH_VERSION_UPGRADE_##t = n,
+       BCH_VERSION_UPGRADE_OPTS()
+#undef x
+};
+
 #define BCH_REPLICAS_MAX               4U
 
 #define BCH_BKEY_PTRS_MAX              16U
@@ -1832,7 +2009,7 @@ enum bch_csum_type {
        BCH_CSUM_NR
 };
 
-static const unsigned bch_crc_bytes[] = {
+static const __maybe_unused unsigned bch_crc_bytes[] = {
        [BCH_CSUM_none]                         = 0,
        [BCH_CSUM_crc32c_nonzero]               = 4,
        [BCH_CSUM_crc32c]                       = 4,
@@ -1903,11 +2080,11 @@ enum bch_compression_opts {
  */
 
 #define BCACHE_MAGIC                                                   \
-       UUID_LE(0xf67385c6, 0x1a4e, 0xca45,                             \
-               0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
+       UUID_INIT(0xc68573f6, 0x4e1a, 0x45ca,                           \
+                 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) /* NOTE(review): first three fields are byte-swapped vs. the old UUID_LE() form, presumably so the 16 bytes stay identical; confirm */
 #define BCHFS_MAGIC                                                    \
-       UUID_LE(0xf67385c6, 0xce66, 0xa990,                             \
-               0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+       UUID_INIT(0xc68573f6, 0x66ce, 0x90a9,                           \
+                 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef) /* NOTE(review): first three fields are byte-swapped vs. the old UUID_LE() form, presumably so the 16 bytes stay identical; confirm */
 
 #define BCACHEFS_STATFS_MAGIC          0xca451a4e
 
@@ -2022,7 +2199,7 @@ struct jset_entry_dev_usage {
        __le64                  _buckets_unavailable; /* No longer used */
 
        struct jset_entry_dev_usage_type d[];
-} __packed;
+};
 
 static inline unsigned jset_entry_dev_usage_nr_types(struct jset_entry_dev_usage *u)
 {
@@ -2064,10 +2241,8 @@ struct jset {
        __le64                  last_seq;
 
 
-       union {
-               struct jset_entry start[0];
-               __u64           _data[0];
-       };
+       struct jset_entry       start[0];
+       __u64                   _data[];
 } __packed __aligned(8);
 
 LE32_BITMASK(JSET_CSUM_TYPE,   struct jset, flags, 0, 4);
@@ -2078,25 +2253,74 @@ LE32_BITMASK(JSET_NO_FLUSH,     struct jset, flags, 5, 6);
 
 /* Btree: */
 
-#define BCH_BTREE_IDS()                                \
-       x(extents,              0)              \
-       x(inodes,               1)              \
-       x(dirents,              2)              \
-       x(xattrs,               3)              \
-       x(alloc,                4)              \
-       x(quotas,               5)              \
-       x(stripes,              6)              \
-       x(reflink,              7)              \
-       x(subvolumes,           8)              \
-       x(snapshots,            9)              \
-       x(lru,                  10)             \
-       x(freespace,            11)             \
-       x(need_discard,         12)             \
-       x(backpointers,         13)             \
-       x(bucket_gens,          14)
+enum btree_id_flags { /* bit flags, OR-ed together in the third x() argument of BCH_BTREE_IDS() */
+       BTREE_ID_EXTENTS        = BIT(0),
+       BTREE_ID_SNAPSHOTS      = BIT(1),
+       BTREE_ID_DATA           = BIT(2),
+};
+/* x(name, nr, btree_id_flags mask, BIT_ULL(KEY_TYPE_*) mask); the key mask is presumably the key types allowed in that btree, TODO confirm. */
+#define BCH_BTREE_IDS()                                                                \
+       x(extents,              0,      BTREE_ID_EXTENTS|BTREE_ID_SNAPSHOTS|BTREE_ID_DATA,\
+         BIT_ULL(KEY_TYPE_whiteout)|                                           \
+         BIT_ULL(KEY_TYPE_error)|                                              \
+         BIT_ULL(KEY_TYPE_cookie)|                                             \
+         BIT_ULL(KEY_TYPE_extent)|                                             \
+         BIT_ULL(KEY_TYPE_reservation)|                                        \
+         BIT_ULL(KEY_TYPE_reflink_p)|                                          \
+         BIT_ULL(KEY_TYPE_inline_data))                                        \
+       x(inodes,               1,      BTREE_ID_SNAPSHOTS,                     \
+         BIT_ULL(KEY_TYPE_whiteout)|                                           \
+         BIT_ULL(KEY_TYPE_inode)|                                              \
+         BIT_ULL(KEY_TYPE_inode_v2)|                                           \
+         BIT_ULL(KEY_TYPE_inode_v3)|                                           \
+         BIT_ULL(KEY_TYPE_inode_generation))                                   \
+       x(dirents,              2,      BTREE_ID_SNAPSHOTS,                     \
+         BIT_ULL(KEY_TYPE_whiteout)|                                           \
+         BIT_ULL(KEY_TYPE_hash_whiteout)|                                      \
+         BIT_ULL(KEY_TYPE_dirent))                                             \
+       x(xattrs,               3,      BTREE_ID_SNAPSHOTS,                     \
+         BIT_ULL(KEY_TYPE_whiteout)|                                           \
+         BIT_ULL(KEY_TYPE_cookie)|                                             \
+         BIT_ULL(KEY_TYPE_hash_whiteout)|                                      \
+         BIT_ULL(KEY_TYPE_xattr))                                              \
+       x(alloc,                4,      0,                                      \
+         BIT_ULL(KEY_TYPE_alloc)|                                              \
+         BIT_ULL(KEY_TYPE_alloc_v2)|                                           \
+         BIT_ULL(KEY_TYPE_alloc_v3)|                                           \
+         BIT_ULL(KEY_TYPE_alloc_v4))                                           \
+       x(quotas,               5,      0,                                      \
+         BIT_ULL(KEY_TYPE_quota))                                              \
+       x(stripes,              6,      0,                                      \
+         BIT_ULL(KEY_TYPE_stripe))                                             \
+       x(reflink,              7,      BTREE_ID_EXTENTS|BTREE_ID_DATA,         \
+         BIT_ULL(KEY_TYPE_reflink_v)|                                          \
+         BIT_ULL(KEY_TYPE_indirect_inline_data))                               \
+       x(subvolumes,           8,      0,                                      \
+         BIT_ULL(KEY_TYPE_subvolume))                                          \
+       x(snapshots,            9,      0,                                      \
+         BIT_ULL(KEY_TYPE_snapshot))                                           \
+       x(lru,                  10,     0,                                      \
+         BIT_ULL(KEY_TYPE_set))                                                \
+       x(freespace,            11,     BTREE_ID_EXTENTS,                       \
+         BIT_ULL(KEY_TYPE_set))                                                \
+       x(need_discard,         12,     0,                                      \
+         BIT_ULL(KEY_TYPE_set))                                                \
+       x(backpointers,         13,     0,                                      \
+         BIT_ULL(KEY_TYPE_backpointer))                                        \
+       x(bucket_gens,          14,     0,                                      \
+         BIT_ULL(KEY_TYPE_bucket_gens))                                        \
+       x(snapshot_trees,       15,     0,                                      \
+         BIT_ULL(KEY_TYPE_snapshot_tree))                                      \
+       x(deleted_inodes,       16,     BTREE_ID_SNAPSHOTS,                     \
+         BIT_ULL(KEY_TYPE_set))                                                \
+       x(logged_ops,           17,     0,                                      \
+         BIT_ULL(KEY_TYPE_logged_op_truncate)|                                 \
+         BIT_ULL(KEY_TYPE_logged_op_finsert))                                  \
+       x(rebalance_work,       18,     BTREE_ID_SNAPSHOTS,                     \
+         BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
 
 enum btree_id {
-#define x(kwd, val) BTREE_ID_##kwd = val,
+#define x(name, nr, ...) BTREE_ID_##name = nr,
        BCH_BTREE_IDS()
 #undef x
        BTREE_ID_NR
@@ -2128,10 +2352,8 @@ struct bset {
        __le16                  version;
        __le16                  u64s; /* count of d[] in u64s */
 
-       union {
-               struct bkey_packed start[0];
-               __u64           _data[0];
-       };
+       struct bkey_packed      start[0];
+       __u64                   _data[];
 } __packed __aligned(8);
 
 LE32_BITMASK(BSET_CSUM_TYPE,   struct bset, flags, 0, 4);
@@ -2167,13 +2389,25 @@ struct btree_node {
        };
 } __packed __aligned(8);
 
-LE64_BITMASK(BTREE_NODE_ID,    struct btree_node, flags,  0,  4);
+LE64_BITMASK(BTREE_NODE_ID_LO, struct btree_node, flags,  0,  4);
 LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags,  4,  8);
 LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
                                struct btree_node, flags,  8,  9);
-/* 9-32 unused */
+LE64_BITMASK(BTREE_NODE_ID_HI, struct btree_node, flags,  9, 25);
+/* 25-32 unused */
 LE64_BITMASK(BTREE_NODE_SEQ,   struct btree_node, flags, 32, 64);
 
+static inline __u64 BTREE_NODE_ID(const struct btree_node *n)
+{      /* btree ID is split in two: bits 0-3 come from ID_LO, the rest from ID_HI; const like the generated getter this replaces */
+       return BTREE_NODE_ID_LO(n) | (BTREE_NODE_ID_HI(n) << 4);
+}
+/* Writes v to ID_LO and v >> 4 to ID_HI; presumably the ID_LO setter masks v to its 4-bit field, TODO confirm. */
+static inline void SET_BTREE_NODE_ID(struct btree_node *n, __u64 v)
+{
+       SET_BTREE_NODE_ID_LO(n, v);
+       SET_BTREE_NODE_ID_HI(n, v >> 4);
+}
+
 struct btree_node_entry {
        struct bch_csum         csum;
 
@@ -2183,7 +2417,6 @@ struct btree_node_entry {
                __u8            pad[22];
                __le16          u64s;
                __u64           _data[0];
-
        };
        };
 } __packed __aligned(8);