-#ifndef _LINUX_BCACHE_H
-#define _LINUX_BCACHE_H
+#ifndef _BCACHEFS_FORMAT_H
+#define _BCACHEFS_FORMAT_H
/*
- * Bcache on disk data structures
+ * bcachefs on disk data structures
+ *
+ * OVERVIEW:
+ *
+ * There are three main types of on disk data structures in bcachefs (this is
+ * reduced from 5 in bcache)
+ *
+ * - superblock
+ * - journal
+ * - btree
+ *
+ * The btree is the primary structure; most metadata exists as keys in the
+ * various btrees. There are only a small number of btrees, they're not
+ * sharded - we have one btree for extents, another for inodes, et cetera.
+ *
+ * SUPERBLOCK:
+ *
+ * The superblock contains the location of the journal, the list of devices in
+ * the filesystem, and in general any metadata we need in order to decide
+ * whether we can start a filesystem, or that we need prior to reading the
+ * journal/btree roots.
+ *
+ * The superblock is extensible, and most of the contents of the superblock are
+ * in variable length, type tagged fields; see struct bch_sb_field.
+ *
+ * Backup superblocks do not reside in a fixed location; also, superblocks do
+ * not have a fixed size. To locate backup superblocks we have struct
+ * bch_sb_layout; we store a copy of this inside every superblock, and also
+ * before the first superblock.
+ *
+ * JOURNAL:
+ *
+ * The journal primarily records btree updates in the order they occurred;
+ * journal replay consists of just iterating over all the keys in the open
+ * journal entries and re-inserting them into the btrees.
+ *
+ * The journal also contains entry types for the btree roots, and blacklisted
+ * journal sequence numbers (see journal_seq_blacklist.c).
+ *
+ * BTREE:
+ *
+ * bcachefs btrees are copy on write b+ trees, where nodes are big (typically
+ * 128k-256k) and log structured. We use struct btree_node for writing the first
+ * entry in a given node (offset 0), and struct btree_node_entry for all
+ * subsequent writes.
+ *
+ * After the header, btree node entries contain a list of keys in sorted order.
+ * Values are stored inline with the keys; since values are variable length (and
+ * keys effectively are variable length too, due to packing) we can't do random
+ * access without building up additional in memory tables in the btree node read
+ * path.
+ *
+ * BTREE KEYS (struct bkey):
+ *
+ * The various btrees share a common format for the key - so as to avoid
+ * switching in fastpath lookup/comparison code - but define their own
+ * structures for the key values.
+ *
+ * The size of a key/value pair is stored as a u8 in units of u64s, so the max
+ * size is just under 2k. The common part also contains a type tag for the
+ * value, and a format field indicating whether the key is packed or not (and
+ * also meant to allow adding new key fields in the future, if desired).
+ *
+ * bkeys, when stored within a btree node, may also be packed. In that case, the
+ * bkey_format in that node is used to unpack it. Packed bkeys mean that we can
+ * be generous with field sizes in the common part of the key format (64 bit
+ * inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
*/
-#ifdef __cplusplus
-typedef bool _Bool;
-extern "C" {
-#endif
-
#include <asm/types.h>
#include <asm/byteorder.h>
+#include <linux/kernel.h>
#include <linux/uuid.h>
-#define LE32_BITMASK(name, type, field, offset, end) \
+#define LE_BITMASK(_bits, name, type, field, offset, end) \
static const unsigned name##_OFFSET = offset; \
static const unsigned name##_BITS = (end - offset); \
-static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \
+static const __u##_bits name##_MAX = (1ULL << (end - offset)) - 1; \
\
static inline __u64 name(const type *k) \
{ \
- return (__le32_to_cpu(k->field) >> offset) & \
+ return (__le##_bits##_to_cpu(k->field) >> offset) & \
~(~0ULL << (end - offset)); \
} \
\
static inline void SET_##name(type *k, __u64 v) \
{ \
- __u64 new = __le32_to_cpu(k->field); \
+ __u##_bits new = __le##_bits##_to_cpu(k->field); \
\
new &= ~(~(~0ULL << (end - offset)) << offset); \
new |= (v & ~(~0ULL << (end - offset))) << offset; \
- k->field = __cpu_to_le32(new); \
+ k->field = __cpu_to_le##_bits(new); \
}
-#define LE64_BITMASK(name, type, field, offset, end) \
-static const unsigned name##_OFFSET = offset; \
-static const unsigned name##_BITS = (end - offset); \
-static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \
- \
-static inline __u64 name(const type *k) \
-{ \
- return (__le64_to_cpu(k->field) >> offset) & \
- ~(~0ULL << (end - offset)); \
-} \
- \
-static inline void SET_##name(type *k, __u64 v) \
-{ \
- __u64 new = __le64_to_cpu(k->field); \
- \
- new &= ~(~(~0ULL << (end - offset)) << offset); \
- new |= (v & ~(~0ULL << (end - offset))) << offset; \
- k->field = __cpu_to_le64(new); \
-}
+#define LE16_BITMASK(n, t, f, o, e) LE_BITMASK(16, n, t, f, o, e)
+#define LE32_BITMASK(n, t, f, o, e) LE_BITMASK(32, n, t, f, o, e)
+#define LE64_BITMASK(n, t, f, o, e) LE_BITMASK(64, n, t, f, o, e)
struct bkey_format {
__u8 key_u64s;
/* Btree keys - all units are in sectors */
struct bpos {
- /* Word order matches machine byte order */
-#if defined(__LITTLE_ENDIAN)
+ /*
+ * Word order matches machine byte order - btree code treats a bpos as a
+ * single large integer, for search/comparison purposes
+ *
+ * Note that wherever a bpos is embedded in another on disk data
+ * structure, it has to be byte swabbed when reading in metadata that
+ * wasn't written in native endian order:
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
__u32 snapshot;
__u64 offset;
__u64 inode;
-#elif defined(__BIG_ENDIAN)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
__u64 inode;
__u64 offset; /* Points to end of extent - sectors */
__u32 snapshot;
#define KEY_INODE_MAX ((__u64)~0ULL)
#define KEY_OFFSET_MAX ((__u64)~0ULL)
#define KEY_SNAPSHOT_MAX ((__u32)~0U)
+#define KEY_SIZE_MAX ((__u32)~0U)
static inline struct bpos POS(__u64 inode, __u64 offset)
{
};
struct bversion {
-#if defined(__LITTLE_ENDIAN)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
__u64 lo;
__u32 hi;
-#elif defined(__BIG_ENDIAN)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
__u32 hi;
__u64 lo;
#endif
/* Type of the value */
__u8 type;
-#if defined(__LITTLE_ENDIAN)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
__u8 pad[1];
struct bversion version;
__u32 size; /* extent size, in sectors */
struct bpos p;
-#elif defined(__BIG_ENDIAN)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
struct bpos p;
__u32 size; /* extent size, in sectors */
struct bversion version;
} __attribute__((packed, aligned(8)));
#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
-#define KEY_PACKED_BITS_START 24
+#define BKEY_U64s_MAX U8_MAX
+#define BKEY_VAL_U64s_MAX (BKEY_U64s_MAX - BKEY_U64s)
-#define KEY_SIZE_MAX ((__u32)~0U)
+#define KEY_PACKED_BITS_START 24
#define KEY_FORMAT_LOCAL_BTREE 0
#define KEY_FORMAT_CURRENT 1
};
};
-#ifndef __cplusplus
-
#define KEY(_inode, _offset, _size) \
((struct bkey) { \
.u64s = BKEY_U64s, \
.size = _size, \
})
-#else
-
-static inline struct bkey KEY(__u64 inode, __u64 offset, __u64 size)
-{
- struct bkey ret;
-
- memset(&ret, 0, sizeof(ret));
- ret.u64s = BKEY_U64s;
- ret.format = KEY_FORMAT_CURRENT;
- ret.p.inode = inode;
- ret.p.offset = offset;
- ret.size = size;
-
- return ret;
-}
-
-#endif
-
static inline void bkey_init(struct bkey *k)
{
*k = KEY(0, 0, 0);
#define __BKEY_PADDED(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
-#define BKEY_VAL_TYPE(name, nr) \
-struct bkey_i_##name { \
- union { \
- struct bkey k; \
- struct bkey_i k_i; \
- }; \
- struct bch_##name v; \
-}
-
/*
* - DELETED keys are used internally to mark keys that should be ignored but
* override keys in composition order. Their version number is ignored.
* by new writes or cluster-wide GC. Node repair can also overwrite them with
* the same or a more recent version number, but not with an older version
* number.
+ *
+ * - WHITEOUT: for hash table btrees
*/
-#define KEY_TYPE_DELETED 0
-#define KEY_TYPE_DISCARD 1
-#define KEY_TYPE_ERROR 2
-#define KEY_TYPE_COOKIE 3
-#define KEY_TYPE_PERSISTENT_DISCARD 4
-#define KEY_TYPE_GENERIC_NR 128
+#define BCH_BKEY_TYPES() \
+ x(deleted, 0) \
+ x(discard, 1) \
+ x(error, 2) \
+ x(cookie, 3) \
+ x(whiteout, 4) \
+ x(btree_ptr, 5) \
+ x(extent, 6) \
+ x(reservation, 7) \
+ x(inode, 8) \
+ x(inode_generation, 9) \
+ x(dirent, 10) \
+ x(xattr, 11) \
+ x(alloc, 12) \
+ x(quota, 13) \
+ x(stripe, 14)
+
+enum bch_bkey_type { /* one KEY_TYPE_<name> per BCH_BKEY_TYPES() entry */
+#define x(name, nr) KEY_TYPE_##name = nr,
+ BCH_BKEY_TYPES()
+#undef x
+ KEY_TYPE_MAX, /* one past the last value listed in BCH_BKEY_TYPES() */
+};
struct bch_cookie {
struct bch_val v;
__le64 cookie;
};
-BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE);
/* Extents */
*
* If an extent is not checksummed or compressed, when the extent is trimmed we
* don't have to remember the extent we originally allocated and wrote: we can
- * merely adjust ptr->offset to point to the start of the start of the data that
- * is currently live. The size field in struct bkey records the current (live)
- * size of the extent, and is also used to mean "size of region on disk that we
- * point to" in this case.
+ * merely adjust ptr->offset to point to the start of the data that is currently
+ * live. The size field in struct bkey records the current (live) size of the
+ * extent, and is also used to mean "size of region on disk that we point to" in
+ * this case.
*
* Thus an extent that is not checksummed or compressed will consist only of a
* list of bch_extent_ptrs, with none of the fields in
__le64 hi;
} __attribute__((packed, aligned(8)));
-#define BCH_CSUM_NONE 0U
-#define BCH_CSUM_CRC32C 1U
-#define BCH_CSUM_CRC64 2U
-#define BCH_CSUM_CHACHA20_POLY1305_80 3U
-#define BCH_CSUM_CHACHA20_POLY1305_128 4U
-#define BCH_CSUM_NR 5U
+enum bch_csum_type {
+ BCH_CSUM_NONE = 0,
+ BCH_CSUM_CRC32C_NONZERO = 1,
+ BCH_CSUM_CRC64_NONZERO = 2,
+ BCH_CSUM_CHACHA20_POLY1305_80 = 3,
+ BCH_CSUM_CHACHA20_POLY1305_128 = 4,
+ BCH_CSUM_CRC32C = 5,
+ BCH_CSUM_CRC64 = 6,
+ BCH_CSUM_NR = 7,
+};
+
+static const unsigned bch_crc_bytes[] = { /* indexed by enum bch_csum_type; presumably checksum size in bytes */
+ [BCH_CSUM_NONE] = 0,
+ [BCH_CSUM_CRC32C_NONZERO] = 4, /* 32 bits */
+ [BCH_CSUM_CRC32C] = 4,
+ [BCH_CSUM_CRC64_NONZERO] = 8, /* 64 bits */
+ [BCH_CSUM_CRC64] = 8,
+ [BCH_CSUM_CHACHA20_POLY1305_80] = 10, /* 80 bits */
+ [BCH_CSUM_CHACHA20_POLY1305_128] = 16, /* 128 bits */
+};
-static inline _Bool bch2_csum_type_is_encryption(unsigned type)
+static inline _Bool bch2_csum_type_is_encryption(enum bch_csum_type type)
{
switch (type) {
case BCH_CSUM_CHACHA20_POLY1305_80:
}
}
-enum bch_extent_entry_type {
- BCH_EXTENT_ENTRY_ptr = 0,
- BCH_EXTENT_ENTRY_crc32 = 1,
- BCH_EXTENT_ENTRY_crc64 = 2,
- BCH_EXTENT_ENTRY_crc128 = 3,
+enum bch_compression_type {
+ BCH_COMPRESSION_NONE = 0,
+ BCH_COMPRESSION_LZ4_OLD = 1,
+ BCH_COMPRESSION_GZIP = 2,
+ BCH_COMPRESSION_LZ4 = 3,
+ BCH_COMPRESSION_ZSTD = 4,
+ BCH_COMPRESSION_NR = 5,
};
-#define BCH_EXTENT_ENTRY_MAX 4
+#define BCH_EXTENT_ENTRY_TYPES() \
+ x(ptr, 0) \
+ x(crc32, 1) \
+ x(crc64, 2) \
+ x(crc128, 3) \
+ x(stripe_ptr, 4)
+#define BCH_EXTENT_ENTRY_MAX 5 /* hand-maintained: highest number in the list above, plus one */
+
+enum bch_extent_entry_type { /* one BCH_EXTENT_ENTRY_<name> per BCH_EXTENT_ENTRY_TYPES() entry */
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
/* Compressed/uncompressed size are stored biased by 1: */
struct bch_extent_crc32 {
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 compression_type:4,
csum_type:4,
- nonce:14,
+ nonce:13,
offset:13,
_uncompressed_size:13,
_compressed_size:13,
- type:3;
+ type:4;
#endif
struct bch_csum csum;
} __attribute__((packed, aligned(8)));
#define CRC128_SIZE_MAX (1U << 13)
#define CRC128_NONCE_MAX ((1U << 13) - 1)
-/*
- * Max size of an extent that may require bouncing to read or write
- * (checksummed, compressed): 64k
- */
-#define BCH_ENCODED_EXTENT_MAX 128U
-
/*
* @reservation - pointer hasn't been written to, just reserved
*/
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:1,
cached:1,
- erasure_coded:1,
+ unused:1,
reservation:1,
offset:44, /* 8 petabytes */
dev:8,
dev:8,
offset:44,
reservation:1,
- erasure_coded:1,
+ unused:1,
cached:1,
type:1;
#endif
} __attribute__((packed, aligned(8)));
-struct bch_extent_reservation {
+struct bch_extent_stripe_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:5,
- unused:23,
+ block:8,
+ idx:51;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 idx:51,
+ block:8,
+ type:5;
+#endif
+};
+
+struct bch_extent_reservation {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:6,
+ unused:22,
replicas:4,
generation:32;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 generation:32,
replicas:4,
- unused:23,
- type:5;
+ unused:22,
+ type:6;
#endif
};
union bch_extent_entry {
-#if defined(__LITTLE_ENDIAN) || __BITS_PER_LONG == 64
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
unsigned long type;
#elif __BITS_PER_LONG == 32
struct {
#else
#error edit for your odd byteorder.
#endif
- struct bch_extent_crc32 crc32;
- struct bch_extent_crc64 crc64;
- struct bch_extent_crc128 crc128;
- struct bch_extent_ptr ptr;
-};
-enum {
- BCH_EXTENT = 128,
+#define x(f, n) struct bch_extent_##f f;
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
+};
- /*
- * This is kind of a hack, we're overloading the type for a boolean that
- * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED
- * have the same value type:
- */
- BCH_EXTENT_CACHED = 129,
+struct bch_btree_ptr {
+ struct bch_val v;
- /*
- * Persistent reservation:
- */
- BCH_RESERVATION = 130,
-};
+ struct bch_extent_ptr start[0];
+ __u64 _data[0];
+} __attribute__((packed, aligned(8)));
struct bch_extent {
struct bch_val v;
union bch_extent_entry start[0];
__u64 _data[0];
} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(extent, BCH_EXTENT);
struct bch_reservation {
struct bch_val v;
__u8 nr_replicas;
__u8 pad[3];
} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(reservation, BCH_RESERVATION);
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
sizeof(struct bch_extent_ptr)) / sizeof(u64))
/* Maximum possible size of an entire extent value: */
-/* There's a hack in the keylist code that needs to be fixed.. */
#define BKEY_EXTENT_VAL_U64s_MAX \
- (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX)
+ (BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+
+#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
/* Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
#define BLOCKDEV_INODE_MAX 4096
-#define BCACHE_ROOT_INO 4096
-
-enum bch_inode_types {
- BCH_INODE_FS = 128,
- BCH_INODE_BLOCKDEV = 129,
-};
+#define BCACHEFS_ROOT_INO 4096
struct bch_inode {
struct bch_val v;
- __le64 i_hash_seed;
- __le32 i_flags;
- __le16 i_mode;
+ __le64 bi_hash_seed;
+ __le32 bi_flags;
+ __le16 bi_mode;
__u8 fields[0];
} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(inode, BCH_INODE_FS);
-
-#define BCH_INODE_FIELDS() \
- BCH_INODE_FIELD(i_atime, 64) \
- BCH_INODE_FIELD(i_ctime, 64) \
- BCH_INODE_FIELD(i_mtime, 64) \
- BCH_INODE_FIELD(i_otime, 64) \
- BCH_INODE_FIELD(i_size, 64) \
- BCH_INODE_FIELD(i_sectors, 64) \
- BCH_INODE_FIELD(i_uid, 32) \
- BCH_INODE_FIELD(i_gid, 32) \
- BCH_INODE_FIELD(i_nlink, 32) \
- BCH_INODE_FIELD(i_generation, 32) \
- BCH_INODE_FIELD(i_dev, 32)
+
+struct bch_inode_generation {
+ struct bch_val v;
+
+ __le32 bi_generation;
+ __le32 pad;
+} __attribute__((packed, aligned(8)));
+
+#define BCH_INODE_FIELDS() \
+ x(bi_atime, 64) \
+ x(bi_ctime, 64) \
+ x(bi_mtime, 64) \
+ x(bi_otime, 64) \
+ x(bi_size, 64) \
+ x(bi_sectors, 64) \
+ x(bi_uid, 32) \
+ x(bi_gid, 32) \
+ x(bi_nlink, 32) \
+ x(bi_generation, 32) \
+ x(bi_dev, 32) \
+ x(bi_data_checksum, 8) \
+ x(bi_compression, 8) \
+ x(bi_project, 32) \
+ x(bi_background_compression, 8) \
+ x(bi_data_replicas, 8) \
+ x(bi_promote_target, 16) \
+ x(bi_foreground_target, 16) \
+ x(bi_background_target, 16) \
+ x(bi_erasure_code, 16) \
+ x(bi_fields_set, 16)
+
+/* subset of BCH_INODE_FIELDS */
+#define BCH_INODE_OPTS() \
+ x(data_checksum, 8) \
+ x(compression, 8) \
+ x(project, 32) \
+ x(background_compression, 8) \
+ x(data_replicas, 8) \
+ x(promote_target, 16) \
+ x(foreground_target, 16) \
+ x(background_target, 16) \
+ x(erasure_code, 16)
+
+enum inode_opt_id { /* Inode_opt_<name> for each BCH_INODE_OPTS() entry, numbered from 0 in list order */
+#define x(name, ...) \
+ Inode_opt_##name,
+ BCH_INODE_OPTS()
+#undef x
+ Inode_opt_nr, /* number of entries in BCH_INODE_OPTS() */
+};
enum {
/*
__BCH_INODE_I_SIZE_DIRTY= 5,
__BCH_INODE_I_SECTORS_DIRTY= 6,
-
- /* not implemented yet: */
- __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
+ __BCH_INODE_UNLINKED = 7,
/* bits 20+ reserved for packed fields below: */
};
#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME)
#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
-#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
-
-LE32_BITMASK(INODE_STR_HASH, struct bch_inode, i_flags, 20, 24);
-LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, i_flags, 24, 32);
+#define BCH_INODE_UNLINKED (1 << __BCH_INODE_UNLINKED)
-struct bch_inode_blockdev {
- struct bch_val v;
-
- __le64 i_size;
- __le64 i_flags;
-
- /* Seconds: */
- __le64 i_ctime;
- __le64 i_mtime;
-
- uuid_le i_uuid;
- __u8 i_label[32];
-} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV);
-
-/* Thin provisioned volume, or cache for another block device? */
-LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24);
+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32);
/* Dirents */
* collision:
*/
-enum {
- BCH_DIRENT = 128,
- BCH_DIRENT_WHITEOUT = 129,
-};
-
struct bch_dirent {
struct bch_val v;
__u8 d_name[];
} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(dirent, BCH_DIRENT);
-/* Xattrs */
+#define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \
+ sizeof(struct bkey) - \
+ offsetof(struct bch_dirent, d_name))
-enum {
- BCH_XATTR = 128,
- BCH_XATTR_WHITEOUT = 129,
-};
-#define BCH_XATTR_INDEX_USER 0
-#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1
-#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2
-#define BCH_XATTR_INDEX_TRUSTED 3
-#define BCH_XATTR_INDEX_SECURITY 4
+/* Xattrs */
+
+#define KEY_TYPE_XATTR_INDEX_USER 0
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1
+#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2
+#define KEY_TYPE_XATTR_INDEX_TRUSTED 3
+#define KEY_TYPE_XATTR_INDEX_SECURITY 4
struct bch_xattr {
struct bch_val v;
__le16 x_val_len;
__u8 x_name[];
} __attribute__((packed, aligned(8)));
-BKEY_VAL_TYPE(xattr, BCH_XATTR);
-
-/* Superblock */
-
-/* Version 0: Cache device
- * Version 1: Backing device
- * Version 2: Seed pointer into btree node checksum
- * Version 3: Cache device with new UUID format
- * Version 4: Backing device with data offset
- * Version 5: All the incompat changes
- * Version 6: Cache device UUIDs all in superblock, another incompat bset change
- * Version 7: Encryption (expanded checksum fields), other random things
- */
-#define BCACHE_SB_VERSION_CDEV_V0 0
-#define BCACHE_SB_VERSION_BDEV 1
-#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3
-#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
-#define BCACHE_SB_VERSION_CDEV_V2 5
-#define BCACHE_SB_VERSION_CDEV_V3 6
-#define BCACHE_SB_VERSION_CDEV_V4 7
-#define BCACHE_SB_VERSION_CDEV 7
-#define BCACHE_SB_MAX_VERSION 7
-#define BCH_SB_SECTOR 8
-#define BCH_SB_LABEL_SIZE 32
-#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
+/* Bucket/allocation information: */
+
+struct bch_alloc {
+ struct bch_val v;
+ __u8 fields;
+ __u8 gen;
+ __u8 data[];
+} __attribute__((packed, aligned(8)));
+
+#define BCH_ALLOC_FIELDS() \
+ x(read_time, 16) \
+ x(write_time, 16) \
+ x(data_type, 8) \
+ x(dirty_sectors, 16) \
+ x(cached_sectors, 16) \
+ x(oldest_gen, 8)
+
+/*
+ * Index of each BCH_ALLOC_FIELDS() entry, numbered from 0 in list order. The
+ * second x-macro argument is the field width in bits; it is unused here.
+ */
+enum {
+#define x(name, bits) BCH_ALLOC_FIELD_##name,
+ BCH_ALLOC_FIELDS()
+#undef x
+ BCH_ALLOC_FIELD_NR /* number of entries in BCH_ALLOC_FIELDS() */
+};
+
+static const unsigned BCH_ALLOC_FIELD_BYTES[] = { /* indexed by BCH_ALLOC_FIELD_<name> */
+#define x(name, bits) [BCH_ALLOC_FIELD_##name] = bits / 8, /* width in bits -> whole bytes */
+ BCH_ALLOC_FIELDS()
+#undef x
+};
+
+#define x(name, bits) + (bits / 8) /* each BCH_ALLOC_FIELDS() entry expands to "+ <its size in bytes>" */
+static const unsigned BKEY_ALLOC_VAL_U64s_MAX = /* fixed header plus every field, in units of u64 */
+ DIV_ROUND_UP(offsetof(struct bch_alloc, data) /* bytes before the variable-length data[] */
+ BCH_ALLOC_FIELDS(), sizeof(u64)); /* the list expands into a sum added to the offsetof() above */
+#undef x
+
+/* Quotas: */
+
+enum quota_types {
+ QTYP_USR = 0,
+ QTYP_GRP = 1,
+ QTYP_PRJ = 2,
+ QTYP_NR = 3,
+};
+
+enum quota_counters {
+ Q_SPC = 0,
+ Q_INO = 1,
+ Q_COUNTERS = 2,
+};
+
+struct bch_quota_counter {
+ __le64 hardlimit;
+ __le64 softlimit;
+};
+
+struct bch_quota {
+ struct bch_val v;
+ struct bch_quota_counter c[Q_COUNTERS];
+} __attribute__((packed, aligned(8)));
+
+/* Erasure coding */
+
+struct bch_stripe {
+ struct bch_val v;
+ __le16 sectors;
+ __u8 algorithm;
+ __u8 nr_blocks;
+ __u8 nr_redundant;
+
+ __u8 csum_granularity_bits;
+ __u8 csum_type;
+ __u8 pad;
+
+ struct bch_extent_ptr ptrs[0];
+} __attribute__((packed, aligned(8)));
+
+/* Optional/variable size superblock sections: */
+
+struct bch_sb_field {
+ __u64 _data[0];
+ __le32 u64s;
+ __le32 type;
+};
+
+#define BCH_SB_FIELDS() \
+ x(journal, 0) \
+ x(members, 1) \
+ x(crypt, 2) \
+ x(replicas_v0, 3) \
+ x(quota, 4) \
+ x(disk_groups, 5) \
+ x(clean, 6) \
+ x(replicas, 7)
+
+enum bch_sb_field_type { /* one BCH_SB_FIELD_<name> per BCH_SB_FIELDS() entry */
+#define x(f, nr) BCH_SB_FIELD_##f = nr,
+ BCH_SB_FIELDS()
+#undef x
+ BCH_SB_FIELD_NR /* one past the last value listed in BCH_SB_FIELDS() */
+};
+
+/* BCH_SB_FIELD_journal: */
+
+struct bch_sb_field_journal {
+ struct bch_sb_field field;
+ __le64 buckets[0];
+};
+
+/* BCH_SB_FIELD_members: */
+
+#define BCH_MIN_NR_NBUCKETS (1 << 6)
struct bch_member {
uuid_le uuid;
};
LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4)
-LE64_BITMASK(BCH_MEMBER_TIER, struct bch_member, flags[0], 4, 8)
-LE64_BITMASK(BCH_MEMBER_HAS_METADATA, struct bch_member, flags[0], 8, 9)
-LE64_BITMASK(BCH_MEMBER_HAS_DATA, struct bch_member, flags[0], 9, 10)
+/* 4-10 unused, was TIER, HAS_(META)DATA */
LE64_BITMASK(BCH_MEMBER_REPLACEMENT, struct bch_member, flags[0], 10, 14)
-LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15);
+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15)
+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28)
+LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30)
+
+#define BCH_TIER_MAX 4U
#if 0
LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
BCH_MEMBER_STATE_NR = 4,
};
-#define BCH_TIER_MAX 4U
-
enum cache_replacement {
CACHE_REPLACEMENT_LRU = 0,
CACHE_REPLACEMENT_FIFO = 1,
CACHE_REPLACEMENT_NR = 3,
};
-struct bch_sb_layout {
- uuid_le magic; /* bcachefs superblock UUID */
- __u8 layout_type;
- __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
- __u8 nr_superblocks;
- __u8 pad[5];
- __u64 sb_offset[61];
-} __attribute__((packed, aligned(8)));
-
-#define BCH_SB_LAYOUT_SECTOR 7
-
-struct bch_sb_field {
- __u64 _data[0];
- __le32 u64s;
- __le32 type;
-};
-
-enum bch_sb_field_type {
- BCH_SB_FIELD_journal = 0,
- BCH_SB_FIELD_members = 1,
- BCH_SB_FIELD_crypt = 2,
- BCH_SB_FIELD_NR = 3,
-};
-
-struct bch_sb_field_journal {
- struct bch_sb_field field;
- __le64 buckets[0];
-};
-
struct bch_sb_field_members {
struct bch_sb_field field;
struct bch_member members[0];
};
-/* Crypto: */
+/* BCH_SB_FIELD_crypt: */
struct nonce {
__le32 d[4];
LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
-struct bch_sb_field_replication {
+/* BCH_SB_FIELD_replicas: */
+
+enum bch_data_type {
+ BCH_DATA_NONE = 0,
+ BCH_DATA_SB = 1,
+ BCH_DATA_JOURNAL = 2,
+ BCH_DATA_BTREE = 3,
+ BCH_DATA_USER = 4,
+ BCH_DATA_CACHED = 5,
+ BCH_DATA_NR = 6,
+};
+
+struct bch_replicas_entry_v0 {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 devs[0];
+} __attribute__((packed));
+
+struct bch_sb_field_replicas_v0 {
+ struct bch_sb_field field;
+ struct bch_replicas_entry_v0 entries[0];
+} __attribute__((packed, aligned(8)));
+
+struct bch_replicas_entry {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 nr_required;
+ __u8 devs[0];
+} __attribute__((packed));
+
+struct bch_sb_field_replicas {
struct bch_sb_field field;
+ struct bch_replicas_entry entries[0];
+} __attribute__((packed, aligned(8)));
+
+/* BCH_SB_FIELD_quota: */
+
+struct bch_sb_quota_counter {
+ __le32 timelimit;
+ __le32 warnlimit;
+};
+
+struct bch_sb_quota_type {
+ __le64 flags;
+ struct bch_sb_quota_counter c[Q_COUNTERS];
};
+struct bch_sb_field_quota {
+ struct bch_sb_field field;
+ struct bch_sb_quota_type q[QTYP_NR];
+} __attribute__((packed, aligned(8)));
+
+/* BCH_SB_FIELD_disk_groups: */
+
+#define BCH_SB_LABEL_SIZE 32
+
+struct bch_disk_group {
+ __u8 label[BCH_SB_LABEL_SIZE];
+ __le64 flags[2];
+} __attribute__((packed, aligned(8)));
+
+LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
+LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
+
+struct bch_sb_field_disk_groups {
+ struct bch_sb_field field;
+ struct bch_disk_group entries[0];
+} __attribute__((packed, aligned(8)));
+
+/*
+ * On clean shutdown, store btree roots and current journal sequence number in
+ * the superblock:
+ */
+struct jset_entry {
+ __le16 u64s;
+ __u8 btree_id;
+ __u8 level;
+ __u8 type; /* designates what this jset entry holds; presumably a BCH_JSET_ENTRY_* value - TODO confirm */
+ __u8 pad[3];
+
+ union { /* zero-length arrays: two typed views of whatever follows the fixed header */
+ struct bkey_i start[0];
+ __u64 _data[0];
+ };
+};
+
+struct bch_sb_field_clean {
+ struct bch_sb_field field;
+
+ __le32 flags;
+ __le16 read_clock;
+ __le16 write_clock;
+ __le64 journal_seq;
+
+ union {
+ struct jset_entry start[0];
+ __u64 _data[0];
+ };
+};
+
+/* Superblock: */
+
+/*
+ * New versioning scheme:
+ * One common version number for all on disk data structures - superblock, btree
+ * nodes, journal entries
+ */
+#define BCH_JSET_VERSION_OLD 2
+#define BCH_BSET_VERSION_OLD 3
+
+enum bcachefs_metadata_version {
+ bcachefs_metadata_version_min = 9,
+ bcachefs_metadata_version_new_versioning = 10,
+ bcachefs_metadata_version_bkey_renumber = 10, /* same number as new_versioning */
+ bcachefs_metadata_version_max = 11, /* exclusive upper bound: "current" below is max - 1 */
+};
+
+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) /* evaluates to 10 */
+
+#define BCH_SB_SECTOR 8
+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
+
+struct bch_sb_layout {
+ uuid_le magic; /* bcachefs superblock UUID */
+ __u8 layout_type;
+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
+ __u8 nr_superblocks;
+ __u8 pad[5];
+ __le64 sb_offset[61];
+} __attribute__((packed, aligned(8)));
+
+#define BCH_SB_LAYOUT_SECTOR 7
+
/*
* @offset - sector where this sb was written
* @version - on disk format version
+ * @version_min - Oldest metadata version this filesystem contains; so we can
+ * safely drop compatibility code and refuse to mount filesystems
+ * we'd need it for
* @magic - identifies as a bcachefs superblock (BCACHE_MAGIC)
* @seq - incremented each time superblock is written
* @uuid - used for generating various magic numbers and identifying
*/
struct bch_sb {
struct bch_csum csum;
- __le64 version;
+ __le16 version;
+ __le16 version_min;
+ __le16 pad[2];
uuid_le magic;
uuid_le uuid;
uuid_le user_uuid;
* algorithm in use, if/when we get more than one
*/
+LE16_BITMASK(BCH_SB_BLOCK_SIZE, struct bch_sb, block_size, 0, 16);
+
LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1);
LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8);
LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
-LE64_BITMASK(BCH_SB_META_REPLICAS_HAVE, struct bch_sb, flags[0], 56, 60);
-LE64_BITMASK(BCH_SB_DATA_REPLICAS_HAVE, struct bch_sb, flags[0], 60, 64);
+LE64_BITMASK(BCH_SB_POSIX_ACL, struct bch_sb, flags[0], 56, 57);
+LE64_BITMASK(BCH_SB_USRQUOTA, struct bch_sb, flags[0], 57, 58);
+LE64_BITMASK(BCH_SB_GRPQUOTA, struct bch_sb, flags[0], 58, 59);
+LE64_BITMASK(BCH_SB_PRJQUOTA, struct bch_sb, flags[0], 59, 60);
+
+/* 60-64 unused */
LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
-/* 14-20 unused, was JOURNAL_ENTRY_SIZE */
+
+/*
+ * Max size of an extent that may require bouncing to read or write
+ * (checksummed, compressed): 64k
+ */
+LE64_BITMASK(BCH_SB_ENCODED_EXTENT_MAX_BITS,
+ struct bch_sb, flags[1], 14, 20);
LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
+LE64_BITMASK(BCH_SB_PROMOTE_TARGET, struct bch_sb, flags[1], 28, 40);
+LE64_BITMASK(BCH_SB_FOREGROUND_TARGET, struct bch_sb, flags[1], 40, 52);
+LE64_BITMASK(BCH_SB_BACKGROUND_TARGET, struct bch_sb, flags[1], 52, 64);
+
+LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE,
+ struct bch_sb, flags[2], 0, 4);
+LE64_BITMASK(BCH_SB_GC_RESERVE_BYTES, struct bch_sb, flags[2], 4, 64);
+
+LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
+
/* Features: */
enum bch_sb_features {
BCH_FEATURE_LZ4 = 0,
BCH_FEATURE_GZIP = 1,
+ BCH_FEATURE_ZSTD = 2,
+ BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
+ BCH_FEATURE_EC = 4,
+ BCH_FEATURE_NR,
+};
+
+enum bch_sb_compat {
+ BCH_COMPAT_FEAT_ALLOC_INFO = 0,
};
/* options: */
#define BCH_REPLICAS_MAX 4U
-#if 0
-#define BCH_ERROR_ACTIONS() \
- x(BCH_ON_ERROR_CONTINUE, 0, "continue") \
- x(BCH_ON_ERROR_RO, 1, "remount-ro") \
- x(BCH_ON_ERROR_PANIC, 2, "panic") \
- x(BCH_NR_ERROR_ACTIONS, 3, NULL)
-
-enum bch_error_actions {
-#define x(_opt, _nr, _str) _opt = _nr,
- BCH_ERROR_ACTIONS()
-#undef x
-};
-#endif
-
enum bch_error_actions {
BCH_ON_ERROR_CONTINUE = 0,
BCH_ON_ERROR_RO = 1,
BCH_STR_HASH_NR = 3,
};
-enum bch_compression_opts {
- BCH_COMPRESSION_NONE = 0,
- BCH_COMPRESSION_LZ4 = 1,
- BCH_COMPRESSION_GZIP = 2,
- BCH_COMPRESSION_NR = 3,
-};
-
-/* backing device specific stuff: */
-
-struct backingdev_sb {
- __le64 csum;
- __le64 offset; /* sector where this sb was written */
- __le64 version; /* of on disk format */
-
- uuid_le magic; /* bcachefs superblock UUID */
-
- uuid_le disk_uuid;
-
- /*
- * Internal cache set UUID - xored with various magic numbers and thus
- * must never change:
- */
- union {
- uuid_le set_uuid;
- __le64 set_magic;
- };
- __u8 label[BCH_SB_LABEL_SIZE];
-
- __le64 flags;
-
- /* Incremented each time superblock is written: */
- __le64 seq;
-
- /*
- * User visible UUID for identifying the cache set the user is allowed
- * to change:
- *
- * XXX hooked up?
- */
- uuid_le user_uuid;
- __le64 pad1[6];
-
- __le64 data_offset;
- __le16 block_size; /* sectors */
- __le16 pad2[3];
+#define BCH_COMPRESSION_TYPES() \
+ x(NONE) \
+ x(LZ4) \
+ x(GZIP) \
+ x(ZSTD)
- __le32 last_mount; /* time_t */
- __le16 pad3;
- /* size of variable length portion - always 0 for backingdev superblock */
- __le16 u64s;
- __u64 _data[0];
+enum bch_compression_opts {
+#define x(t) BCH_COMPRESSION_OPT_##t,
+ BCH_COMPRESSION_TYPES()
+#undef x
+ BCH_COMPRESSION_OPT_NR
};
-LE64_BITMASK(BDEV_CACHE_MODE, struct backingdev_sb, flags, 0, 4);
-#define CACHE_MODE_WRITETHROUGH 0U
-#define CACHE_MODE_WRITEBACK 1U
-#define CACHE_MODE_WRITEAROUND 2U
-#define CACHE_MODE_NONE 3U
-
-LE64_BITMASK(BDEV_STATE, struct backingdev_sb, flags, 61, 63);
-#define BDEV_STATE_NONE 0U
-#define BDEV_STATE_CLEAN 1U
-#define BDEV_STATE_DIRTY 2U
-#define BDEV_STATE_STALE 3U
-
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
-
-static inline _Bool __SB_IS_BDEV(__u64 version)
-{
- return version == BCACHE_SB_VERSION_BDEV
- || version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
-}
-
-static inline _Bool SB_IS_BDEV(const struct bch_sb *sb)
-{
- return __SB_IS_BDEV(sb->version);
-}
-
/*
* Magic numbers
*
UUID_LE(0xf67385c6, 0x1a4e, 0xca45, \
0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81)
-#define BCACHE_STATFS_MAGIC 0xca451a4e
+#define BCACHEFS_STATFS_MAGIC 0xca451a4e
#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL)
-#define PSET_MAGIC __cpu_to_le64(0x6750e15f87337f91ULL)
#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL)
static inline __le64 __bch2_sb_magic(struct bch_sb *sb)
return __le64_to_cpu(__bch2_sb_magic(sb) ^ JSET_MAGIC);
}
-static inline __u64 __pset_magic(struct bch_sb *sb)
-{
- return __le64_to_cpu(__bch2_sb_magic(sb) ^ PSET_MAGIC);
-}
-
static inline __u64 __bset_magic(struct bch_sb *sb)
{
return __le64_to_cpu(__bch2_sb_magic(sb) ^ BSET_MAGIC);
/* Journal */
-#define BCACHE_JSET_VERSION_UUIDv1 1
-#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
-#define BCACHE_JSET_VERSION_JKEYS 2
-#define BCACHE_JSET_VERSION 2
+#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) /* sizeof(struct jset_entry) in units of __u64 */
-struct jset_entry {
- __le16 u64s;
- __u8 btree_id;
- __u8 level;
- __le32 flags; /* designates what this jset holds */
+#define BCH_JSET_ENTRY_TYPES() /* x-macro list of (name, number) pairs */ \
+ x(btree_keys, 0) \
+ x(btree_root, 1) \
+ x(prio_ptrs, 2) \
+ x(blacklist, 3) \
+ x(blacklist_v2, 4) \
+ x(usage, 5) \
+ x(data_usage, 6)
- union {
- struct bkey_i start[0];
- __u64 _data[0];
- };
+enum { /* BCH_JSET_ENTRY_<f> = nr for every x(f, nr) in BCH_JSET_ENTRY_TYPES() */
+#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
+ BCH_JSET_ENTRY_TYPES()
+#undef x
+ BCH_JSET_ENTRY_NR /* last listed nr + 1, i.e. 7 with the current list */
};
-#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64))
+/*
+ * Journal sequence numbers can be blacklisted: bsets record the max sequence
+ * number of all the journal entries they contain updates for, so that on
+ * recovery we can ignore those bsets that contain index updates newer than what
+ * made it into the journal.
+ *
+ * This means that we can't reuse that journal_seq - we have to skip it, and
+ * then record that we skipped it so that the next time we crash and recover we
+ * don't think there was a missing journal entry.
+ */
+struct jset_entry_blacklist { /* a struct jset_entry followed by one sequence number */
+ struct jset_entry entry; /* embedded as the first member */
+ __le64 seq; /* little-endian; presumably the skipped journal seq - confirm */
+};
-LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8);
-enum {
- JOURNAL_ENTRY_BTREE_KEYS = 0,
- JOURNAL_ENTRY_BTREE_ROOT = 1,
- JOURNAL_ENTRY_PRIO_PTRS = 2,
+struct jset_entry_blacklist_v2 { /* carries two bounds where jset_entry_blacklist has a single seq */
+ struct jset_entry entry; /* embedded as the first member */
+ __le64 start; /* NOTE(review): presumably first blacklisted journal seq - confirm */
+ __le64 end; /* NOTE(review): inclusive vs. exclusive is not visible here - confirm */
+};
- /*
- * Journal sequence numbers can be blacklisted: bsets record the max
- * sequence number of all the journal entries they contain updates for,
- * so that on recovery we can ignore those bsets that contain index
- * updates newer that what made it into the journal.
- *
- * This means that we can't reuse that journal_seq - we have to skip it,
- * and then record that we skipped it so that the next time we crash and
- * recover we don't think there was a missing journal entry.
- */
- JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3,
+enum { /* NOTE(review): presumably selects what a jset_entry_usage counts - confirm */
+ FS_USAGE_RESERVED = 0,
+ FS_USAGE_INODES = 1,
+ FS_USAGE_KEY_VERSION = 2,
+ FS_USAGE_NR = 3 /* count of the FS_USAGE_* values above */
};
+struct jset_entry_usage { /* a struct jset_entry followed by one 64-bit value */
+ struct jset_entry entry; /* embedded as the first member */
+ __le64 v; /* little-endian; what it measures is not visible in this part of the header */
+} __attribute__((packed)); /* packed: no padding between or after the members */
+
+struct jset_entry_data_usage { /* like jset_entry_usage, plus a trailing replicas entry */
+ struct jset_entry entry; /* embedded as the first member */
+ __le64 v; /* little-endian 64-bit value */
+ struct bch_replicas_entry r; /* type defined elsewhere; NOTE(review): presumably names what v applies to - confirm */
+} __attribute__((packed)); /* packed: no padding between or after the members */
+
/*
* On disk format for a journal entry:
* seq is monotonically increasing; every journal entry has its own unique
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
-#define BCH_JOURNAL_BUCKETS_MIN 20
-
-/* Bucket prios/gens */
-
-struct prio_set {
- struct bch_csum csum;
-
- __le64 magic;
- __le32 nonce[3];
- __le16 version;
- __le16 flags;
-
- __u8 encrypted_start[0];
-
- __le64 next_bucket;
-
- struct bucket_disk {
- __le16 read_prio;
- __le16 write_prio;
- __u8 gen;
- } __attribute__((packed)) data[];
-} __attribute__((packed, aligned(8)));
-
-LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4);
+#define BCH_JOURNAL_BUCKETS_MIN 8
/* Btree: */
-#define DEFINE_BCH_BTREE_IDS() \
- DEF_BTREE_ID(EXTENTS, 0, "extents") \
- DEF_BTREE_ID(INODES, 1, "inodes") \
- DEF_BTREE_ID(DIRENTS, 2, "dirents") \
- DEF_BTREE_ID(XATTRS, 3, "xattrs")
-
-#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
+#define BCH_BTREE_IDS() /* x-macro list of (keyword, value, name string) */ \
+ x(EXTENTS, 0, "extents") \
+ x(INODES, 1, "inodes") \
+ x(DIRENTS, 2, "dirents") \
+ x(XATTRS, 3, "xattrs") \
+ x(ALLOC, 4, "alloc") \
+ x(QUOTAS, 5, "quotas") \
+ x(EC, 6, "erasure_coding")
enum btree_id {
- DEFINE_BCH_BTREE_IDS()
+#define x(kwd, val, name) BTREE_ID_##kwd = val, /* the name string is not used here */
+ BCH_BTREE_IDS() /* yields BTREE_ID_EXTENTS = 0 through BTREE_ID_EC = 6 */
+#undef x
BTREE_ID_NR
};
-#undef DEF_BTREE_ID
-
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
-/* Version 1: Seed pointer into btree node checksum
- */
-#define BCACHE_BSET_CSUM 1
-#define BCACHE_BSET_KEY_v1 2
-#define BCACHE_BSET_JOURNAL_SEQ 3
-#define BCACHE_BSET_VERSION 3
-
/*
* Btree nodes
*
};
} __attribute__((packed, aligned(8)));
-LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
-LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
+LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
+/* 8-32 unused */ /* NOTE(review): the two trailing arguments read as a half-open bit range of flags - confirm against LE64_BITMASK */
+LE64_BITMASK(BTREE_NODE_SEQ, struct btree_node, flags, 32, 64);
struct btree_node_entry {
struct bch_csum csum;
};
} __attribute__((packed, aligned(8)));
-#ifdef __cplusplus
-}
-#endif
-#endif /* _LINUX_BCACHE_H */
-
-/* vim: set foldnestmax=2: */
+#endif /* _BCACHEFS_FORMAT_H */