From 0c7db4eca3e6519043c10288cb41f8a0ee634a0b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 30 Nov 2018 12:38:54 -0500 Subject: [PATCH] Update bcachefs sources to 62de7539dc bcachefs: Make bkey types globally unique --- .bcachefs_revision | 2 +- cmd_debug.c | 6 +- cmd_migrate.c | 5 +- include/trace/events/bcachefs.h | 2 +- libbcachefs.c | 3 +- libbcachefs/acl.c | 6 +- libbcachefs/alloc_background.c | 28 +- libbcachefs/alloc_background.h | 2 +- libbcachefs/alloc_foreground.c | 11 +- libbcachefs/alloc_foreground.h | 2 +- libbcachefs/bcachefs.h | 17 +- libbcachefs/bcachefs_format.h | 188 ++--- libbcachefs/bkey.c | 2 +- libbcachefs/bkey.h | 88 +- libbcachefs/bkey_methods.c | 218 +++-- libbcachefs/bkey_methods.h | 53 +- libbcachefs/bkey_sort.c | 652 +++++++++++++++ libbcachefs/bkey_sort.h | 68 ++ libbcachefs/bset.h | 6 +- libbcachefs/btree_cache.c | 18 +- libbcachefs/btree_cache.h | 6 +- libbcachefs/btree_gc.c | 323 +++---- libbcachefs/btree_gc.h | 8 +- libbcachefs/btree_io.c | 511 ++---------- libbcachefs/btree_io.h | 42 - libbcachefs/btree_iter.c | 32 +- libbcachefs/btree_locking.h | 9 +- libbcachefs/btree_types.h | 36 +- libbcachefs/btree_update.h | 2 +- libbcachefs/btree_update_interior.c | 130 +-- libbcachefs/btree_update_leaf.c | 44 +- libbcachefs/buckets.c | 371 +++++---- libbcachefs/buckets.h | 9 +- libbcachefs/debug.c | 6 +- libbcachefs/dirent.c | 72 +- libbcachefs/dirent.h | 2 +- libbcachefs/ec.c | 131 ++- libbcachefs/ec.h | 18 +- libbcachefs/ec_types.h | 3 +- libbcachefs/extents.c | 1202 ++++++++++----------------- libbcachefs/extents.h | 590 +++++++------ libbcachefs/fs-io.c | 33 +- libbcachefs/fs.c | 12 +- libbcachefs/fsck.c | 28 +- libbcachefs/inode.c | 108 ++- libbcachefs/inode.h | 12 +- libbcachefs/io.c | 19 +- libbcachefs/journal.c | 16 +- libbcachefs/journal_io.c | 58 +- libbcachefs/migrate.c | 26 +- libbcachefs/move.c | 58 +- libbcachefs/move.h | 2 +- libbcachefs/movinggc.c | 34 +- libbcachefs/opts.h | 3 + libbcachefs/quota.c | 63 +- 
libbcachefs/quota.h | 8 +- libbcachefs/rebalance.c | 36 +- libbcachefs/recovery.c | 29 +- libbcachefs/replicas.c | 96 +-- libbcachefs/replicas.h | 5 +- libbcachefs/str_hash.h | 9 +- libbcachefs/super-io.c | 53 +- libbcachefs/super-io.h | 2 + libbcachefs/super.c | 3 +- libbcachefs/sysfs.c | 2 +- libbcachefs/xattr.c | 102 +-- libbcachefs/xattr.h | 2 +- 67 files changed, 2862 insertions(+), 2881 deletions(-) create mode 100644 libbcachefs/bkey_sort.c create mode 100644 libbcachefs/bkey_sort.h diff --git a/.bcachefs_revision b/.bcachefs_revision index abb9e48..34a8011 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -da7fefde294e3c56359ee498a62a77182a4733cd +62de7539dc2586b4bd7058b138de89f334d0c6bd diff --git a/cmd_debug.c b/cmd_debug.c index 1c5af4d..a9d5977 100644 --- a/cmd_debug.c +++ b/cmd_debug.c @@ -160,8 +160,7 @@ static void list_keys(struct bch_fs *c, enum btree_id btree_id, if (bkey_cmp(k.k->p, end) > 0) break; - bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, btree_id), k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); puts(buf); } bch2_btree_iter_unlock(&iter); @@ -202,8 +201,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, fputs(buf, stdout); for_each_btree_node_key_unpack(b, k, &node_iter, &unpacked) { - bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, btree_id), k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); putchar('\t'); puts(buf); } diff --git a/cmd_migrate.c b/cmd_migrate.c index 497a418..7863dec 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -333,7 +333,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, e->k.p.inode = dst->bi_inum; e->k.p.offset = logical + sectors; e->k.size = sectors; - extent_ptr_append(e, (struct bch_extent_ptr) { + bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) { .offset = physical, .dev = 0, .gen = bucket(ca, b)->mark.gen, @@ -347,8 +347,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, die("error reserving space in 
new filesystem: %s", strerror(-ret)); - bch2_mark_bkey_replicas(c, BCH_DATA_USER, - extent_i_to_s_c(e).s_c); + bch2_mark_bkey_replicas(c, extent_i_to_s_c(e).s_c); ret = bch2_btree_insert(c, BTREE_ID_EXTENTS, &e->k_i, &res, NULL, 0); diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 73be887..6781a5b 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -164,7 +164,7 @@ TRACE_EVENT(btree_write, TP_ARGS(b, bytes, sectors), TP_STRUCT__entry( - __field(enum bkey_type, type) + __field(enum btree_node_type, type) __field(unsigned, bytes ) __field(unsigned, sectors ) ), diff --git a/libbcachefs.c b/libbcachefs.c index 98f058d..b24e7f3 100644 --- a/libbcachefs.c +++ b/libbcachefs.c @@ -185,7 +185,8 @@ struct bch_sb *bch2_format(struct format_opts opts, if (bch2_sb_realloc(&sb, 0)) die("insufficient memory"); - sb.sb->version = cpu_to_le64(BCH_SB_VERSION_MAX); + sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); + sb.sb->version_min = le16_to_cpu(bcachefs_metadata_version_current); sb.sb->magic = BCACHE_MAGIC; sb.sb->block_size = cpu_to_le16(opts.block_size); sb.sb->user_uuid = opts.uuid; diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 741e44e..348060b 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -23,9 +23,9 @@ static inline int acl_to_xattr_type(int type) { switch (type) { case ACL_TYPE_ACCESS: - return BCH_XATTR_INDEX_POSIX_ACL_ACCESS; + return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; case ACL_TYPE_DEFAULT: - return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT; + return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; default: BUG(); } @@ -351,7 +351,7 @@ int bch2_acl_chmod(struct btree_trans *trans, iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &inode->ei_str_hash, inode->v.i_ino, - &X_SEARCH(BCH_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), + &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (IS_ERR(iter)) return PTR_ERR(iter) != -ENOENT ? 
PTR_ERR(iter) : 0; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 8992916..2e2fb99 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -75,22 +75,15 @@ static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) { + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + if (k.k->p.inode >= c->sb.nr_devices || !c->devs[k.k->p.inode]) return "invalid device"; - switch (k.k->type) { - case BCH_ALLOC: { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - - /* allow for unknown fields */ - if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) - return "incorrect value size"; - break; - } - default: - return "invalid type"; - } + /* allow for unknown fields */ + if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) + return "incorrect value size"; return NULL; } @@ -98,14 +91,9 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - switch (k.k->type) { - case BCH_ALLOC: { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - pr_buf(out, "gen %u", a.v->gen); - break; - } - } + pr_buf(out, "gen %u", a.v->gen); } static inline unsigned get_alloc_field(const u8 **p, unsigned bytes) @@ -157,7 +145,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k) struct bucket *g; const u8 *d; - if (k.k->type != BCH_ALLOC) + if (k.k->type != KEY_TYPE_alloc) return; a = bkey_s_c_to_alloc(k); diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 6911fa6..b382c8b 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -10,7 +10,7 @@ const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_alloc_ops (struct bkey_ops) { \ 
+#define bch2_bkey_ops_alloc (struct bkey_ops) { \ .key_invalid = bch2_alloc_invalid, \ .val_to_text = bch2_alloc_to_text, \ } diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 91ab336..5024e56 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -922,7 +922,8 @@ err: * as allocated out of @ob */ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, - struct bkey_i_extent *e, unsigned sectors) + struct bkey_i *k, unsigned sectors) + { struct open_bucket *ob; unsigned i; @@ -934,13 +935,11 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); struct bch_extent_ptr tmp = ob->ptr; - EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev)); - - tmp.cached = bkey_extent_is_cached(&e->k) || - (!ca->mi.durability && wp->type == BCH_DATA_USER); + tmp.cached = !ca->mi.durability && + wp->type == BCH_DATA_USER; tmp.offset += ca->mi.bucket_size - ob->sectors_free; - extent_ptr_append(e, tmp); + bch2_bkey_append_ptr(k, tmp); BUG_ON(sectors > ob->sectors_free); ob->sectors_free -= sectors; diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index a332e9d..b0e44f7 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -100,7 +100,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *, struct closure *); void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *, - struct bkey_i_extent *, unsigned); + struct bkey_i *, unsigned); void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 05891a0..d69da3e 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -222,6 +222,8 @@ printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_err(c, fmt, ...) 
\ printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) +#define bch_err_ratelimited(c, fmt, ...) \ + printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_verbose(c, fmt, ...) \ do { \ @@ -331,6 +333,7 @@ enum bch_time_stats { struct btree; enum gc_phase { + GC_PHASE_NOT_RUNNING, GC_PHASE_START, GC_PHASE_SB, @@ -535,6 +538,7 @@ struct bch_fs { uuid_le uuid; uuid_le user_uuid; + u16 version; u16 encoded_extent_max; u8 nr_devices; @@ -684,16 +688,17 @@ struct bch_fs { /* REBALANCE */ struct bch_fs_rebalance rebalance; - /* ERASURE CODING */ - struct list_head ec_new_stripe_list; - struct mutex ec_new_stripe_lock; - - GENRADIX(struct ec_stripe) ec_stripes; - struct mutex ec_stripes_lock; + /* STRIPES: */ + GENRADIX(struct stripe) stripes[2]; + struct mutex ec_stripe_create_lock; ec_stripes_heap ec_stripes_heap; spinlock_t ec_stripes_heap_lock; + /* ERASURE CODING */ + struct list_head ec_new_stripe_list; + struct mutex ec_new_stripe_lock; + struct bio_set ec_bioset; struct work_struct ec_stripe_delete_work; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index c462ab2..6d8397b 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -302,15 +302,6 @@ static inline void bkey_init(struct bkey *k) #define __BKEY_PADDED(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -#define BKEY_VAL_TYPE(name, nr) \ -struct bkey_i_##name { \ - union { \ - struct bkey k; \ - struct bkey_i k_i; \ - }; \ - struct bch_##name v; \ -} - /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. @@ -325,19 +316,37 @@ struct bkey_i_##name { \ * by new writes or cluster-wide GC. Node repair can also overwrite them with * the same or a more recent version number, but not with an older version * number. 
+ * + * - WHITEOUT: for hash table btrees */ -#define KEY_TYPE_DELETED 0 -#define KEY_TYPE_DISCARD 1 -#define KEY_TYPE_ERROR 2 -#define KEY_TYPE_COOKIE 3 -#define KEY_TYPE_PERSISTENT_DISCARD 4 -#define KEY_TYPE_GENERIC_NR 128 +#define BCH_BKEY_TYPES() \ + x(deleted, 0) \ + x(discard, 1) \ + x(error, 2) \ + x(cookie, 3) \ + x(whiteout, 4) \ + x(btree_ptr, 5) \ + x(extent, 6) \ + x(reservation, 7) \ + x(inode, 8) \ + x(inode_generation, 9) \ + x(dirent, 10) \ + x(xattr, 11) \ + x(alloc, 12) \ + x(quota, 13) \ + x(stripe, 14) + +enum bch_bkey_type { +#define x(name, nr) KEY_TYPE_##name = nr, + BCH_BKEY_TYPES() +#undef x + KEY_TYPE_MAX, +}; struct bch_cookie { struct bch_val v; __le64 cookie; }; -BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE); /* Extents */ @@ -615,21 +624,12 @@ union bch_extent_entry { #undef x }; -enum { - BCH_EXTENT = 128, - - /* - * This is kind of a hack, we're overloading the type for a boolean that - * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED - * have the same value type: - */ - BCH_EXTENT_CACHED = 129, +struct bch_btree_ptr { + struct bch_val v; - /* - * Persistent reservation: - */ - BCH_RESERVATION = 130, -}; + struct bch_extent_ptr start[0]; + __u64 _data[0]; +} __attribute__((packed, aligned(8))); struct bch_extent { struct bch_val v; @@ -637,7 +637,6 @@ struct bch_extent { union bch_extent_entry start[0]; __u64 _data[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(extent, BCH_EXTENT); struct bch_reservation { struct bch_val v; @@ -646,7 +645,6 @@ struct bch_reservation { __u8 nr_replicas; __u8 pad[3]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(reservation, BCH_RESERVATION); /* Maximum size (in u64s) a single pointer could be: */ #define BKEY_EXTENT_PTR_U64s_MAX\ @@ -674,12 +672,6 @@ BKEY_VAL_TYPE(reservation, BCH_RESERVATION); #define BCACHEFS_ROOT_INO 4096 -enum bch_inode_types { - BCH_INODE_FS = 128, - BCH_INODE_BLOCKDEV = 129, - BCH_INODE_GENERATION = 130, -}; - struct bch_inode { struct bch_val 
v; @@ -688,7 +680,6 @@ struct bch_inode { __le16 bi_mode; __u8 fields[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode, BCH_INODE_FS); struct bch_inode_generation { struct bch_val v; @@ -696,7 +687,6 @@ struct bch_inode_generation { __le32 bi_generation; __le32 pad; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION); #define BCH_INODE_FIELDS() \ BCH_INODE_FIELD(bi_atime, 64) \ @@ -761,24 +751,6 @@ enum { LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); -struct bch_inode_blockdev { - struct bch_val v; - - __le64 i_size; - __le64 i_flags; - - /* Seconds: */ - __le64 i_ctime; - __le64 i_mtime; - - uuid_le i_uuid; - __u8 i_label[32]; -} __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV); - -/* Thin provisioned volume, or cache for another block device? */ -LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) - /* Dirents */ /* @@ -792,11 +764,6 @@ LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) * collision: */ -enum { - BCH_DIRENT = 128, - BCH_DIRENT_WHITEOUT = 129, -}; - struct bch_dirent { struct bch_val v; @@ -811,7 +778,6 @@ struct bch_dirent { __u8 d_name[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(dirent, BCH_DIRENT); #define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \ sizeof(struct bkey) - \ @@ -820,16 +786,11 @@ BKEY_VAL_TYPE(dirent, BCH_DIRENT); /* Xattrs */ -enum { - BCH_XATTR = 128, - BCH_XATTR_WHITEOUT = 129, -}; - -#define BCH_XATTR_INDEX_USER 0 -#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1 -#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -#define BCH_XATTR_INDEX_TRUSTED 3 -#define BCH_XATTR_INDEX_SECURITY 4 +#define KEY_TYPE_XATTR_INDEX_USER 0 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 +#define KEY_TYPE_XATTR_INDEX_SECURITY 4 struct bch_xattr { struct 
bch_val v; @@ -838,14 +799,9 @@ struct bch_xattr { __le16 x_val_len; __u8 x_name[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(xattr, BCH_XATTR); /* Bucket/allocation information: */ -enum { - BCH_ALLOC = 128, -}; - enum { BCH_ALLOC_FIELD_READ_TIME = 0, BCH_ALLOC_FIELD_WRITE_TIME = 1, @@ -857,14 +813,9 @@ struct bch_alloc { __u8 gen; __u8 data[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(alloc, BCH_ALLOC); /* Quotas: */ -enum { - BCH_QUOTA = 128, -}; - enum quota_types { QTYP_USR = 0, QTYP_GRP = 1, @@ -887,14 +838,9 @@ struct bch_quota { struct bch_val v; struct bch_quota_counter c[Q_COUNTERS]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(quota, BCH_QUOTA); /* Erasure coding */ -enum { - BCH_STRIPE = 128, -}; - struct bch_stripe { struct bch_val v; __le16 sectors; @@ -908,7 +854,6 @@ struct bch_stripe { struct bch_extent_ptr ptrs[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(stripe, BCH_STRIPE); /* Optional/variable size superblock sections: */ @@ -1144,15 +1089,21 @@ struct bch_sb_field_clean { /* Superblock: */ /* - * Version 8: BCH_SB_ENCODED_EXTENT_MAX_BITS - * BCH_MEMBER_DATA_ALLOWED - * Version 9: incompatible extent nonce change + * New versioning scheme: + * One common version number for all on disk data structures - superblock, btree + * nodes, journal entries */ +#define BCH_JSET_VERSION_OLD 2 +#define BCH_BSET_VERSION_OLD 3 + +enum bcachefs_metadata_version { + bcachefs_metadata_version_min = 9, + bcachefs_metadata_version_new_versioning = 10, + bcachefs_metadata_version_bkey_renumber = 10, + bcachefs_metadata_version_max = 11, +}; -#define BCH_SB_VERSION_MIN 7 -#define BCH_SB_VERSION_EXTENT_MAX 8 -#define BCH_SB_VERSION_EXTENT_NONCE_V1 9 -#define BCH_SB_VERSION_MAX 9 +#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define BCH_SB_SECTOR 8 #define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ @@ -1171,6 +1122,9 @@ struct bch_sb_layout { /* * @offset - sector where this sb was written * 
@version - on disk format version + * @version_min - Oldest metadata version this filesystem contains; so we can + * safely drop compatibility code and refuse to mount filesystems + * we'd need it for * @magic - identifies as a bcachefs superblock (BCACHE_MAGIC) * @seq - incremented each time superblock is written * @uuid - used for generating various magic numbers and identifying @@ -1183,7 +1137,9 @@ struct bch_sb_layout { */ struct bch_sb { struct bch_csum csum; - __le64 version; + __le16 version; + __le16 version_min; + __le16 pad[2]; uuid_le magic; uuid_le uuid; uuid_le user_uuid; @@ -1359,11 +1315,6 @@ static inline __u64 __bset_magic(struct bch_sb *sb) /* Journal */ -#define BCACHE_JSET_VERSION_UUIDv1 1 -#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ -#define BCACHE_JSET_VERSION_JKEYS 2 -#define BCACHE_JSET_VERSION 2 - #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) #define BCH_JSET_ENTRY_TYPES() \ @@ -1443,35 +1394,26 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); /* Btree: */ -#define DEFINE_BCH_BTREE_IDS() \ - DEF_BTREE_ID(EXTENTS, 0, "extents") \ - DEF_BTREE_ID(INODES, 1, "inodes") \ - DEF_BTREE_ID(DIRENTS, 2, "dirents") \ - DEF_BTREE_ID(XATTRS, 3, "xattrs") \ - DEF_BTREE_ID(ALLOC, 4, "alloc") \ - DEF_BTREE_ID(QUOTAS, 5, "quotas") \ - DEF_BTREE_ID(EC, 6, "erasure_coding") - -#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val, +#define BCH_BTREE_IDS() \ + x(EXTENTS, 0, "extents") \ + x(INODES, 1, "inodes") \ + x(DIRENTS, 2, "dirents") \ + x(XATTRS, 3, "xattrs") \ + x(ALLOC, 4, "alloc") \ + x(QUOTAS, 5, "quotas") \ + x(EC, 6, "erasure_coding") enum btree_id { - DEFINE_BCH_BTREE_IDS() +#define x(kwd, val, name) BTREE_ID_##kwd = val, + BCH_BTREE_IDS() +#undef x BTREE_ID_NR }; -#undef DEF_BTREE_ID - #define BTREE_MAX_DEPTH 4U /* Btree nodes */ -/* Version 1: Seed pointer into btree node checksum - */ -#define BCACHE_BSET_CSUM 1 -#define BCACHE_BSET_KEY_v1 2 -#define BCACHE_BSET_JOURNAL_SEQ 3 -#define 
BCACHE_BSET_VERSION 3 - /* * Btree nodes * diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c index 135ecb8..25725e4 100644 --- a/libbcachefs/bkey.c +++ b/libbcachefs/bkey.c @@ -484,7 +484,7 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, pack_state_finish(&state, out); out->u64s = f->key_u64s; out->format = KEY_FORMAT_LOCAL_BTREE; - out->type = KEY_TYPE_DELETED; + out->type = KEY_TYPE_deleted; #ifdef CONFIG_BCACHEFS_DEBUG if (exact) { diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index 28bf646..1539709 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -52,10 +52,12 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); } -#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED) +#define bkey_val_end(_k) vstruct_idx((_k).v, bkey_val_u64s((_k).k)) + +#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) #define bkey_whiteout(_k) \ - ((_k)->type == KEY_TYPE_DELETED || (_k)->type == KEY_TYPE_DISCARD) + ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) #define bkey_packed_typecheck(_k) \ ({ \ @@ -430,7 +432,15 @@ static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion * functions. 
*/ -#define __BKEY_VAL_ACCESSORS(name, nr, _assert) \ +#define BKEY_VAL_ACCESSORS(name) \ +struct bkey_i_##name { \ + union { \ + struct bkey k; \ + struct bkey_i k_i; \ + }; \ + struct bch_##name v; \ +}; \ + \ struct bkey_s_c_##name { \ union { \ struct { \ @@ -455,20 +465,20 @@ struct bkey_s_##name { \ \ static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return container_of(&k->k, struct bkey_i_##name, k); \ } \ \ static inline const struct bkey_i_##name * \ bkey_i_to_##name##_c(const struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return container_of(&k->k, struct bkey_i_##name, k); \ } \ \ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ { \ - _assert(k.k->type, nr); \ + EBUG_ON(k.k->type != KEY_TYPE_##name); \ return (struct bkey_s_##name) { \ .k = k.k, \ .v = container_of(k.v, struct bch_##name, v), \ @@ -477,7 +487,7 @@ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ \ static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ { \ - _assert(k.k->type, nr); \ + EBUG_ON(k.k->type != KEY_TYPE_##name); \ return (struct bkey_s_c_##name) { \ .k = k.k, \ .v = container_of(k.v, struct bch_##name, v), \ @@ -503,7 +513,7 @@ name##_i_to_s_c(const struct bkey_i_##name *k) \ \ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return (struct bkey_s_##name) { \ .k = &k->k, \ .v = container_of(&k->v, struct bch_##name, v), \ @@ -513,27 +523,13 @@ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ static inline struct bkey_s_c_##name \ bkey_i_to_s_c_##name(const struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return (struct bkey_s_c_##name) { \ .k = &k->k, \ .v = container_of(&k->v, struct bch_##name, v), 
\ }; \ } \ \ -static inline struct bch_##name * \ -bkey_p_##name##_val(const struct bkey_format *f, \ - struct bkey_packed *k) \ -{ \ - return container_of(bkeyp_val(f, k), struct bch_##name, v); \ -} \ - \ -static inline const struct bch_##name * \ -bkey_p_c_##name##_val(const struct bkey_format *f, \ - const struct bkey_packed *k) \ -{ \ - return container_of(bkeyp_val(f, k), struct bch_##name, v); \ -} \ - \ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ { \ struct bkey_i_##name *k = \ @@ -541,45 +537,23 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ \ bkey_init(&k->k); \ memset(&k->v, 0, sizeof(k->v)); \ - k->k.type = nr; \ + k->k.type = KEY_TYPE_##name; \ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ \ return k; \ } -#define __BKEY_VAL_ASSERT(_type, _nr) EBUG_ON(_type != _nr) - -#define BKEY_VAL_ACCESSORS(name, _nr) \ - static inline void __bch_##name##_assert(u8 type, u8 nr) \ - { \ - EBUG_ON(type != _nr); \ - } \ - \ - __BKEY_VAL_ACCESSORS(name, _nr, __bch_##name##_assert) - -BKEY_VAL_ACCESSORS(cookie, KEY_TYPE_COOKIE); - -static inline void __bch2_extent_assert(u8 type, u8 nr) -{ - EBUG_ON(type != BCH_EXTENT && type != BCH_EXTENT_CACHED); -} - -__BKEY_VAL_ACCESSORS(extent, BCH_EXTENT, __bch2_extent_assert); -BKEY_VAL_ACCESSORS(reservation, BCH_RESERVATION); - -BKEY_VAL_ACCESSORS(inode, BCH_INODE_FS); -BKEY_VAL_ACCESSORS(inode_blockdev, BCH_INODE_BLOCKDEV); -BKEY_VAL_ACCESSORS(inode_generation, BCH_INODE_GENERATION); - -BKEY_VAL_ACCESSORS(dirent, BCH_DIRENT); - -BKEY_VAL_ACCESSORS(xattr, BCH_XATTR); - -BKEY_VAL_ACCESSORS(alloc, BCH_ALLOC); - -BKEY_VAL_ACCESSORS(quota, BCH_QUOTA); - -BKEY_VAL_ACCESSORS(stripe, BCH_STRIPE); +BKEY_VAL_ACCESSORS(cookie); +BKEY_VAL_ACCESSORS(btree_ptr); +BKEY_VAL_ACCESSORS(extent); +BKEY_VAL_ACCESSORS(reservation); +BKEY_VAL_ACCESSORS(inode); +BKEY_VAL_ACCESSORS(inode_generation); +BKEY_VAL_ACCESSORS(dirent); +BKEY_VAL_ACCESSORS(xattr); +BKEY_VAL_ACCESSORS(alloc); 
+BKEY_VAL_ACCESSORS(quota); +BKEY_VAL_ACCESSORS(stripe); /* byte order helpers */ diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index 97d72d2..6b04bef 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -11,66 +11,84 @@ #include "quota.h" #include "xattr.h" -const struct bkey_ops bch2_bkey_ops[] = { - [BKEY_TYPE_EXTENTS] = bch2_bkey_extent_ops, - [BKEY_TYPE_INODES] = bch2_bkey_inode_ops, - [BKEY_TYPE_DIRENTS] = bch2_bkey_dirent_ops, - [BKEY_TYPE_XATTRS] = bch2_bkey_xattr_ops, - [BKEY_TYPE_ALLOC] = bch2_bkey_alloc_ops, - [BKEY_TYPE_QUOTAS] = bch2_bkey_quota_ops, - [BKEY_TYPE_EC] = bch2_bkey_ec_ops, - [BKEY_TYPE_BTREE] = bch2_bkey_btree_ops, +const char * const bch_bkey_types[] = { +#define x(name, nr) #name, + BCH_BKEY_TYPES() +#undef x + NULL }; -const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +static const char *deleted_key_invalid(const struct bch_fs *c, + struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + return NULL; +} + +const struct bkey_ops bch2_bkey_ops_deleted = { + .key_invalid = deleted_key_invalid, +}; + +const struct bkey_ops bch2_bkey_ops_discard = { + .key_invalid = deleted_key_invalid, +}; - switch (k.k->type) { - case KEY_TYPE_DELETED: - case KEY_TYPE_DISCARD: - return NULL; +static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + if (bkey_val_bytes(k.k)) + return "value size should be zero"; - case KEY_TYPE_ERROR: - return bkey_val_bytes(k.k) != 0 - ? "value size should be zero" - : NULL; + return NULL; +} - case KEY_TYPE_COOKIE: - return bkey_val_bytes(k.k) != sizeof(struct bch_cookie) - ? 
"incorrect value size" - : NULL; +const struct bkey_ops bch2_bkey_ops_error = { + .key_invalid = empty_val_key_invalid, +}; - default: - if (k.k->type < KEY_TYPE_GENERIC_NR) - return "invalid type"; +static const char *key_type_cookie_invalid(const struct bch_fs *c, + struct bkey_s_c k) +{ + if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) + return "incorrect value size"; - return ops->key_invalid(c, k); - } + return NULL; } -const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +const struct bkey_ops bch2_bkey_ops_cookie = { + .key_invalid = key_type_cookie_invalid, +}; + +const struct bkey_ops bch2_bkey_ops_whiteout = { + .key_invalid = empty_val_key_invalid, +}; + +static const struct bkey_ops bch2_bkey_ops[] = { +#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, + BCH_BKEY_TYPES() +#undef x +}; + +const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + if (k.k->type >= KEY_TYPE_MAX) + return "invalid type"; + + return bch2_bkey_ops[k.k->type].key_invalid(c, k); +} +const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type) +{ if (k.k->u64s < BKEY_U64s) return "u64s too small"; - if (!ops->is_extents) { - if (k.k->size) - return "nonzero size field"; - } else { + if (btree_node_type_is_extents(type)) { if ((k.k->size == 0) != bkey_deleted(k.k)) return "bad size field"; + } else { + if (k.k->size) + return "nonzero size field"; } - if (ops->is_extents && - !k.k->size && - !bkey_deleted(k.k)) - return "zero size field"; - if (k.k->p.snapshot) return "nonzero snapshot"; @@ -81,11 +99,11 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, return NULL; } -const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type) { - return __bch2_bkey_invalid(c, type, k) 
?: - bch2_bkey_val_invalid(c, type, k); + return __bch2_bkey_invalid(c, k, type) ?: + bch2_bkey_val_invalid(c, k); } const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) @@ -101,24 +119,22 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - enum bkey_type type = btree_node_type(b); - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; const char *invalid; BUG_ON(!k.k->u64s); - invalid = bch2_bkey_invalid(c, type, k) ?: + invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, k); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "invalid bkey %s: %s", buf, invalid); return; } - if (k.k->type >= KEY_TYPE_GENERIC_NR && - ops->key_debugcheck) + if (ops->key_debugcheck) ops->key_debugcheck(c, b, k); } @@ -143,46 +159,90 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) } void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k) -{ - const struct bkey_ops *ops = &bch2_bkey_ops[type]; - - switch (k.k->type) { - case KEY_TYPE_DELETED: - pr_buf(out, " deleted"); - break; - case KEY_TYPE_DISCARD: - pr_buf(out, " discard"); - break; - case KEY_TYPE_ERROR: - pr_buf(out, " error"); - break; - case KEY_TYPE_COOKIE: - pr_buf(out, " cookie"); - break; - default: - if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text) - ops->val_to_text(out, c, k); - break; - } + struct bkey_s_c k) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; + + if (likely(ops->val_to_text)) + ops->val_to_text(out, c, k); + else + pr_buf(out, " %s", bch_bkey_types[k.k->type]); } void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k) + struct bkey_s_c k) { bch2_bkey_to_text(out, k.k); 
pr_buf(out, ": "); - bch2_val_to_text(out, c, type, k); + bch2_val_to_text(out, c, k); } -void bch2_bkey_swab(enum bkey_type type, - const struct bkey_format *f, - struct bkey_packed *k) +void bch2_bkey_swab(const struct bkey_format *f, + struct bkey_packed *k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + const struct bkey_ops *ops = &bch2_bkey_ops[k->type]; bch2_bkey_swab_key(f, k); if (ops->swab) ops->swab(f, k); } + +bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; + + return ops->key_normalize + ? ops->key_normalize(c, k) + : false; +} + +enum merge_result bch2_bkey_merge(struct bch_fs *c, + struct bkey_i *l, struct bkey_i *r) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type]; + + if (!key_merging_disabled(c) && + ops->key_merge && + l->k.type == r->k.type && + !bversion_cmp(l->k.version, r->k.version) && + !bkey_cmp(l->k.p, bkey_start_pos(&r->k))) + return ops->key_merge(c, l, r); + + return BCH_MERGE_NOMERGE; +} + +static const struct old_bkey_type { + u8 btree_node_type; + u8 old; + u8 new; +} bkey_renumber_table[] = { + {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, + {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, + {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, + {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, + {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, + {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, + {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, + {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout }, + {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, + {BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout }, + {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, + {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, +}; + +void bch2_bkey_renumber(enum btree_node_type btree_node_type, + struct bkey_packed *k, + int write) +{ + const struct old_bkey_type *i; + + for (i = bkey_renumber_table; + i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table); + i++) + if (btree_node_type == i->btree_node_type && + k->type 
== (write ? i->new : i->old)) { + k->type = write ? i->old : i->new; + break; + } +} diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index 11ce12f..cf7a9e9 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -3,24 +3,12 @@ #include "bkey.h" -#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val, - -enum bkey_type { - DEFINE_BCH_BTREE_IDS() - BKEY_TYPE_BTREE, -}; - -#undef DEF_BTREE_ID - -/* Type of a key in btree @id at level @level: */ -static inline enum bkey_type bkey_type(unsigned level, enum btree_id id) -{ - return level ? BKEY_TYPE_BTREE : (enum bkey_type) id; -} - struct bch_fs; struct btree; struct bkey; +enum btree_node_type; + +extern const char * const bch_bkey_types[]; enum merge_result { BCH_MERGE_NOMERGE, @@ -33,12 +21,6 @@ enum merge_result { BCH_MERGE_MERGE, }; -typedef bool (*key_filter_fn)(struct bch_fs *, struct btree *, - struct bkey_s); -typedef enum merge_result (*key_merge_fn)(struct bch_fs *, - struct btree *, - struct bkey_i *, struct bkey_i *); - struct bkey_ops { /* Returns reason for being invalid if invalid, else NULL: */ const char * (*key_invalid)(const struct bch_fs *, @@ -48,29 +30,34 @@ struct bkey_ops { void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(const struct bkey_format *, struct bkey_packed *); - key_filter_fn key_normalize; - key_merge_fn key_merge; - bool is_extents; + bool (*key_normalize)(struct bch_fs *, struct bkey_s); + enum merge_result (*key_merge)(struct bch_fs *, + struct bkey_i *, struct bkey_i *); }; -const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type, - struct bkey_s_c); -const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); -const char *bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); +const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); +const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, + enum btree_node_type); +const char 
*bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, + enum btree_node_type); const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -void bch2_val_to_text(struct printbuf *, struct bch_fs *, enum bkey_type, +void bch2_val_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, - enum bkey_type, struct bkey_s_c); + struct bkey_s_c); + +void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *); + +bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); -void bch2_bkey_swab(enum bkey_type, const struct bkey_format *, - struct bkey_packed *); +enum merge_result bch2_bkey_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); -extern const struct bkey_ops bch2_bkey_ops[]; +void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); #endif /* _BCACHEFS_BKEY_METHODS_H */ diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c new file mode 100644 index 0000000..c47c862 --- /dev/null +++ b/libbcachefs/bkey_sort.c @@ -0,0 +1,652 @@ +#include "bcachefs.h" +#include "bkey_sort.h" +#include "bset.h" +#include "extents.h" + +/* too many iterators, need to clean this up */ + +/* btree_node_iter_large: */ + +#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r) + +static inline bool +bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter) +{ + return !iter->used; +} + +static inline struct bkey_packed * +bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter, + struct btree *b) +{ + return bch2_btree_node_iter_large_end(iter) + ? 
NULL + : __btree_node_offset_to_key(b, iter->data->k); +} + +static void +bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter, + struct btree *b) +{ + iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s; + + EBUG_ON(!iter->used); + EBUG_ON(iter->data->k > iter->data->end); + + if (iter->data->k == iter->data->end) + heap_del(iter, 0, btree_node_iter_cmp_heap, NULL); + else + heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL); +} + +static inline struct bkey_packed * +bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter, + struct btree *b) +{ + struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b); + + if (ret) + bch2_btree_node_iter_large_advance(iter, b); + + return ret; +} + +void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter, + struct btree *b, + const struct bkey_packed *k, + const struct bkey_packed *end) +{ + if (k != end) { + struct btree_node_iter_set n = + ((struct btree_node_iter_set) { + __btree_node_key_to_offset(b, k), + __btree_node_key_to_offset(b, end) + }); + + __heap_add(iter, n, btree_node_iter_cmp_heap, NULL); + } +} + +static void sort_key_next(struct btree_node_iter_large *iter, + struct btree *b, + struct btree_node_iter_set *i) +{ + i->k += __btree_node_offset_to_key(b, i->k)->u64s; + + if (i->k == i->end) + *i = iter->data[--iter->used]; +} + +/* regular sort_iters */ + +typedef int (*sort_cmp_fn)(struct btree *, + struct bkey_packed *, + struct bkey_packed *); + +static inline void __sort_iter_sift(struct sort_iter *iter, + unsigned from, + sort_cmp_fn cmp) +{ + unsigned i; + + for (i = from; + i + 1 < iter->used && + cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0; + i++) + swap(iter->data[i], iter->data[i + 1]); +} + +static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp) +{ + + __sort_iter_sift(iter, 0, cmp); +} + +static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp) +{ + unsigned i = 
iter->used; + + while (i--) + __sort_iter_sift(iter, i, cmp); +} + +static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter) +{ + return iter->used ? iter->data->k : NULL; +} + +static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp) +{ + iter->data->k = bkey_next(iter->data->k); + + BUG_ON(iter->data->k > iter->data->end); + + if (iter->data->k == iter->data->end) + array_remove_item(iter->data, iter->used, 0); + else + sort_iter_sift(iter, cmp); +} + +static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, + sort_cmp_fn cmp) +{ + struct bkey_packed *ret = sort_iter_peek(iter); + + if (ret) + sort_iter_advance(iter, cmp); + + return ret; +} + +/* + * Returns true if l > r - unless l == r, in which case returns true if l is + * older than r. + * + * Necessary for btree_sort_fixup() - if there are multiple keys that compare + * equal in different sets, we have to process them newest to oldest. + */ +#define key_sort_cmp(h, l, r) \ +({ \ + bkey_cmp_packed(b, \ + __btree_node_offset_to_key(b, (l).k), \ + __btree_node_offset_to_key(b, (r).k)) \ + \ + ?: (l).k - (r).k; \ +}) + +static inline bool should_drop_next_key(struct btree_node_iter_large *iter, + struct btree *b) +{ + struct btree_node_iter_set *l = iter->data, *r = iter->data + 1; + struct bkey_packed *k = __btree_node_offset_to_key(b, l->k); + + if (bkey_whiteout(k)) + return true; + + if (iter->used < 2) + return false; + + if (iter->used > 2 && + key_sort_cmp(iter, r[0], r[1]) >= 0) + r++; + + /* + * key_sort_cmp() ensures that when keys compare equal the older key + * comes first; so if l->k compares equal to r->k then l->k is older and + * should be dropped. 
+ */ + return !bkey_cmp_packed(b, + __btree_node_offset_to_key(b, l->k), + __btree_node_offset_to_key(b, r->k)); +} + +struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst, + struct btree *b, + struct btree_node_iter_large *iter) +{ + struct bkey_packed *out = dst->start; + struct btree_nr_keys nr; + + memset(&nr, 0, sizeof(nr)); + + heap_resort(iter, key_sort_cmp, NULL); + + while (!bch2_btree_node_iter_large_end(iter)) { + if (!should_drop_next_key(iter, b)) { + struct bkey_packed *k = + __btree_node_offset_to_key(b, iter->data->k); + + bkey_copy(out, k); + btree_keys_account_key_add(&nr, 0, out); + out = bkey_next(out); + } + + sort_key_next(iter, b, iter->data); + heap_sift_down(iter, 0, key_sort_cmp, NULL); + } + + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); + return nr; +} + +/* + * If keys compare equal, compare by pointer order: + * + * Necessary for sort_fix_overlapping() - if there are multiple keys that + * compare equal in different sets, we have to process them newest to oldest. 
+ */ +#define extent_sort_cmp(h, l, r) \ +({ \ + struct bkey _ul = bkey_unpack_key(b, \ + __btree_node_offset_to_key(b, (l).k)); \ + struct bkey _ur = bkey_unpack_key(b, \ + __btree_node_offset_to_key(b, (r).k)); \ + \ + bkey_cmp(bkey_start_pos(&_ul), \ + bkey_start_pos(&_ur)) ?: (r).k - (l).k; \ +}) + +static inline void extent_sort_sift(struct btree_node_iter_large *iter, + struct btree *b, size_t i) +{ + heap_sift_down(iter, i, extent_sort_cmp, NULL); +} + +static inline void extent_sort_next(struct btree_node_iter_large *iter, + struct btree *b, + struct btree_node_iter_set *i) +{ + sort_key_next(iter, b, i); + heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL); +} + +static void extent_sort_append(struct bch_fs *c, + struct btree *b, + struct btree_nr_keys *nr, + struct bkey_packed *start, + struct bkey_packed **prev, + struct bkey_packed *k) +{ + struct bkey_format *f = &b->format; + BKEY_PADDED(k) tmp; + + if (bkey_whiteout(k)) + return; + + bch2_bkey_unpack(b, &tmp.k, k); + + if (*prev && + bch2_bkey_merge(c, (void *) *prev, &tmp.k)) + return; + + if (*prev) { + bch2_bkey_pack(*prev, (void *) *prev, f); + + btree_keys_account_key_add(nr, 0, *prev); + *prev = bkey_next(*prev); + } else { + *prev = start; + } + + bkey_copy(*prev, &tmp.k); +} + +struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, + struct bset *dst, + struct btree *b, + struct btree_node_iter_large *iter) +{ + struct bkey_format *f = &b->format; + struct btree_node_iter_set *_l = iter->data, *_r; + struct bkey_packed *prev = NULL, *out, *lk, *rk; + struct bkey l_unpacked, r_unpacked; + struct bkey_s l, r; + struct btree_nr_keys nr; + + memset(&nr, 0, sizeof(nr)); + + heap_resort(iter, extent_sort_cmp, NULL); + + while (!bch2_btree_node_iter_large_end(iter)) { + lk = __btree_node_offset_to_key(b, _l->k); + + if (iter->used == 1) { + extent_sort_append(c, b, &nr, dst->start, &prev, lk); + extent_sort_next(iter, b, _l); + continue; + } + + _r = iter->data + 1; + if 
(iter->used > 2 && + extent_sort_cmp(iter, _r[0], _r[1]) >= 0) + _r++; + + rk = __btree_node_offset_to_key(b, _r->k); + + l = __bkey_disassemble(b, lk, &l_unpacked); + r = __bkey_disassemble(b, rk, &r_unpacked); + + /* If current key and next key don't overlap, just append */ + if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { + extent_sort_append(c, b, &nr, dst->start, &prev, lk); + extent_sort_next(iter, b, _l); + continue; + } + + /* Skip 0 size keys */ + if (!r.k->size) { + extent_sort_next(iter, b, _r); + continue; + } + + /* + * overlap: keep the newer key and trim the older key so they + * don't overlap. comparing pointers tells us which one is + * newer, since the bsets are appended one after the other. + */ + + /* can't happen because of comparison func */ + BUG_ON(_l->k < _r->k && + !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); + + if (_l->k > _r->k) { + /* l wins, trim r */ + if (bkey_cmp(l.k->p, r.k->p) >= 0) { + sort_key_next(iter, b, _r); + } else { + __bch2_cut_front(l.k->p, r); + extent_save(b, rk, r.k); + } + + extent_sort_sift(iter, b, _r - iter->data); + } else if (bkey_cmp(l.k->p, r.k->p) > 0) { + BKEY_PADDED(k) tmp; + + /* + * r wins, but it overlaps in the middle of l - split l: + */ + bkey_reassemble(&tmp.k, l.s_c); + bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k); + + __bch2_cut_front(r.k->p, l); + extent_save(b, lk, l.k); + + extent_sort_sift(iter, b, 0); + + extent_sort_append(c, b, &nr, dst->start, &prev, + bkey_to_packed(&tmp.k)); + } else { + bch2_cut_back(bkey_start_pos(r.k), l.k); + extent_save(b, lk, l.k); + } + } + + if (prev) { + bch2_bkey_pack(prev, (void *) prev, f); + btree_keys_account_key_add(&nr, 0, prev); + out = bkey_next(prev); + } else { + out = dst->start; + } + + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); + return nr; +} + +/* Sort + repack in a new format: */ +struct btree_nr_keys +bch2_sort_repack(struct bset *dst, struct btree *src, + struct btree_node_iter *src_iter, + struct bkey_format *out_f, + bool 
filter_whiteouts) +{ + struct bkey_format *in_f = &src->format; + struct bkey_packed *in, *out = vstruct_last(dst); + struct btree_nr_keys nr; + + memset(&nr, 0, sizeof(nr)); + + while ((in = bch2_btree_node_iter_next_all(src_iter, src))) { + if (filter_whiteouts && bkey_whiteout(in)) + continue; + + if (bch2_bkey_transform(out_f, out, bkey_packed(in) + ? in_f : &bch2_bkey_format_current, in)) + out->format = KEY_FORMAT_LOCAL_BTREE; + else + bch2_bkey_unpack(src, (void *) out, in); + + btree_keys_account_key_add(&nr, 0, out); + out = bkey_next(out); + } + + dst->u64s = cpu_to_le16((u64 *) out - dst->_data); + return nr; +} + +/* Sort, repack, and merge: */ +struct btree_nr_keys +bch2_sort_repack_merge(struct bch_fs *c, + struct bset *dst, struct btree *src, + struct btree_node_iter *iter, + struct bkey_format *out_f, + bool filter_whiteouts) +{ + struct bkey_packed *k, *prev = NULL, *out; + struct btree_nr_keys nr; + BKEY_PADDED(k) tmp; + + memset(&nr, 0, sizeof(nr)); + + while ((k = bch2_btree_node_iter_next_all(iter, src))) { + if (filter_whiteouts && bkey_whiteout(k)) + continue; + + /* + * The filter might modify pointers, so we have to unpack the + * key and values to &tmp.k: + */ + bch2_bkey_unpack(src, &tmp.k, k); + + if (filter_whiteouts && + bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k))) + continue; + + /* prev is always unpacked, for key merging: */ + + if (prev && + bch2_bkey_merge(c, (void *) prev, &tmp.k) == + BCH_MERGE_MERGE) + continue; + + /* + * the current key becomes the new prev: advance prev, then + * copy the current key - but first pack prev (in place): + */ + if (prev) { + bch2_bkey_pack(prev, (void *) prev, out_f); + + btree_keys_account_key_add(&nr, 0, prev); + prev = bkey_next(prev); + } else { + prev = vstruct_last(dst); + } + + bkey_copy(prev, &tmp.k); + } + + if (prev) { + bch2_bkey_pack(prev, (void *) prev, out_f); + btree_keys_account_key_add(&nr, 0, prev); + out = bkey_next(prev); + } else { + out = vstruct_last(dst); + } + + 
dst->u64s = cpu_to_le16((u64 *) out - dst->_data); + return nr; +} + +static inline int sort_keys_cmp(struct btree *b, + struct bkey_packed *l, + struct bkey_packed *r) +{ + return bkey_cmp_packed(b, l, r) ?: + (int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?: + (int) l->needs_whiteout - (int) r->needs_whiteout; +} + +unsigned bch2_sort_keys(struct bkey_packed *dst, + struct sort_iter *iter, + bool filter_whiteouts) +{ + const struct bkey_format *f = &iter->b->format; + struct bkey_packed *in, *next, *out = dst; + + sort_iter_sort(iter, sort_keys_cmp); + + while ((in = sort_iter_next(iter, sort_keys_cmp))) { + if (bkey_whiteout(in) && + (filter_whiteouts || !in->needs_whiteout)) + continue; + + if (bkey_whiteout(in) && + (next = sort_iter_peek(iter)) && + !bkey_cmp_packed(iter->b, in, next)) { + BUG_ON(in->needs_whiteout && + next->needs_whiteout); + /* + * XXX racy, called with read lock from write path + * + * leads to spurious BUG_ON() in bkey_unpack_key() in + * debug mode + */ + next->needs_whiteout |= in->needs_whiteout; + continue; + } + + if (bkey_whiteout(in)) { + memcpy_u64s(out, in, bkeyp_key_u64s(f, in)); + set_bkeyp_val_u64s(f, out, 0); + } else { + bkey_copy(out, in); + } + out = bkey_next(out); + } + + return (u64 *) out - (u64 *) dst; +} + +static inline int sort_extents_cmp(struct btree *b, + struct bkey_packed *l, + struct bkey_packed *r) +{ + return bkey_cmp_packed(b, l, r) ?: + (int) bkey_deleted(l) - (int) bkey_deleted(r); +} + +unsigned bch2_sort_extents(struct bkey_packed *dst, + struct sort_iter *iter, + bool filter_whiteouts) +{ + struct bkey_packed *in, *out = dst; + + sort_iter_sort(iter, sort_extents_cmp); + + while ((in = sort_iter_next(iter, sort_extents_cmp))) { + if (bkey_deleted(in)) + continue; + + if (bkey_whiteout(in) && + (filter_whiteouts || !in->needs_whiteout)) + continue; + + bkey_copy(out, in); + out = bkey_next(out); + } + + return (u64 *) out - (u64 *) dst; +} + +static inline int sort_key_whiteouts_cmp(struct btree *b, 
+ struct bkey_packed *l, + struct bkey_packed *r) +{ + return bkey_cmp_packed(b, l, r); +} + +unsigned bch2_sort_key_whiteouts(struct bkey_packed *dst, + struct sort_iter *iter) +{ + struct bkey_packed *in, *out = dst; + + sort_iter_sort(iter, sort_key_whiteouts_cmp); + + while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) { + bkey_copy(out, in); + out = bkey_next(out); + } + + return (u64 *) out - (u64 *) dst; +} + +static inline int sort_extent_whiteouts_cmp(struct btree *b, + struct bkey_packed *l, + struct bkey_packed *r) +{ + struct bkey ul = bkey_unpack_key(b, l); + struct bkey ur = bkey_unpack_key(b, r); + + return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur)); +} + +unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst, + struct sort_iter *iter) +{ + const struct bkey_format *f = &iter->b->format; + struct bkey_packed *in, *out = dst; + struct bkey_i l, r; + bool prev = false, l_packed = false; + u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); + u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); + u64 new_size; + + max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX); + + sort_iter_sort(iter, sort_extent_whiteouts_cmp); + + while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { + if (bkey_deleted(in)) + continue; + + EBUG_ON(bkeyp_val_u64s(f, in)); + EBUG_ON(in->type != KEY_TYPE_discard); + + r.k = bkey_unpack_key(iter->b, in); + + if (prev && + bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) { + if (bkey_cmp(l.k.p, r.k.p) >= 0) + continue; + + new_size = l_packed + ? 
min(max_packed_size, max_packed_offset - + bkey_start_offset(&l.k)) + : KEY_SIZE_MAX; + + new_size = min(new_size, r.k.p.offset - + bkey_start_offset(&l.k)); + + BUG_ON(new_size < l.k.size); + + bch2_key_resize(&l.k, new_size); + + if (bkey_cmp(l.k.p, r.k.p) >= 0) + continue; + + bch2_cut_front(l.k.p, &r); + } + + if (prev) { + if (!bch2_bkey_pack(out, &l, f)) { + BUG_ON(l_packed); + bkey_copy(out, &l); + } + out = bkey_next(out); + } + + l = r; + prev = true; + l_packed = bkey_packed(in); + } + + if (prev) { + if (!bch2_bkey_pack(out, &l, f)) { + BUG_ON(l_packed); + bkey_copy(out, &l); + } + out = bkey_next(out); + } + + return (u64 *) out - (u64 *) dst; +} diff --git a/libbcachefs/bkey_sort.h b/libbcachefs/bkey_sort.h new file mode 100644 index 0000000..d189d81 --- /dev/null +++ b/libbcachefs/bkey_sort.h @@ -0,0 +1,68 @@ +#ifndef _BCACHEFS_BKEY_SORT_H +#define _BCACHEFS_BKEY_SORT_H + +struct btree_node_iter_large { + u16 used; + + struct btree_node_iter_set data[MAX_BSETS]; +}; + +void bch2_btree_node_iter_large_push(struct btree_node_iter_large *, + struct btree *, + const struct bkey_packed *, + const struct bkey_packed *); + +struct sort_iter { + struct btree *b; + unsigned used; + + struct sort_iter_set { + struct bkey_packed *k, *end; + } data[MAX_BSETS + 1]; +}; + +static inline void sort_iter_init(struct sort_iter *iter, struct btree *b) +{ + memset(iter, 0, sizeof(*iter)); + iter->b = b; +} + +static inline void sort_iter_add(struct sort_iter *iter, + struct bkey_packed *k, + struct bkey_packed *end) +{ + BUG_ON(iter->used >= ARRAY_SIZE(iter->data)); + + if (k != end) + iter->data[iter->used++] = (struct sort_iter_set) { k, end }; +} + +struct btree_nr_keys +bch2_key_sort_fix_overlapping(struct bset *, struct btree *, + struct btree_node_iter_large *); +struct btree_nr_keys +bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, + struct btree *, + struct btree_node_iter_large *); + +struct btree_nr_keys +bch2_sort_repack(struct bset *, struct 
btree *, + struct btree_node_iter *, + struct bkey_format *, bool); +struct btree_nr_keys +bch2_sort_repack_merge(struct bch_fs *, + struct bset *, struct btree *, + struct btree_node_iter *, + struct bkey_format *, bool); + +unsigned bch2_sort_keys(struct bkey_packed *, + struct sort_iter *, bool); +unsigned bch2_sort_extents(struct bkey_packed *, + struct sort_iter *, bool); + +unsigned bch2_sort_key_whiteouts(struct bkey_packed *, + struct sort_iter *); +unsigned bch2_sort_extent_whiteouts(struct bkey_packed *, + struct sort_iter *); + +#endif /* _BCACHEFS_BKEY_SORT_H */ diff --git a/libbcachefs/bset.h b/libbcachefs/bset.h index a27c8a2..8bc2fdf 100644 --- a/libbcachefs/bset.h +++ b/libbcachefs/bset.h @@ -381,7 +381,7 @@ bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) static inline struct bkey_packed * bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k) { - return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1); + return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1); } enum bch_extent_overlap { @@ -513,7 +513,7 @@ bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, static inline struct bkey_packed * bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) { - return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1); + return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1); } static inline struct bkey_packed * @@ -539,7 +539,7 @@ bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b) static inline struct bkey_packed * bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) { - return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1); + return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1); } struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 28ac862..d99441a 100644 --- 
a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -5,20 +5,17 @@ #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" -#include "extents.h" #include #include -#define DEF_BTREE_ID(kwd, val, name) name, - const char * const bch2_btree_ids[] = { - DEFINE_BCH_BTREE_IDS() +#define x(kwd, val, name) name, + BCH_BTREE_IDS() +#undef x NULL }; -#undef DEF_BTREE_ID - void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; @@ -99,7 +96,7 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) if (!b) return NULL; - bkey_extent_init(&b->key); + bkey_btree_ptr_init(&b->key); six_lock_init(&b->lock); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); @@ -115,7 +112,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); /* Cause future lookups for this node to fail: */ - bkey_i_to_extent(&b->key)->v._data[0] = 0; + PTR_HASH(&b->key) = 0; } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) @@ -602,7 +599,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, /* raced with another fill: */ /* mark as unhashed... 
*/ - bkey_i_to_extent(&b->key)->v._data[0] = 0; + PTR_HASH(&b->key) = 0; mutex_lock(&bc->lock); list_add(&b->list, &bc->freeable); @@ -904,8 +901,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, b->data->min_key.offset, b->data->max_key.inode, b->data->max_key.offset); - bch2_val_to_text(out, c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); pr_buf(out, "\n" " format: u64s %u fields %u %u %u %u %u\n" " unpack fn len: %u\n" diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index 399f8b9..08e6f2a 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -3,7 +3,6 @@ #include "bcachefs.h" #include "btree_types.h" -#include "extents.h" struct btree_iter; @@ -36,12 +35,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *); void bch2_fs_btree_cache_init_early(struct btree_cache *); -#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0]) +#define PTR_HASH(_k) *((u64 *) &bkey_i_to_btree_ptr_c(_k)->v) /* is btree node in hash table? 
*/ static inline bool btree_node_hashed(struct btree *b) { - return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key); + return b->key.k.type == KEY_TYPE_btree_ptr && + PTR_HASH(&b->key); } #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 9fe438d..c30d1f7 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -109,152 +109,11 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b, /* marking of btree keys/nodes: */ -static bool bkey_type_needs_gc(enum bkey_type type) -{ - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - case BKEY_TYPE_EC: - return true; - default: - return false; - } -} - -static void ptr_gen_recalc_oldest(struct bch_fs *c, - const struct bch_extent_ptr *ptr, - u8 *max_stale) -{ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - size_t b = PTR_BUCKET_NR(ca, ptr); - - if (gen_after(ca->oldest_gens[b], ptr->gen)) - ca->oldest_gens[b] = ptr->gen; - - *max_stale = max(*max_stale, ptr_stale(ca, ptr)); -} - -static u8 ptr_gens_recalc_oldest(struct bch_fs *c, - enum bkey_type type, - struct bkey_s_c k) -{ - const struct bch_extent_ptr *ptr; - u8 max_stale = 0; - - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) - ptr_gen_recalc_oldest(c, ptr, &max_stale); - break; - } - } - break; - case BKEY_TYPE_EC: - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) - ptr_gen_recalc_oldest(c, ptr, &max_stale); - } - } - default: - break; - } - - return max_stale; -} - -static int ptr_gen_check(struct bch_fs *c, - enum bkey_type type, - const struct bch_extent_ptr *ptr) -{ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - size_t b = 
PTR_BUCKET_NR(ca, ptr); - struct bucket *g = PTR_BUCKET(ca, ptr); - int ret = 0; - - if (mustfix_fsck_err_on(!g->mark.gen_valid, c, - "found ptr with missing gen in alloc btree,\n" - "type %u gen %u", - type, ptr->gen)) { - g->_mark.gen = ptr->gen; - g->_mark.gen_valid = 1; - set_bit(b, ca->buckets_dirty); - } - - if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, - "%u ptr gen in the future: %u > %u", - type, ptr->gen, g->mark.gen)) { - g->_mark.gen = ptr->gen; - g->_mark.gen_valid = 1; - set_bit(b, ca->buckets_dirty); - set_bit(BCH_FS_FIXED_GENS, &c->flags); - } -fsck_err: - return ret; -} - -static int ptr_gens_check(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, + u8 *max_stale, bool initial) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - int ret = 0; - - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) { - ret = ptr_gen_check(c, type, ptr); - if (ret) - return ret; - - } - break; - } - } - break; - case BKEY_TYPE_EC: - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) { - ret = ptr_gen_check(c, type, ptr); - if (ret) - return ret; - } - } - } - break; - default: - break; - } - - return ret; -} - -/* - * For runtime mark and sweep: - */ -static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k, bool initial) -{ struct gc_pos pos = { 0 }; unsigned flags = BCH_BUCKET_MARK_GC| @@ -269,52 +128,77 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, atomic64_set(&c->key_version, k.k->version.lo); if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || - fsck_err_on(!bch2_bkey_replicas_marked(c, type, k, - false), 
c, + fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c, "superblock not marked as containing replicas (type %u)", - type)) { - ret = bch2_mark_bkey_replicas(c, type, k); + k.k->type)) { + ret = bch2_mark_bkey_replicas(c, k); if (ret) return ret; } - ret = ptr_gens_check(c, type, k); - if (ret) - return ret; + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + size_t b = PTR_BUCKET_NR(ca, ptr); + struct bucket *g = PTR_BUCKET(ca, ptr); + + if (mustfix_fsck_err_on(!g->mark.gen_valid, c, + "found ptr with missing gen in alloc btree,\n" + "type %u gen %u", + k.k->type, ptr->gen)) { + g->_mark.gen = ptr->gen; + g->_mark.gen_valid = 1; + set_bit(b, ca->buckets_dirty); + } + + if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, + "%u ptr gen in the future: %u > %u", + k.k->type, ptr->gen, g->mark.gen)) { + g->_mark.gen = ptr->gen; + g->_mark.gen_valid = 1; + set_bit(b, ca->buckets_dirty); + set_bit(BCH_FS_FIXED_GENS, &c->flags); + } + } } - bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags); + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + size_t b = PTR_BUCKET_NR(ca, ptr); + + if (gen_after(ca->oldest_gens[b], ptr->gen)) + ca->oldest_gens[b] = ptr->gen; + + *max_stale = max(*max_stale, ptr_stale(ca, ptr)); + } - ret = ptr_gens_recalc_oldest(c, type, k); + bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags); fsck_err: return ret; } static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, - bool initial) + u8 *max_stale, bool initial) { - enum bkey_type type = btree_node_type(b); struct btree_node_iter iter; struct bkey unpacked; struct bkey_s_c k; - u8 stale = 0; - int ret; + int ret = 0; + + *max_stale = 0; - if (!bkey_type_needs_gc(type)) + if (!btree_node_type_needs_gc(btree_node_type(b))) return 0; for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { bch2_bkey_debugcheck(c, b, k); - ret = bch2_gc_mark_key(c, type, k, initial); - if (ret < 0) 
- return ret; - - stale = max_t(u8, stale, ret); + ret = bch2_gc_mark_key(c, k, max_stale, initial); + if (ret) + break; } - return stale; + return ret; } static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, @@ -323,15 +207,12 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, struct btree_iter iter; struct btree *b; struct range_checks r; - unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1; - unsigned max_stale; + unsigned depth = btree_node_type_needs_gc(btree_id) ? 0 : 1; + u8 max_stale; int ret = 0; gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0)); - if (!c->btree_roots[btree_id].b) - return 0; - /* * if expensive_debug_checks is on, run range_checks on all leaf nodes: * @@ -349,7 +230,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bch2_verify_btree_nr_keys(b); - max_stale = btree_gc_mark_node(c, b, initial); + ret = btree_gc_mark_node(c, b, &max_stale, initial); + if (ret) + break; gc_pos_set(c, gc_pos_btree_node(b)); @@ -370,7 +253,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, bch2_btree_iter_cond_resched(&iter); } - ret = bch2_btree_iter_unlock(&iter); + ret = bch2_btree_iter_unlock(&iter) ?: ret; if (ret) return ret; @@ -378,8 +261,8 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, b = c->btree_roots[btree_id].b; if (!btree_node_fake(b)) - bch2_gc_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key), initial); + bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), + &max_stale, initial); gc_pos_set(c, gc_pos_btree_root(b->btree_id)); mutex_unlock(&c->btree_root_lock); @@ -396,6 +279,7 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, bool initial) { enum btree_id ids[BTREE_ID_NR]; + u8 max_stale; unsigned i; for (i = 0; i < BTREE_ID_NR; i++) @@ -404,13 +288,13 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, for (i = 0; i < BTREE_ID_NR; i++) { enum btree_id id = ids[i]; - enum bkey_type type = bkey_type(0, 
id); + enum btree_node_type type = __btree_node_type(0, id); int ret = bch2_gc_btree(c, id, initial); if (ret) return ret; - if (journal && bkey_type_needs_gc(type)) { + if (journal && btree_node_type_needs_gc(type)) { struct bkey_i *k, *n; struct jset_entry *j; struct journal_replay *r; @@ -418,10 +302,11 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, list_for_each_entry(r, journal, list) for_each_jset_key(k, n, j, &r->j) { - if (type == bkey_type(j->level, j->btree_id)) { - ret = bch2_gc_mark_key(c, type, - bkey_i_to_s_c(k), initial); - if (ret < 0) + if (type == __btree_node_type(j->level, j->btree_id)) { + ret = bch2_gc_mark_key(c, + bkey_i_to_s_c(k), + &max_stale, initial); + if (ret) return ret; } } @@ -519,8 +404,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) for_each_pending_btree_node_free(c, as, d) if (d->index_update_done) - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&d->key), + bch2_mark_key(c, bkey_i_to_s_c(&d->key), true, 0, pos, NULL, 0, BCH_BUCKET_MARK_GC); @@ -579,6 +463,8 @@ static void bch2_gc_free(struct bch_fs *c) struct bch_dev *ca; unsigned i; + genradix_free(&c->stripes[1]); + for_each_member_device(ca, c, i) { kvpfree(rcu_dereference_protected(ca->buckets[1], 1), sizeof(struct bucket_array) + @@ -599,6 +485,25 @@ static void bch2_gc_done_nocheck(struct bch_fs *c) unsigned i; int cpu; + { + struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); + struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); + struct stripe *dst, *src; + + c->ec_stripes_heap.used = 0; + + while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) && + (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) { + *dst = *src; + + if (dst->alive) + bch2_stripes_heap_insert(c, dst, dst_iter.pos); + + genradix_iter_advance(&dst_iter, &c->stripes[0]); + genradix_iter_advance(&src_iter, &c->stripes[1]); + } + } + for_each_member_device(ca, c, i) { struct bucket_array *src = 
__bucket_array(ca, 1); @@ -646,13 +551,21 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) #define copy_field(_f, _msg, ...) \ if (dst._f != src._f) { \ - pr_info(_msg ": got %llu, should be %llu, fixing" \ + bch_err(c, _msg ": got %llu, should be %llu, fixing"\ , ##__VA_ARGS__, dst._f, src._f); \ dst._f = src._f; \ } +#define copy_stripe_field(_f, _msg, ...) \ + if (dst->_f != src->_f) { \ + bch_err_ratelimited(c, "stripe %zu has wrong "_msg \ + ": got %u, should be %u, fixing", \ + dst_iter.pos, ##__VA_ARGS__, \ + dst->_f, src->_f); \ + dst->_f = src->_f; \ + } #define copy_bucket_field(_f) \ if (dst->b[b].mark._f != src->b[b].mark._f) { \ - pr_info("dev %u bucket %zu has wrong " #_f \ + bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\ ": got %u, should be %u, fixing", \ i, b, dst->b[b].mark._f, src->b[b].mark._f); \ dst->b[b]._mark._f = src->b[b].mark._f; \ @@ -669,6 +582,36 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) goto out; } + { + struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); + struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); + struct stripe *dst, *src; + unsigned i; + + c->ec_stripes_heap.used = 0; + + while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) && + (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) { + copy_stripe_field(alive, "alive"); + copy_stripe_field(sectors, "sectors"); + copy_stripe_field(algorithm, "algorithm"); + copy_stripe_field(nr_blocks, "nr_blocks"); + copy_stripe_field(nr_redundant, "nr_redundant"); + copy_stripe_field(blocks_nonempty.counter, + "blocks_nonempty"); + + for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++) + copy_stripe_field(block_sectors[i].counter, + "block_sectors[%u]", i); + + if (dst->alive) + bch2_stripes_heap_insert(c, dst, dst_iter.pos); + + genradix_iter_advance(&dst_iter, &c->stripes[0]); + genradix_iter_advance(&src_iter, &c->stripes[1]); + } + } + for_each_member_device(ca, c, i) { struct bucket_array *dst = 
__bucket_array(ca, 0); struct bucket_array *src = __bucket_array(ca, 1); @@ -753,10 +696,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial) out: percpu_up_write(&c->usage_lock); -#undef copy_field #undef copy_fs_field #undef copy_dev_field #undef copy_bucket_field +#undef copy_stripe_field +#undef copy_field } static int bch2_gc_start(struct bch_fs *c) @@ -764,6 +708,12 @@ static int bch2_gc_start(struct bch_fs *c) struct bch_dev *ca; unsigned i; + /* + * indicate to stripe code that we need to allocate for the gc stripes + * radix tree, too + */ + gc_pos_set(c, gc_phase(GC_PHASE_START)); + BUG_ON(c->usage[1]); c->usage[1] = alloc_percpu(struct bch_fs_usage); @@ -805,7 +755,7 @@ static int bch2_gc_start(struct bch_fs *c) percpu_up_write(&c->usage_lock); - return 0; + return bch2_ec_mem_alloc(c, true); } /** @@ -870,7 +820,7 @@ out: bch2_gc_done(c, initial); /* Indicates that gc is no longer in progress: */ - __gc_pos_set(c, gc_phase(GC_PHASE_START)); + __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); bch2_gc_free(c); up_write(&c->gc_lock); @@ -1110,7 +1060,6 @@ next: /* Free the old nodes and update our sliding window */ for (i = 0; i < nr_old_nodes; i++) { bch2_btree_node_free_inmem(c, old_nodes[i], iter); - six_unlock_intent(&old_nodes[i]->lock); /* * the index update might have triggered a split, in which case diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h index d7809c2..8af5f84 100644 --- a/libbcachefs/btree_gc.h +++ b/libbcachefs/btree_gc.h @@ -3,8 +3,6 @@ #include "btree_types.h" -enum bkey_type; - void bch2_coalesce(struct bch_fs *); int bch2_gc(struct bch_fs *, struct list_head *, bool); void bch2_gc_thread_stop(struct bch_fs *); @@ -57,9 +55,9 @@ static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) { switch (id) { -#define DEF_BTREE_ID(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; - DEFINE_BCH_BTREE_IDS() -#undef DEF_BTREE_ID +#define x(n, v, s) 
case BTREE_ID_##n: return GC_PHASE_BTREE_##n; + BCH_BTREE_IDS() +#undef x default: BUG(); } diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index a4da979..231ace4 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1,6 +1,7 @@ #include "bcachefs.h" #include "bkey_methods.h" +#include "bkey_sort.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" @@ -19,40 +20,6 @@ #include -/* btree_node_iter_large: */ - -#define btree_node_iter_cmp_heap(h, _l, _r) btree_node_iter_cmp(b, _l, _r) - -void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter, - struct btree *b, - const struct bkey_packed *k, - const struct bkey_packed *end) -{ - if (k != end) { - struct btree_node_iter_set n = - ((struct btree_node_iter_set) { - __btree_node_key_to_offset(b, k), - __btree_node_key_to_offset(b, end) - }); - - __heap_add(iter, n, btree_node_iter_cmp_heap, NULL); - } -} - -void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter, - struct btree *b) -{ - iter->data->k += __btree_node_offset_to_key(b, iter->data->k)->u64s; - - EBUG_ON(!iter->used); - EBUG_ON(iter->data->k > iter->data->end); - - if (iter->data->k == iter->data->end) - heap_del(iter, 0, btree_node_iter_cmp_heap, NULL); - else - heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL); -} - static void verify_no_dups(struct btree *b, struct bkey_packed *start, struct bkey_packed *end) @@ -113,193 +80,6 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order, return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO); } -typedef int (*sort_cmp_fn)(struct btree *, - struct bkey_packed *, - struct bkey_packed *); - -struct sort_iter { - struct btree *b; - unsigned used; - - struct sort_iter_set { - struct bkey_packed *k, *end; - } data[MAX_BSETS + 1]; -}; - -static void sort_iter_init(struct sort_iter *iter, struct btree *b) -{ - memset(iter, 0, sizeof(*iter)); - iter->b = b; -} - -static inline void __sort_iter_sift(struct sort_iter 
*iter, - unsigned from, - sort_cmp_fn cmp) -{ - unsigned i; - - for (i = from; - i + 1 < iter->used && - cmp(iter->b, iter->data[i].k, iter->data[i + 1].k) > 0; - i++) - swap(iter->data[i], iter->data[i + 1]); -} - -static inline void sort_iter_sift(struct sort_iter *iter, sort_cmp_fn cmp) -{ - - __sort_iter_sift(iter, 0, cmp); -} - -static inline void sort_iter_sort(struct sort_iter *iter, sort_cmp_fn cmp) -{ - unsigned i = iter->used; - - while (i--) - __sort_iter_sift(iter, i, cmp); -} - -static void sort_iter_add(struct sort_iter *iter, - struct bkey_packed *k, - struct bkey_packed *end) -{ - BUG_ON(iter->used >= ARRAY_SIZE(iter->data)); - - if (k != end) - iter->data[iter->used++] = (struct sort_iter_set) { k, end }; -} - -static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter) -{ - return iter->used ? iter->data->k : NULL; -} - -static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp) -{ - iter->data->k = bkey_next(iter->data->k); - - BUG_ON(iter->data->k > iter->data->end); - - if (iter->data->k == iter->data->end) - array_remove_item(iter->data, iter->used, 0); - else - sort_iter_sift(iter, cmp); -} - -static inline struct bkey_packed *sort_iter_next(struct sort_iter *iter, - sort_cmp_fn cmp) -{ - struct bkey_packed *ret = sort_iter_peek(iter); - - if (ret) - sort_iter_advance(iter, cmp); - - return ret; -} - -static inline int sort_key_whiteouts_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) -{ - return bkey_cmp_packed(b, l, r); -} - -static unsigned sort_key_whiteouts(struct bkey_packed *dst, - struct sort_iter *iter) -{ - struct bkey_packed *in, *out = dst; - - sort_iter_sort(iter, sort_key_whiteouts_cmp); - - while ((in = sort_iter_next(iter, sort_key_whiteouts_cmp))) { - bkey_copy(out, in); - out = bkey_next(out); - } - - return (u64 *) out - (u64 *) dst; -} - -static inline int sort_extent_whiteouts_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) -{ - struct bkey ul = 
bkey_unpack_key(b, l); - struct bkey ur = bkey_unpack_key(b, r); - - return bkey_cmp(bkey_start_pos(&ul), bkey_start_pos(&ur)); -} - -static unsigned sort_extent_whiteouts(struct bkey_packed *dst, - struct sort_iter *iter) -{ - const struct bkey_format *f = &iter->b->format; - struct bkey_packed *in, *out = dst; - struct bkey_i l, r; - bool prev = false, l_packed = false; - u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); - u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); - u64 new_size; - - max_packed_size = min_t(u64, max_packed_size, KEY_SIZE_MAX); - - sort_iter_sort(iter, sort_extent_whiteouts_cmp); - - while ((in = sort_iter_next(iter, sort_extent_whiteouts_cmp))) { - if (bkey_deleted(in)) - continue; - - EBUG_ON(bkeyp_val_u64s(f, in)); - EBUG_ON(in->type != KEY_TYPE_DISCARD); - - r.k = bkey_unpack_key(iter->b, in); - - if (prev && - bkey_cmp(l.k.p, bkey_start_pos(&r.k)) >= 0) { - if (bkey_cmp(l.k.p, r.k.p) >= 0) - continue; - - new_size = l_packed - ? min(max_packed_size, max_packed_offset - - bkey_start_offset(&l.k)) - : KEY_SIZE_MAX; - - new_size = min(new_size, r.k.p.offset - - bkey_start_offset(&l.k)); - - BUG_ON(new_size < l.k.size); - - bch2_key_resize(&l.k, new_size); - - if (bkey_cmp(l.k.p, r.k.p) >= 0) - continue; - - bch2_cut_front(l.k.p, &r); - } - - if (prev) { - if (!bch2_bkey_pack(out, &l, f)) { - BUG_ON(l_packed); - bkey_copy(out, &l); - } - out = bkey_next(out); - } - - l = r; - prev = true; - l_packed = bkey_packed(in); - } - - if (prev) { - if (!bch2_bkey_pack(out, &l, f)) { - BUG_ON(l_packed); - bkey_copy(out, &l); - } - out = bkey_next(out); - } - - return (u64 *) out - (u64 *) dst; -} - static unsigned should_compact_bset(struct btree *b, struct bset_tree *t, bool compacting, enum compact_mode mode) @@ -420,11 +200,10 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, BUG_ON((void *) unwritten_whiteouts_start(c, b) < (void *) btree_bkey_last(b, bset_tree_last(b))); - u64s = btree_node_is_extents(b) - 
? sort_extent_whiteouts(unwritten_whiteouts_start(c, b), - &sort_iter) - : sort_key_whiteouts(unwritten_whiteouts_start(c, b), - &sort_iter); + u64s = (btree_node_is_extents(b) + ? bch2_sort_extent_whiteouts + : bch2_sort_key_whiteouts)(unwritten_whiteouts_start(c, b), + &sort_iter); BUG_ON(u64s > b->whiteout_u64s); BUG_ON(u64s != b->whiteout_u64s && !btree_node_is_extents(b)); @@ -499,87 +278,6 @@ static bool bch2_drop_whiteouts(struct btree *b) return ret; } -static inline int sort_keys_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) -{ - return bkey_cmp_packed(b, l, r) ?: - (int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?: - (int) l->needs_whiteout - (int) r->needs_whiteout; -} - -static unsigned sort_keys(struct bkey_packed *dst, - struct sort_iter *iter, - bool filter_whiteouts) -{ - const struct bkey_format *f = &iter->b->format; - struct bkey_packed *in, *next, *out = dst; - - sort_iter_sort(iter, sort_keys_cmp); - - while ((in = sort_iter_next(iter, sort_keys_cmp))) { - if (bkey_whiteout(in) && - (filter_whiteouts || !in->needs_whiteout)) - continue; - - if (bkey_whiteout(in) && - (next = sort_iter_peek(iter)) && - !bkey_cmp_packed(iter->b, in, next)) { - BUG_ON(in->needs_whiteout && - next->needs_whiteout); - /* - * XXX racy, called with read lock from write path - * - * leads to spurious BUG_ON() in bkey_unpack_key() in - * debug mode - */ - next->needs_whiteout |= in->needs_whiteout; - continue; - } - - if (bkey_whiteout(in)) { - memcpy_u64s(out, in, bkeyp_key_u64s(f, in)); - set_bkeyp_val_u64s(f, out, 0); - } else { - bkey_copy(out, in); - } - out = bkey_next(out); - } - - return (u64 *) out - (u64 *) dst; -} - -static inline int sort_extents_cmp(struct btree *b, - struct bkey_packed *l, - struct bkey_packed *r) -{ - return bkey_cmp_packed(b, l, r) ?: - (int) bkey_deleted(l) - (int) bkey_deleted(r); -} - -static unsigned sort_extents(struct bkey_packed *dst, - struct sort_iter *iter, - bool filter_whiteouts) -{ - struct 
bkey_packed *in, *out = dst; - - sort_iter_sort(iter, sort_extents_cmp); - - while ((in = sort_iter_next(iter, sort_extents_cmp))) { - if (bkey_deleted(in)) - continue; - - if (bkey_whiteout(in) && - (filter_whiteouts || !in->needs_whiteout)) - continue; - - bkey_copy(out, in); - out = bkey_next(out); - } - - return (u64 *) out - (u64 *) dst; -} - static void btree_node_sort(struct bch_fs *c, struct btree *b, struct btree_iter *iter, unsigned start_idx, @@ -618,9 +316,11 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, if (btree_node_is_extents(b)) filter_whiteouts = bset_written(b, start_bset); - u64s = btree_node_is_extents(b) - ? sort_extents(out->keys.start, &sort_iter, filter_whiteouts) - : sort_keys(out->keys.start, &sort_iter, filter_whiteouts); + u64s = (btree_node_is_extents(b) + ? bch2_sort_extents + : bch2_sort_keys)(out->keys.start, + &sort_iter, + filter_whiteouts); out->keys.u64s = cpu_to_le16(u64s); @@ -678,101 +378,6 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b, bch2_verify_btree_nr_keys(b); } -/* Sort + repack in a new format: */ -static struct btree_nr_keys sort_repack(struct bset *dst, - struct btree *src, - struct btree_node_iter *src_iter, - struct bkey_format *out_f, - bool filter_whiteouts) -{ - struct bkey_format *in_f = &src->format; - struct bkey_packed *in, *out = vstruct_last(dst); - struct btree_nr_keys nr; - - memset(&nr, 0, sizeof(nr)); - - while ((in = bch2_btree_node_iter_next_all(src_iter, src))) { - if (filter_whiteouts && bkey_whiteout(in)) - continue; - - if (bch2_bkey_transform(out_f, out, bkey_packed(in) - ? 
in_f : &bch2_bkey_format_current, in)) - out->format = KEY_FORMAT_LOCAL_BTREE; - else - bch2_bkey_unpack(src, (void *) out, in); - - btree_keys_account_key_add(&nr, 0, out); - out = bkey_next(out); - } - - dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - return nr; -} - -/* Sort, repack, and merge: */ -static struct btree_nr_keys sort_repack_merge(struct bch_fs *c, - struct bset *dst, - struct btree *src, - struct btree_node_iter *iter, - struct bkey_format *out_f, - bool filter_whiteouts, - key_filter_fn filter, - key_merge_fn merge) -{ - struct bkey_packed *k, *prev = NULL, *out; - struct btree_nr_keys nr; - BKEY_PADDED(k) tmp; - - memset(&nr, 0, sizeof(nr)); - - while ((k = bch2_btree_node_iter_next_all(iter, src))) { - if (filter_whiteouts && bkey_whiteout(k)) - continue; - - /* - * The filter might modify pointers, so we have to unpack the - * key and values to &tmp.k: - */ - bch2_bkey_unpack(src, &tmp.k, k); - - if (filter && filter(c, src, bkey_i_to_s(&tmp.k))) - continue; - - /* prev is always unpacked, for key merging: */ - - if (prev && - merge && - merge(c, src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE) - continue; - - /* - * the current key becomes the new prev: advance prev, then - * copy the current key - but first pack prev (in place): - */ - if (prev) { - bch2_bkey_pack(prev, (void *) prev, out_f); - - btree_keys_account_key_add(&nr, 0, prev); - prev = bkey_next(prev); - } else { - prev = vstruct_last(dst); - } - - bkey_copy(prev, &tmp.k); - } - - if (prev) { - bch2_bkey_pack(prev, (void *) prev, out_f); - btree_keys_account_key_add(&nr, 0, prev); - out = bkey_next(prev); - } else { - out = vstruct_last(dst); - } - - dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - return nr; -} - void bch2_btree_sort_into(struct bch_fs *c, struct btree *dst, struct btree *src) @@ -787,16 +392,13 @@ void bch2_btree_sort_into(struct bch_fs *c, bch2_btree_node_iter_init_from_start(&src_iter, src); - if (btree_node_ops(src)->key_normalize || - 
btree_node_ops(src)->key_merge) - nr = sort_repack_merge(c, btree_bset_first(dst), + if (btree_node_is_extents(src)) + nr = bch2_sort_repack_merge(c, btree_bset_first(dst), src, &src_iter, &dst->format, - true, - btree_node_ops(src)->key_normalize, - btree_node_ops(src)->key_merge); + true); else - nr = sort_repack(btree_bset_first(dst), + nr = bch2_sort_repack(btree_bset_first(dst), src, &src_iter, &dst->format, true); @@ -1000,8 +602,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, { struct bkey_packed *k, *prev = NULL; struct bpos prev_pos = POS_MIN; - enum bkey_type type = btree_node_type(b); bool seen_non_whiteout = false; + unsigned version; const char *err; int ret = 0; @@ -1047,13 +649,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b, "invalid bkey format: %s", err); } - if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION, - BTREE_ERR_FIXABLE, c, b, i, - "unsupported bset version")) { - i->version = cpu_to_le16(BCACHE_BSET_VERSION); - i->u64s = 0; - return 0; - } + version = le16_to_cpu(i->version); + btree_err_on((version != BCH_BSET_VERSION_OLD && + version < bcachefs_metadata_version_min) || + version >= bcachefs_metadata_version_max, + BTREE_ERR_FATAL, c, b, i, + "unsupported bset version"); if (btree_err_on(b->written + sectors > c->opts.btree_node_size, BTREE_ERR_FIXABLE, c, b, i, @@ -1102,17 +703,21 @@ static int validate_bset(struct bch_fs *c, struct btree *b, } if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) - bch2_bkey_swab(type, &b->format, k); + bch2_bkey_swab(&b->format, k); + + if (!write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(btree_node_type(b), k, write); u = bkey_disassemble(b, k, &tmp); - invalid = __bch2_bkey_invalid(c, type, u) ?: + invalid = __bch2_bkey_invalid(c, u, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, u) ?: - (write ? bch2_bkey_val_invalid(c, type, u) : NULL); + (write ? 
bch2_bkey_val_invalid(c, u) : NULL); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, u); + bch2_bkey_val_to_text(&PBUF(buf), c, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey:\n%s\n%s", invalid, buf); @@ -1122,6 +727,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b, continue; } + if (write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(btree_node_type(b), k, write); + /* * with the separate whiteouts thing (used for extents), the * second set of keys actually can have whiteouts too, so we @@ -1287,17 +896,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { - enum bkey_type type = btree_node_type(b); struct bkey tmp; struct bkey_s_c u = bkey_disassemble(b, k, &tmp); - const char *invalid = bch2_bkey_val_invalid(c, type, u); + const char *invalid = bch2_bkey_val_invalid(c, u); if (invalid || (inject_invalid_keys(c) && !bversion_cmp(u.k->version, MAX_VERSION))) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, u); + bch2_bkey_val_to_text(&PBUF(buf), c, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey %s: %s", buf, invalid); @@ -1367,7 +975,9 @@ start: bch2_mark_io_failure(&failed, &rb->pick); - can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0; + can_retry = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick) > 0; if (!bio->bi_status && !bch2_btree_node_read_done(c, b, can_retry)) @@ -1410,7 +1020,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, trace_btree_read(c, b); - ret = bch2_btree_pick_ptr(c, b, NULL, &pick); + ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), + NULL, &pick); if (bch2_fs_fatal_err_on(ret <= 0, c, "btree node read error: no device to read from")) { set_btree_node_read_error(b); @@ -1537,8 +1148,8 @@ static void bch2_btree_node_write_error(struct bch_fs *c, { struct btree *b = 
wbio->wbio.bio.bi_private; __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct bkey_i_extent *new_key; - struct bkey_s_extent e; + struct bkey_i_btree_ptr *new_key; + struct bkey_s_btree_ptr bp; struct bch_extent_ptr *ptr; struct btree_iter iter; int ret; @@ -1562,13 +1173,13 @@ retry: bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_extent(&tmp.k); - e = extent_i_to_s(new_key); + new_key = bkey_i_to_btree_ptr(&tmp.k); + bp = btree_ptr_i_to_s(new_key); - bch2_extent_drop_ptrs(e, ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); - if (!bch2_extent_nr_ptrs(e.c)) + if (!bch2_bkey_nr_ptrs(bp.s_c)) goto err; ret = bch2_btree_node_update_key(c, &iter, b, new_key); @@ -1671,12 +1282,11 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - const struct bch_extent_ptr *ptr; unsigned whiteout_u64s = 0; int ret; - extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr) - break; + if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) + return -1; ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false); if (ret) @@ -1694,7 +1304,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; BKEY_PADDED(key) k; - struct bkey_s_extent e; struct bch_extent_ptr *ptr; struct sort_iter sort_iter; struct nonce nonce; @@ -1702,6 +1311,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, u64 seq = 0; bool used_mempool; unsigned long old, new; + bool validate_before_checksum = false; void *data; if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) @@ -1815,8 +1425,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, b->whiteout_u64s = 0; u64s = btree_node_is_extents(b) - ? sort_extents(vstruct_last(i), &sort_iter, false) - : sort_keys(i->start, &sort_iter, false); + ? 
bch2_sort_extents(vstruct_last(i), &sort_iter, false) + : bch2_sort_keys(i->start, &sort_iter, false); le16_add_cpu(&i->u64s, u64s); clear_needs_whiteout(i); @@ -1835,11 +1445,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); - i->version = cpu_to_le16(BCACHE_BSET_VERSION); + i->version = c->sb.version < bcachefs_metadata_version_new_versioning + ? cpu_to_le16(BCH_BSET_VERSION_OLD) + : cpu_to_le16(c->sb.version); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) + validate_before_checksum = true; + + /* validate_bset will be modifying: */ + if (le16_to_cpu(i->version) < + bcachefs_metadata_version_bkey_renumber) + validate_before_checksum = true; + /* if we're going to be encrypting, check metadata validity first: */ - if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + if (validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; @@ -1853,7 +1473,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); /* if we're not encrypting, check metadata after checksumming: */ - if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + if (!validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; @@ -1907,9 +1527,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, */ bkey_copy(&k.key, &b->key); - e = bkey_i_to_s_extent(&k.key); - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) ptr->offset += b->written; b->written += sectors_to_write; diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 48833a9..4be3221 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -142,46 +142,4 @@ void bch2_btree_flush_all_writes(struct bch_fs *); void bch2_btree_verify_flushed(struct bch_fs *); ssize_t 
bch2_dirty_btree_nodes_print(struct bch_fs *, char *); -/* Sorting */ - -struct btree_node_iter_large { - u16 used; - - struct btree_node_iter_set data[MAX_BSETS]; -}; - -void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *, - struct btree *); - -void bch2_btree_node_iter_large_push(struct btree_node_iter_large *, - struct btree *, - const struct bkey_packed *, - const struct bkey_packed *); - -static inline bool bch2_btree_node_iter_large_end(struct btree_node_iter_large *iter) -{ - return !iter->used; -} - -static inline struct bkey_packed * -bch2_btree_node_iter_large_peek_all(struct btree_node_iter_large *iter, - struct btree *b) -{ - return bch2_btree_node_iter_large_end(iter) - ? NULL - : __btree_node_offset_to_key(b, iter->data->k); -} - -static inline struct bkey_packed * -bch2_btree_node_iter_large_next_all(struct btree_node_iter_large *iter, - struct btree *b) -{ - struct bkey_packed *ret = bch2_btree_node_iter_large_peek_all(iter, b); - - if (ret) - bch2_btree_node_iter_large_advance(iter, b); - - return ret; -} - #endif /* _BCACHEFS_BTREE_IO_H */ diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index ae1d4f8..f4922bc 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -263,10 +263,13 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, /* Btree iterator locking: */ #ifdef CONFIG_BCACHEFS_DEBUG -void bch2_btree_iter_verify_locks(struct btree_iter *iter) +void __bch2_btree_iter_verify_locks(struct btree_iter *iter) { unsigned l; + BUG_ON((iter->flags & BTREE_ITER_NOUNLOCK) && + !btree_node_locked(iter, 0)); + for (l = 0; btree_iter_node(iter, l); l++) { if (iter->uptodate >= BTREE_ITER_NEED_RELOCK && !btree_node_locked(iter, l)) @@ -276,6 +279,15 @@ void bch2_btree_iter_verify_locks(struct btree_iter *iter) btree_node_locked_type(iter, l)); } } + +void bch2_btree_iter_verify_locks(struct btree_iter *iter) +{ + struct btree_iter *linked; + + for_each_btree_iter(iter, linked) + 
__bch2_btree_iter_verify_locks(linked); + +} #endif __flatten @@ -381,9 +393,9 @@ void __bch2_btree_iter_downgrade(struct btree_iter *iter, break; } } - - bch2_btree_iter_verify_locks(linked); } + + bch2_btree_iter_verify_locks(iter); } int bch2_btree_iter_unlock(struct btree_iter *iter) @@ -420,7 +432,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, * whiteouts) */ k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS - ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD) + ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_discard) : bch2_btree_node_iter_prev_all(&tmp, b); if (k && btree_iter_pos_cmp(iter, b, k) > 0) { char buf[100]; @@ -609,7 +621,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, * signal to bch2_btree_iter_peek_slot() that we're currently at * a hole */ - u->type = KEY_TYPE_DELETED; + u->type = KEY_TYPE_deleted; return bkey_s_c_null; } @@ -775,9 +787,17 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b) struct btree_iter *linked; unsigned level = b->level; + /* caller now responsible for unlocking @b */ + + BUG_ON(iter->l[level].b != b); + BUG_ON(!btree_node_intent_locked(iter, level)); + + iter->l[level].b = BTREE_ITER_NOT_END; + mark_btree_node_unlocked(iter, level); + for_each_btree_iter(iter, linked) if (linked->l[level].b == b) { - btree_node_unlock(linked, level); + __btree_node_unlock(linked, level); linked->l[level].b = BTREE_ITER_NOT_END; } } diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index 9bbed99..33260a9 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -94,7 +94,7 @@ btree_lock_want(struct btree_iter *iter, int level) return BTREE_NODE_UNLOCKED; } -static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) +static inline void __btree_node_unlock(struct btree_iter *iter, unsigned level) { int lock_type = btree_node_locked_type(iter, level); @@ -105,6 +105,13 @@ static inline 
void btree_node_unlock(struct btree_iter *iter, unsigned level) mark_btree_node_unlocked(iter, level); } +static inline void btree_node_unlock(struct btree_iter *iter, unsigned level) +{ + BUG_ON(!level && iter->flags & BTREE_ITER_NOUNLOCK); + + __btree_node_unlock(iter, level); +} + static inline void __bch2_btree_iter_unlock(struct btree_iter *iter) { btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK); diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index a7eda11..a91a37e 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -191,6 +191,7 @@ enum btree_iter_type { */ #define BTREE_ITER_IS_EXTENTS (1 << 4) #define BTREE_ITER_ERROR (1 << 5) +#define BTREE_ITER_NOUNLOCK (1 << 6) enum btree_iter_uptodate { BTREE_ITER_UPTODATE = 0, @@ -403,20 +404,45 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) return i - (void *) b->data; } +enum btree_node_type { +#define x(kwd, val, name) BKEY_TYPE_##kwd = val, + BCH_BTREE_IDS() +#undef x + BKEY_TYPE_BTREE, +}; + +/* Type of a key in btree @id at level @level: */ +static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) +{ + return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id; +} + /* Type of keys @b contains: */ -static inline enum bkey_type btree_node_type(struct btree *b) +static inline enum btree_node_type btree_node_type(struct btree *b) { - return b->level ? 
BKEY_TYPE_BTREE : b->btree_id; + return __btree_node_type(b->level, b->btree_id); } -static inline const struct bkey_ops *btree_node_ops(struct btree *b) +static inline bool btree_node_type_is_extents(enum btree_node_type type) { - return &bch2_bkey_ops[btree_node_type(b)]; + return type == BKEY_TYPE_EXTENTS; } static inline bool btree_node_is_extents(struct btree *b) { - return btree_node_type(b) == BKEY_TYPE_EXTENTS; + return btree_node_type_is_extents(btree_node_type(b)); +} + +static inline bool btree_node_type_needs_gc(enum btree_node_type type) +{ + switch (type) { + case BKEY_TYPE_BTREE: + case BKEY_TYPE_EXTENTS: + case BKEY_TYPE_EC: + return true; + default: + return false; + } } struct btree_root { diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 882e1c2..7683636 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -119,7 +119,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id, int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, __le64, unsigned); int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, - struct btree *, struct bkey_i_extent *); + struct btree *, struct bkey_i_btree_ptr *); /* new transactional interface: */ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 537b8da..ee19b13 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -131,13 +131,15 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, /* Btree node freeing/allocation: */ static bool btree_key_matches(struct bch_fs *c, - struct bkey_s_c_extent l, - struct bkey_s_c_extent r) + struct bkey_s_c l, + struct bkey_s_c r) { + struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(l); + struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(r); const struct bch_extent_ptr *ptr1, *ptr2; - extent_for_each_ptr(l, ptr1) - extent_for_each_ptr(r, ptr2) + bkey_for_each_ptr(ptrs1, ptr1) + bkey_for_each_ptr(ptrs2, ptr2) if 
(ptr1->dev == ptr2->dev && ptr1->gen == ptr2->gen && ptr1->offset == ptr2->offset) @@ -159,17 +161,11 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, { struct bch_fs *c = as->c; struct pending_btree_node_free *d; - - /* - * btree_update lock is only needed here to avoid racing with - * gc: - */ - mutex_lock(&c->btree_interior_update_lock); + struct gc_pos pos = { 0 }; for (d = as->pending; d < as->pending + as->nr_pending; d++) if (!bkey_cmp(k.k->p, d->key.k.p) && - btree_key_matches(c, bkey_s_c_to_extent(k), - bkey_i_to_s_c_extent(&d->key))) + btree_key_matches(c, k, bkey_i_to_s_c(&d->key))) goto found; BUG(); found: @@ -200,20 +196,11 @@ found: if (gc_pos_cmp(c->gc_pos, b ? gc_pos_btree_node(b) : gc_pos_btree_root(as->btree_id)) >= 0 && - gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) { - struct gc_pos pos = { 0 }; - - bch2_mark_key(c, BKEY_TYPE_BTREE, + gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) + bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key), false, 0, pos, NULL, 0, BCH_BUCKET_MARK_GC); - /* - * Don't apply tmp - pending deletes aren't tracked in - * bch_alloc_stats: - */ - } - - mutex_unlock(&c->btree_interior_update_lock); } static void __btree_node_free(struct bch_fs *c, struct btree *b) @@ -256,6 +243,11 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b) void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, struct btree_iter *iter) { + struct btree_iter *linked; + + for_each_btree_iter(iter, linked) + BUG_ON(linked->l[b->level].b == b); + /* * Is this a node that isn't reachable on disk yet? 
* @@ -267,11 +259,10 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, */ btree_update_drop_new_node(c, b); - __bch2_btree_node_lock_write(b, iter); + six_lock_write(&b->lock); __btree_node_free(c, b); six_unlock_write(&b->lock); - - bch2_btree_iter_node_drop(iter, b); + six_unlock_intent(&b->lock); } static void bch2_btree_node_free_ondisk(struct bch_fs *c, @@ -279,8 +270,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c, { BUG_ON(!pending->index_update_done); - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&pending->key), + bch2_mark_key(c, bkey_i_to_s_c(&pending->key), false, 0, gc_phase(GC_PHASE_PENDING_DELETE), NULL, 0, 0); @@ -294,7 +284,6 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, struct write_point *wp; struct btree *b; BKEY_PADDED(k) tmp; - struct bkey_i_extent *e; struct open_buckets ob = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; unsigned nr_reserve; @@ -345,8 +334,8 @@ retry: goto retry; } - e = bkey_extent_init(&tmp.k); - bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size); + bkey_btree_ptr_init(&tmp.k); + bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); bch2_open_bucket_get(c, wp, &ob); bch2_alloc_sectors_done(c, wp); @@ -384,7 +373,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev b->data->flags = 0; SET_BTREE_NODE_ID(b->data, as->btree_id); SET_BTREE_NODE_LEVEL(b->data, level); - b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr; + b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0]; bch2_btree_build_aux_trees(b); @@ -537,8 +526,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, goto err_free; } - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) goto err_free; @@ -1078,8 +1066,10 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree 
*b) __bch2_btree_set_root_inmem(c, b); - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key), + mutex_lock(&c->btree_interior_update_lock); + percpu_down_read_preempt_disable(&c->usage_lock); + + bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), true, 0, gc_pos_btree_root(b->btree_id), &stats, 0, 0); @@ -1090,6 +1080,9 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) &stats); bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res, gc_pos_btree_root(b->btree_id)); + + percpu_up_read_preempt_enable(&c->usage_lock); + mutex_unlock(&c->btree_interior_update_lock); } static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw) @@ -1166,11 +1159,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b)); - if (bkey_extent_is_data(&insert->k)) - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(insert), - true, 0, - gc_pos_btree_node(b), &stats, 0, 0); + mutex_lock(&c->btree_interior_update_lock); + percpu_down_read_preempt_disable(&c->usage_lock); + + bch2_mark_key_locked(c, bkey_i_to_s_c(insert), + true, 0, + gc_pos_btree_node(b), &stats, 0, 0); while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && bkey_iter_pos_cmp(b, &insert->k.p, k) > 0) @@ -1188,6 +1182,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res, gc_pos_btree_node(b)); + percpu_up_read_preempt_enable(&c->usage_lock); + mutex_unlock(&c->btree_interior_update_lock); + bch2_btree_bset_insert_key(iter, b, node_iter, insert); set_btree_node_dirty(b); set_btree_node_need_write(b); @@ -1420,25 +1417,19 @@ static void btree_split(struct btree_update *as, struct btree *b, if (n3) bch2_open_buckets_put(c, &n3->ob); - /* - * Note - at this point other linked iterators could still have @b read - * locked; we're depending on the bch2_btree_iter_node_replace() calls - * below 
removing all references to @b so we don't return with other - * iterators pointing to a node they have locked that's been freed. - * - * We have to free the node first because the bch2_iter_node_replace() - * calls will drop _our_ iterator's reference - and intent lock - to @b. - */ - bch2_btree_node_free_inmem(c, b, iter); - /* Successful split, update the iterator to point to the new nodes: */ + bch2_btree_iter_node_drop(iter, b); if (n3) bch2_btree_iter_node_replace(iter, n3); if (n2) bch2_btree_iter_node_replace(iter, n2); bch2_btree_iter_node_replace(iter, n1); + bch2_btree_node_free_inmem(c, b, iter); + + bch2_btree_iter_verify_locks(iter); + bch2_time_stats_update(&c->times[BCH_TIME_btree_split], start_time); } @@ -1734,17 +1725,21 @@ retry: bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags); bch2_open_buckets_put(c, &n->ob); - bch2_btree_node_free_inmem(c, b, iter); - bch2_btree_node_free_inmem(c, m, iter); + + bch2_btree_iter_node_drop(iter, b); bch2_btree_iter_node_replace(iter, n); bch2_btree_iter_verify(iter, n); + bch2_btree_node_free_inmem(c, b, iter); + bch2_btree_node_free_inmem(c, m, iter); + bch2_btree_update_done(as); - six_unlock_intent(&m->lock); up_read(&c->gc_lock); out: + bch2_btree_iter_verify_locks(iter); + /* * Don't downgrade locks here: we're called after successful insert, * and the caller will downgrade locks after a successful insert @@ -1827,9 +1822,9 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, bch2_open_buckets_put(c, &n->ob); - bch2_btree_node_free_inmem(c, b, iter); - + bch2_btree_iter_node_drop(iter, b); bch2_btree_iter_node_replace(iter, n); + bch2_btree_node_free_inmem(c, b, iter); bch2_btree_update_done(as); return 0; @@ -1892,7 +1887,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, struct btree_update *as, struct btree_iter *iter, struct btree *b, struct btree *new_hash, - struct bkey_i_extent *new_key) + struct bkey_i_btree_ptr *new_key) { struct btree *parent; 
int ret; @@ -1955,8 +1950,10 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bch2_btree_node_lock_write(b, iter); - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&new_key->k_i), + mutex_lock(&c->btree_interior_update_lock); + percpu_down_read_preempt_disable(&c->usage_lock); + + bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), true, 0, gc_pos_btree_root(b->btree_id), &stats, 0, 0); @@ -1966,6 +1963,9 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res, gc_pos_btree_root(b->btree_id)); + percpu_up_read_preempt_enable(&c->usage_lock); + mutex_unlock(&c->btree_interior_update_lock); + if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) { mutex_lock(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); @@ -1986,7 +1986,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, } int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, - struct btree *b, struct bkey_i_extent *new_key) + struct btree *b, + struct bkey_i_btree_ptr *new_key) { struct btree *parent = btree_node_parent(iter, b); struct btree_update *as = NULL; @@ -2052,8 +2053,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, goto err; } - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - extent_i_to_s_c(new_key).s_c); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i)); if (ret) goto err_free_update; @@ -2111,9 +2111,9 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) b->level = 0; b->btree_id = id; - bkey_extent_init(&b->key); + bkey_btree_ptr_init(&b->key); b->key.k.p = POS_MAX; - bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id; + PTR_HASH(&b->key) = U64_MAX - id; bch2_bset_init_first(b, &b->data->keys); bch2_btree_build_aux_trees(b); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index e8d6e07..57c5c7a 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ 
-70,7 +70,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, goto overwrite; } - k->type = KEY_TYPE_DELETED; + k->type = KEY_TYPE_deleted; bch2_btree_node_iter_fix(iter, b, node_iter, k, k->u64s, k->u64s); bch2_btree_iter_verify(iter, b); @@ -186,7 +186,6 @@ bch2_insert_fixup_key(struct btree_insert *trans, insert->k)) bch2_btree_journal_key(trans, iter, insert->k); - trans->did_work = true; return BTREE_INSERT_OK; } @@ -312,7 +311,6 @@ btree_key_can_insert(struct btree_insert *trans, return BTREE_INSERT_BTREE_NODE_FULL; if (!bch2_bkey_replicas_marked(c, - insert->iter->btree_id, bkey_i_to_s_c(insert->k), true)) return BTREE_INSERT_NEED_MARK_REPLICAS; @@ -337,6 +335,7 @@ static inline int do_btree_insert_at(struct btree_insert *trans, { struct bch_fs *c = trans->c; struct btree_insert_entry *i; + struct btree_iter *linked; unsigned u64s; int ret; @@ -414,12 +413,25 @@ static inline int do_btree_insert_at(struct btree_insert *trans, i->k->k.version = MAX_VERSION; } + if (trans->flags & BTREE_INSERT_NOUNLOCK) { + /* + * linked iterators that weren't being updated may or may not + * have been traversed/locked, depending on what the caller was + * doing: + */ + for_each_btree_iter(trans->entries[0].iter, linked) + if (linked->uptodate < BTREE_ITER_NEED_RELOCK) + linked->flags |= BTREE_ITER_NOUNLOCK; + } + trans->did_work = true; + trans_for_each_entry(trans, i) { switch (btree_insert_key_leaf(trans, i)) { case BTREE_INSERT_OK: break; case BTREE_INSERT_NEED_TRAVERSE: - BUG_ON((trans->flags & BTREE_INSERT_ATOMIC)); + BUG_ON((trans->flags & + (BTREE_INSERT_ATOMIC|BTREE_INSERT_NOUNLOCK))); ret = -EINTR; goto out; default: @@ -440,8 +452,8 @@ static inline void btree_insert_entry_checks(struct bch_fs *c, BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); BUG_ON(debug_check_bkeys(c) && !bkey_deleted(&i->k->k) && - bch2_bkey_invalid(c, i->iter->btree_id, - bkey_i_to_s_c(i->k))); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), + i->iter->btree_id)); } /** @@ 
-465,8 +477,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans) BUG_ON(!trans->nr); - for_each_btree_iter(trans->entries[0].iter, linked) - bch2_btree_iter_verify_locks(linked); + bch2_btree_iter_verify_locks(trans->entries[0].iter); /* for the sake of sanity: */ BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC)); @@ -508,15 +519,11 @@ retry: out: percpu_ref_put(&c->writes); - if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) { - /* make sure we didn't drop or screw up locks: */ - for_each_btree_iter(trans->entries[0].iter, linked) { - bch2_btree_iter_verify_locks(linked); - BUG_ON((trans->flags & BTREE_INSERT_NOUNLOCK) && - trans->did_work && - !btree_node_locked(linked, 0)); - } - } + /* make sure we didn't drop or screw up locks: */ + bch2_btree_iter_verify_locks(trans->entries[0].iter); + + for_each_btree_iter(trans->entries[0].iter, linked) + linked->flags &= ~BTREE_ITER_NOUNLOCK; BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR); @@ -581,8 +588,7 @@ err: } bch2_btree_iter_unlock(trans->entries[0].iter); - ret = bch2_mark_bkey_replicas(c, i->iter->btree_id, - bkey_i_to_s_c(i->k)) + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)) ?: -EINTR; break; default: diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 6037763..401ff82 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -302,7 +302,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m, static inline enum bch_data_type bucket_type(struct bucket_mark m) { return m.cached_sectors && !m.dirty_sectors - ? BCH_DATA_CACHED + ? 
BCH_DATA_CACHED : m.data_type; } @@ -322,6 +322,8 @@ void bch2_fs_usage_apply(struct bch_fs *c, s64 added = sum.data + sum.reserved; s64 should_not_have_added; + percpu_rwsem_assert_held(&c->usage_lock); + /* * Not allowed to reduce sectors_available except by getting a * reservation: @@ -338,7 +340,6 @@ void bch2_fs_usage_apply(struct bch_fs *c, stats->online_reserved -= added; } - percpu_down_read_preempt_disable(&c->usage_lock); /* online_reserved not subject to gc: */ this_cpu_ptr(c->usage[0])->online_reserved += stats->online_reserved; @@ -350,7 +351,6 @@ void bch2_fs_usage_apply(struct bch_fs *c, bch2_usage_add(this_cpu_ptr(c->usage[1]), stats); bch2_fs_stats_verify(c); - percpu_up_read_preempt_enable(&c->usage_lock); memset(stats, 0, sizeof(*stats)); } @@ -372,14 +372,14 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, dev_usage = this_cpu_ptr(ca->usage[gc]); - if (bucket_type(old) != bucket_type(new)) { - if (bucket_type(old)) { - fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size; - dev_usage->buckets[bucket_type(old)]--; - } else { - fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size; - dev_usage->buckets[bucket_type(new)]++; - } + if (bucket_type(old)) { + fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size; + dev_usage->buckets[bucket_type(old)]--; + } + + if (bucket_type(new)) { + fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size; + dev_usage->buckets[bucket_type(new)]++; } dev_usage->buckets_alloc += @@ -402,11 +402,28 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, bch2_dev_stats_verify(ca); } -#define bucket_data_cmpxchg(c, ca, stats, g, new, expr) \ +void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca) +{ + struct bucket_mark old = { .v.counter = 0 }; + struct bch_fs_usage *fs_usage; + struct bucket_array *buckets; + struct bucket *g; + + percpu_down_read_preempt_disable(&c->usage_lock); + fs_usage = this_cpu_ptr(c->usage[0]); + buckets = 
bucket_array(ca); + + for_each_bucket(g, buckets) + if (g->mark.data_type) + bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false); + percpu_up_read_preempt_enable(&c->usage_lock); +} + +#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \ ({ \ struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \ \ - bch2_dev_usage_update(c, ca, stats, _old, new, gc); \ + bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc); \ _old; \ }) @@ -486,12 +503,12 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, { struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]); struct bucket *g = __bucket(ca, b, gc); - struct bucket_mark old, new; + struct bucket_mark new; BUG_ON(type != BCH_DATA_SB && type != BCH_DATA_JOURNAL); - old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ + bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ new.data_type = type; checked_add(new.dirty_sectors, sectors); })); @@ -542,7 +559,7 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors) crc.uncompressed_size)); } -static s64 ptr_disk_sectors(struct bkey_s_c_extent e, +static s64 ptr_disk_sectors(const struct bkey *k, struct extent_ptr_decoded p, s64 sectors) { @@ -554,8 +571,8 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e, old_sectors = 0; new_sectors = sectors; } else { - old_sectors = e.k->size; - new_sectors = e.k->size + sectors; + old_sectors = k->size; + new_sectors = k->size + sectors; } sectors = -__disk_sectors(p.crc, old_sectors) @@ -571,7 +588,6 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e, * that with the gc pos seqlock held. 
*/ static void bch2_mark_pointer(struct bch_fs *c, - struct bkey_s_c_extent e, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type, struct bch_fs_usage *fs_usage, @@ -630,23 +646,25 @@ static void bch2_mark_pointer(struct bch_fs *c, BUG_ON(!gc && bucket_became_unavailable(old, new)); } -static void bch2_mark_stripe_ptr(struct bch_fs *c, - struct bch_extent_stripe_ptr p, - s64 sectors, unsigned flags, - s64 *adjusted_disk_sectors, - unsigned *redundancy) +static int bch2_mark_stripe_ptr(struct bch_fs *c, + struct bch_extent_stripe_ptr p, + s64 sectors, unsigned flags, + s64 *adjusted_disk_sectors, + unsigned *redundancy, + bool gc) { - struct ec_stripe *m; + struct stripe *m; unsigned old, new, nr_data; int blocks_nonempty_delta; s64 parity_sectors; - m = genradix_ptr(&c->ec_stripes, p.idx); - if (WARN_ON(!m)) - return; + m = genradix_ptr(&c->stripes[gc], p.idx); - if (WARN_ON(!m->alive)) - return; + if (!m || !m->alive) { + bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", + (u64) p.idx); + return -1; + } nr_data = m->nr_blocks - m->nr_redundant; @@ -664,81 +682,74 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c, blocks_nonempty_delta = (int) !!new - (int) !!old; if (!blocks_nonempty_delta) - return; + return 0; atomic_add(blocks_nonempty_delta, &m->blocks_nonempty); BUG_ON(atomic_read(&m->blocks_nonempty) < 0); - bch2_stripes_heap_update(c, m, p.idx); + if (!gc) + bch2_stripes_heap_update(c, m, p.idx); + + return 0; } -static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, - s64 sectors, enum bch_data_type data_type, - struct bch_fs_usage *stats, - u64 journal_seq, unsigned flags, - bool gc) +static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, + s64 sectors, enum bch_data_type data_type, + struct bch_fs_usage *stats, + u64 journal_seq, unsigned flags, + bool gc) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + s64 cached_sectors = 0; 
+ s64 dirty_sectors = 0; + s64 ec_sectors = 0; + unsigned replicas = 0; + unsigned ec_redundancy = 0; + unsigned i; + int ret; + BUG_ON(!sectors); - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - s64 cached_sectors = 0; - s64 dirty_sectors = 0; - s64 ec_sectors = 0; - unsigned replicas = 0; - unsigned ec_redundancy = 0; - unsigned i; - - extent_for_each_ptr_decode(e, p, entry) { - s64 disk_sectors = ptr_disk_sectors(e, p, sectors); - s64 adjusted_disk_sectors = disk_sectors; - - bch2_mark_pointer(c, e, p, disk_sectors, data_type, - stats, journal_seq, flags, gc); - - if (!p.ptr.cached) - for (i = 0; i < p.ec_nr; i++) - bch2_mark_stripe_ptr(c, p.ec[i], - disk_sectors, flags, - &adjusted_disk_sectors, - &ec_redundancy); - if (!p.ptr.cached) - replicas++; - - if (p.ptr.cached) - cached_sectors += adjusted_disk_sectors; - else if (!p.ec_nr) - dirty_sectors += adjusted_disk_sectors; - else - ec_sectors += adjusted_disk_sectors; - } + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + s64 disk_sectors = ptr_disk_sectors(k.k, p, sectors); + s64 adjusted_disk_sectors = disk_sectors; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); - ec_redundancy = clamp_t(unsigned, ec_redundancy, - 1, ARRAY_SIZE(stats->replicas)); + bch2_mark_pointer(c, p, disk_sectors, data_type, + stats, journal_seq, flags, gc); - stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; - stats->replicas[replicas - 1].data[data_type] += dirty_sectors; - stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; - break; + if (!p.ptr.cached) + for (i = 0; i < p.ec_nr; i++) { + ret = bch2_mark_stripe_ptr(c, p.ec[i], + disk_sectors, flags, + &adjusted_disk_sectors, + &ec_redundancy, gc); + if (ret) + return ret; + } + if (!p.ptr.cached) + replicas++; + + if (p.ptr.cached) + cached_sectors += adjusted_disk_sectors; + else if (!p.ec_nr) + 
dirty_sectors += adjusted_disk_sectors; + else + ec_sectors += adjusted_disk_sectors; } - case BCH_RESERVATION: { - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - sectors *= replicas; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); + replicas = clamp_t(unsigned, replicas, + 1, ARRAY_SIZE(stats->replicas)); + ec_redundancy = clamp_t(unsigned, ec_redundancy, + 1, ARRAY_SIZE(stats->replicas)); - stats->replicas[replicas - 1].persistent_reserved += sectors; - break; - } - } + stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; + stats->replicas[replicas - 1].data[data_type] += dirty_sectors; + stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; + + return 0; } static void bucket_set_stripe(struct bch_fs *c, @@ -759,7 +770,7 @@ static void bucket_set_stripe(struct bch_fs *c, BUG_ON(ptr_stale(ca, ptr)); - old = bucket_cmpxchg(g, new, ({ + old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({ new.stripe = enabled; if (journal_seq) { new.journal_seq_valid = 1; @@ -768,103 +779,143 @@ static void bucket_set_stripe(struct bch_fs *c, })); BUG_ON(old.stripe == enabled); - - bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); } } -static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, - bool inserting, - struct bch_fs_usage *fs_usage, - u64 journal_seq, unsigned flags, - bool gc) +static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, + bool inserting, + struct bch_fs_usage *fs_usage, + u64 journal_seq, unsigned flags, + bool gc) { - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - size_t idx = s.k->p.offset; - struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx); - unsigned i; + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + size_t idx = s.k->p.offset; + struct stripe *m = genradix_ptr(&c->stripes[gc], idx); + unsigned i; - BUG_ON(!m); - BUG_ON(m->alive == inserting); + if (!m || (!inserting && !m->alive)) { + bch_err_ratelimited(c, "error 
marking nonexistent stripe %zu", + idx); + return -1; + } - BUG_ON(atomic_read(&m->blocks_nonempty)); + if (inserting && m->alive) { + bch_err_ratelimited(c, "error marking stripe %zu: already exists", + idx); + return -1; + } - for (i = 0; i < EC_STRIPE_MAX; i++) - BUG_ON(atomic_read(&m->block_sectors[i])); + BUG_ON(atomic_read(&m->blocks_nonempty)); - if (inserting) { - m->sectors = le16_to_cpu(s.v->sectors); - m->algorithm = s.v->algorithm; - m->nr_blocks = s.v->nr_blocks; - m->nr_redundant = s.v->nr_redundant; - } + for (i = 0; i < EC_STRIPE_MAX; i++) + BUG_ON(atomic_read(&m->block_sectors[i])); + + if (inserting) { + m->sectors = le16_to_cpu(s.v->sectors); + m->algorithm = s.v->algorithm; + m->nr_blocks = s.v->nr_blocks; + m->nr_redundant = s.v->nr_redundant; + } + if (!gc) { if (inserting) bch2_stripes_heap_insert(c, m, idx); else bch2_stripes_heap_del(c, m, idx); - - bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); - break; - } + } else { + m->alive = inserting; } + + bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); + return 0; } -static void __bch2_mark_key(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, - bool inserting, s64 sectors, - struct bch_fs_usage *stats, - u64 journal_seq, unsigned flags, - bool gc) +static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, + bool inserting, s64 sectors, + struct bch_fs_usage *stats, + u64 journal_seq, unsigned flags, + bool gc) { - switch (type) { - case BKEY_TYPE_BTREE: - bch2_mark_extent(c, k, inserting - ? c->opts.btree_node_size - : -c->opts.btree_node_size, - BCH_DATA_BTREE, - stats, journal_seq, flags, gc); + int ret = 0; + + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + ret = bch2_mark_extent(c, k, inserting + ? 
c->opts.btree_node_size + : -c->opts.btree_node_size, + BCH_DATA_BTREE, + stats, journal_seq, flags, gc); break; - case BKEY_TYPE_EXTENTS: - bch2_mark_extent(c, k, sectors, BCH_DATA_USER, - stats, journal_seq, flags, gc); + case KEY_TYPE_extent: + ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER, + stats, journal_seq, flags, gc); break; - case BKEY_TYPE_EC: - bch2_mark_stripe(c, k, inserting, - stats, journal_seq, flags, gc); + case KEY_TYPE_stripe: + ret = bch2_mark_stripe(c, k, inserting, + stats, journal_seq, flags, gc); + break; + case KEY_TYPE_reservation: { + unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; + + sectors *= replicas; + replicas = clamp_t(unsigned, replicas, + 1, ARRAY_SIZE(stats->replicas)); + + stats->replicas[replicas - 1].persistent_reserved += sectors; break; + } default: break; } + + return ret; } -void bch2_mark_key(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, +int bch2_mark_key_locked(struct bch_fs *c, + struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, struct bch_fs_usage *stats, u64 journal_seq, unsigned flags) { - percpu_down_read_preempt_disable(&c->usage_lock); + int ret; if (!(flags & BCH_BUCKET_MARK_GC)) { if (!stats) stats = this_cpu_ptr(c->usage[0]); - __bch2_mark_key(c, type, k, inserting, sectors, - stats, journal_seq, flags, false); + ret = __bch2_mark_key(c, k, inserting, sectors, + stats, journal_seq, flags, false); + if (ret) + return ret; } if ((flags & BCH_BUCKET_MARK_GC) || gc_visited(c, pos)) { - __bch2_mark_key(c, type, k, inserting, sectors, - this_cpu_ptr(c->usage[1]), - journal_seq, flags, true); + ret = __bch2_mark_key(c, k, inserting, sectors, + this_cpu_ptr(c->usage[1]), + journal_seq, flags, true); + if (ret) + return ret; } + return 0; +} + +int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, + bool inserting, s64 sectors, + struct gc_pos pos, + struct bch_fs_usage *stats, + u64 journal_seq, unsigned flags) +{ + int ret; + + 
percpu_down_read_preempt_disable(&c->usage_lock); + ret = bch2_mark_key_locked(c, k, inserting, sectors, + pos, stats, journal_seq, flags); percpu_up_read_preempt_enable(&c->usage_lock); + + return ret; } void bch2_mark_update(struct btree_insert *trans, @@ -878,15 +929,19 @@ void bch2_mark_update(struct btree_insert *trans, struct gc_pos pos = gc_pos_btree_node(b); struct bkey_packed *_k; + if (!btree_node_type_needs_gc(iter->btree_id)) + return; + + percpu_down_read_preempt_disable(&c->usage_lock); + if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) - bch2_mark_key(c, btree_node_type(b), bkey_i_to_s_c(insert->k), - true, - bpos_min(insert->k->k.p, b->key.k.p).offset - - bkey_start_offset(&insert->k->k), - pos, &stats, trans->journal_res.seq, 0); + bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, + bpos_min(insert->k->k.p, b->key.k.p).offset - + bkey_start_offset(&insert->k->k), + pos, &stats, trans->journal_res.seq, 0); while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, - KEY_TYPE_DISCARD))) { + KEY_TYPE_discard))) { struct bkey unpacked; struct bkey_s_c k; s64 sectors = 0; @@ -915,9 +970,8 @@ void bch2_mark_update(struct btree_insert *trans, sectors = k.k->p.offset - insert->k->k.p.offset; BUG_ON(sectors <= 0); - bch2_mark_key(c, btree_node_type(b), k, - true, sectors, - pos, &stats, trans->journal_res.seq, 0); + bch2_mark_key_locked(c, k, true, sectors, + pos, &stats, trans->journal_res.seq, 0); sectors = bkey_start_offset(&insert->k->k) - k.k->p.offset; @@ -927,14 +981,15 @@ void bch2_mark_update(struct btree_insert *trans, BUG_ON(sectors >= 0); } - bch2_mark_key(c, btree_node_type(b), k, - false, sectors, - pos, &stats, trans->journal_res.seq, 0); + bch2_mark_key_locked(c, k, false, sectors, + pos, &stats, trans->journal_res.seq, 0); bch2_btree_node_iter_advance(&node_iter, b); } bch2_fs_usage_apply(c, &stats, trans->disk_res, pos); + + percpu_up_read_preempt_enable(&c->usage_lock); } /* Disk reservations: */ diff --git 
a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 76ebe2e..17a9b44 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -219,9 +219,12 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, #define BCH_BUCKET_MARK_NOATOMIC (1 << 0) #define BCH_BUCKET_MARK_GC (1 << 1) -void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c, - bool, s64, struct gc_pos, - struct bch_fs_usage *, u64, unsigned); +int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, + bool, s64, struct gc_pos, + struct bch_fs_usage *, u64, unsigned); +int bch2_mark_key(struct bch_fs *, struct bkey_s_c, + bool, s64, struct gc_pos, + struct bch_fs_usage *, u64, unsigned); void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *); void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 77f90f7..e8a671a 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -55,7 +55,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) v->btree_id = b->btree_id; bch2_btree_keys_init(v, &c->expensive_debug_checks); - if (bch2_btree_pick_ptr(c, b, NULL, &pick) <= 0) + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), + NULL, &pick) <= 0) return; ca = bch_dev_bkey_exists(c, pick.ptr.dev); @@ -222,8 +223,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, k = bch2_btree_iter_peek(&iter); while (k.k && !(err = btree_iter_err(k))) { - bch2_bkey_val_to_text(&PBUF(i->buf), i->c, - bkey_type(0, i->id), k); + bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); i->bytes = strlen(i->buf); BUG_ON(i->bytes >= PAGE_SIZE); i->buf[i->bytes] = '\n'; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 3ec0b4c..9a40008 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -64,8 +64,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) const struct bch_hash_desc bch2_dirent_hash_desc = { .btree_id = BTREE_ID_DIRENTS, 
- .key_type = BCH_DIRENT, - .whiteout_type = BCH_DIRENT_WHITEOUT, + .key_type = KEY_TYPE_dirent, .hash_key = dirent_hash_key, .hash_bkey = dirent_hash_bkey, .cmp_key = dirent_cmp_key, @@ -74,58 +73,37 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_dirent d; + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); unsigned len; - switch (k.k->type) { - case BCH_DIRENT: - if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) - return "value too small"; - - d = bkey_s_c_to_dirent(k); - len = bch2_dirent_name_bytes(d); - - if (!len) - return "empty name"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) + return "value too small"; - /* - * older versions of bcachefs were buggy and creating dirent - * keys that were bigger than necessary: - */ - if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) - return "value too big"; + len = bch2_dirent_name_bytes(d); + if (!len) + return "empty name"; - if (len > BCH_NAME_MAX) - return "dirent name too big"; + /* + * older versions of bcachefs were buggy and creating dirent + * keys that were bigger than necessary: + */ + if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) + return "value too big"; - return NULL; - case BCH_DIRENT_WHITEOUT: - return bkey_val_bytes(k.k) != 0 - ? 
"value size should be zero" - : NULL; + if (len > BCH_NAME_MAX) + return "dirent name too big"; - default: - return "invalid type"; - } + return NULL; } void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_dirent d; - - switch (k.k->type) { - case BCH_DIRENT: - d = bkey_s_c_to_dirent(k); - - bch_scnmemcpy(out, d.v->d_name, - bch2_dirent_name_bytes(d)); - pr_buf(out, " -> %llu", d.v->d_inum); - break; - case BCH_DIRENT_WHITEOUT: - pr_buf(out, "whiteout"); - break; - } + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + + bch_scnmemcpy(out, d.v->d_name, + bch2_dirent_name_bytes(d)); + pr_buf(out, " -> %llu", d.v->d_inum); } static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, @@ -286,7 +264,7 @@ int bch2_dirent_rename(struct btree_trans *trans, * overwrite old_dst - just make sure to use a * whiteout when deleting src: */ - new_src->k.type = BCH_DIRENT_WHITEOUT; + new_src->k.type = KEY_TYPE_whiteout; } } else { /* Check if we need a whiteout to delete src: */ @@ -297,7 +275,7 @@ int bch2_dirent_rename(struct btree_trans *trans, return ret; if (ret) - new_src->k.type = BCH_DIRENT_WHITEOUT; + new_src->k.type = KEY_TYPE_whiteout; } } @@ -360,7 +338,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) if (k.k->p.inode > dir_inum) break; - if (k.k->type == BCH_DIRENT) { + if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; } @@ -384,7 +362,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(inode->v.i_ino, ctx->pos), 0, k) { - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index a57a538..ed09d30 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -8,7 +8,7 @@ extern const struct bch_hash_desc bch2_dirent_hash_desc; const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); void 
bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_dirent_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_dirent (struct bkey_ops) { \ .key_invalid = bch2_dirent_invalid, \ .val_to_text = bch2_dirent_to_text, \ } diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 02c51ea..c8115f6 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -122,49 +122,39 @@ static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx) return csums + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; } -const char *bch2_ec_key_invalid(const struct bch_fs *c, struct bkey_s_c k) +const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) { + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + if (k.k->p.inode) return "invalid stripe key"; - switch (k.k->type) { - case BCH_STRIPE: { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - - if (bkey_val_bytes(k.k) < sizeof(*s)) - return "incorrect value size"; + if (bkey_val_bytes(k.k) < sizeof(*s)) + return "incorrect value size"; - if (bkey_val_u64s(k.k) != stripe_val_u64s(s)) - return "incorrect value size"; + if (bkey_val_u64s(k.k) != stripe_val_u64s(s)) + return "incorrect value size"; - return NULL; - } - default: - return "invalid type"; - } + return NULL; } -void bch2_ec_key_to_text(struct printbuf *out, struct bch_fs *c, +void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - switch (k.k->type) { - case BCH_STRIPE: { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - unsigned i; - - pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", - s->algorithm, - le16_to_cpu(s->sectors), - s->nr_blocks - s->nr_redundant, - s->nr_redundant, - s->csum_type, - 1U << s->csum_granularity_bits); - - for (i = 0; i < s->nr_blocks; i++) - pr_buf(out, " %u:%llu", s->ptrs[i].dev, - (u64) s->ptrs[i].offset); - } - } + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + unsigned i; + + pr_buf(out, "algo %u 
sectors %u blocks %u:%u csum %u gran %u", + s->algorithm, + le16_to_cpu(s->sectors), + s->nr_blocks - s->nr_redundant, + s->nr_redundant, + s->csum_type, + 1U << s->csum_granularity_bits); + + for (i = 0; i < s->nr_blocks; i++) + pr_buf(out, " %u:%llu", s->ptrs[i].dev, + (u64) s->ptrs[i].offset); } static int ptr_matches_stripe(struct bch_fs *c, @@ -453,7 +443,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) POS(0, stripe_idx), BTREE_ITER_SLOTS); k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != BCH_STRIPE) { + if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { __bcache_io_error(c, "error doing reconstruct read: stripe not found"); kfree(buf); @@ -529,7 +519,7 @@ err: return ret; } -/* ec_stripe bucket accounting: */ +/* stripe bucket accounting: */ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) { @@ -550,7 +540,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) free_heap(&n); } - if (!genradix_ptr_alloc(&c->ec_stripes, idx, gfp)) + if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp)) + return -ENOMEM; + + if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && + !genradix_ptr_alloc(&c->stripes[1], idx, gfp)) return -ENOMEM; return 0; @@ -591,27 +585,26 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h, { struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap); - genradix_ptr(&c->ec_stripes, h->data[i].idx)->heap_idx = i; + genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i; } static void heap_verify_backpointer(struct bch_fs *c, size_t idx) { ec_stripes_heap *h = &c->ec_stripes_heap; - struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx); + struct stripe *m = genradix_ptr(&c->stripes[0], idx); BUG_ON(!m->alive); BUG_ON(m->heap_idx >= h->used); BUG_ON(h->data[m->heap_idx].idx != idx); } -static inline unsigned stripe_entry_blocks(struct ec_stripe *m) +static inline unsigned stripe_entry_blocks(struct stripe *m) { - 
return atomic_read(&m->pin) - ? UINT_MAX : atomic_read(&m->blocks_nonempty); + return atomic_read(&m->blocks_nonempty); } void bch2_stripes_heap_update(struct bch_fs *c, - struct ec_stripe *m, size_t idx) + struct stripe *m, size_t idx) { ec_stripes_heap *h = &c->ec_stripes_heap; bool queue_delete; @@ -645,7 +638,7 @@ void bch2_stripes_heap_update(struct bch_fs *c, } void bch2_stripes_heap_del(struct bch_fs *c, - struct ec_stripe *m, size_t idx) + struct stripe *m, size_t idx) { spin_lock(&c->ec_stripes_heap_lock); heap_verify_backpointer(c, idx); @@ -658,7 +651,7 @@ void bch2_stripes_heap_del(struct bch_fs *c, } void bch2_stripes_heap_insert(struct bch_fs *c, - struct ec_stripe *m, size_t idx) + struct stripe *m, size_t idx) { spin_lock(&c->ec_stripes_heap_lock); @@ -677,7 +670,9 @@ void bch2_stripes_heap_insert(struct bch_fs *c, spin_unlock(&c->ec_stripes_heap_lock); } -static void ec_stripe_delete(struct bch_fs *c, unsigned idx) +/* stripe deletion */ + +static void ec_stripe_delete(struct bch_fs *c, size_t idx) { struct btree_iter iter; struct bch_stripe *v = NULL; @@ -689,7 +684,7 @@ static void ec_stripe_delete(struct bch_fs *c, unsigned idx) POS(0, idx), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != BCH_STRIPE) + if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) goto out; v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL); @@ -716,6 +711,7 @@ static void ec_stripe_delete_work(struct work_struct *work) ssize_t idx; down_read(&c->gc_lock); + mutex_lock(&c->ec_stripe_create_lock); while (1) { spin_lock(&c->ec_stripes_heap_lock); @@ -728,13 +724,15 @@ static void ec_stripe_delete_work(struct work_struct *work) ec_stripe_delete(c, idx); } + mutex_unlock(&c->ec_stripe_create_lock); up_read(&c->gc_lock); } +/* stripe creation: */ + static int ec_stripe_bkey_insert(struct bch_fs *c, struct bkey_i_stripe *stripe) { - struct ec_stripe *m; struct btree_iter iter; struct bkey_s_c k; int ret; @@ -754,18 
+752,13 @@ retry: return bch2_btree_iter_unlock(&iter) ?: -ENOSPC; found_slot: - mutex_lock(&c->ec_stripes_lock); ret = ec_stripe_mem_alloc(c, &iter); - mutex_unlock(&c->ec_stripes_lock); if (ret == -EINTR) goto retry; if (ret) return ret; - m = genradix_ptr(&c->ec_stripes, iter.pos.offset); - atomic_inc(&m->pin); - stripe->k.p = iter.pos; ret = bch2_btree_insert_at(c, NULL, NULL, @@ -774,14 +767,9 @@ found_slot: BTREE_INSERT_ENTRY(&iter, &stripe->k_i)); bch2_btree_iter_unlock(&iter); - if (ret) - atomic_dec(&m->pin); - return ret; } -/* stripe creation: */ - static void extent_stripe_ptr_add(struct bkey_s_extent e, struct ec_stripe_buf *s, struct bch_extent_ptr *ptr, @@ -857,7 +845,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c, */ static void ec_stripe_create(struct ec_stripe_new *s) { - struct ec_stripe *ec_stripe; struct bch_fs *c = s->c; struct open_bucket *ob; struct bkey_i *k; @@ -897,10 +884,12 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err_put_writes; } + mutex_lock(&c->ec_stripe_create_lock); + ret = ec_stripe_bkey_insert(c, &s->stripe.key); if (ret) { bch_err(c, "error creating stripe: error creating stripe key"); - goto err_put_writes; + goto err_unlock; } for_each_keylist_key(&s->keys, k) { @@ -909,12 +898,8 @@ static void ec_stripe_create(struct ec_stripe_new *s) break; } - ec_stripe = genradix_ptr(&c->ec_stripes, s->stripe.key.k.p.offset); - - atomic_dec(&ec_stripe->pin); - bch2_stripes_heap_update(c, ec_stripe, - s->stripe.key.k.p.offset); - +err_unlock: + mutex_unlock(&c->ec_stripe_create_lock); err_put_writes: percpu_ref_put(&c->writes); err: @@ -1221,7 +1206,7 @@ unlock: mutex_unlock(&c->ec_new_stripe_lock); } -int bch2_fs_ec_start(struct bch_fs *c) +int bch2_ec_mem_alloc(struct bch_fs *c, bool gc) { struct btree_iter iter; struct bkey_s_c k; @@ -1237,19 +1222,25 @@ int bch2_fs_ec_start(struct bch_fs *c) if (ret) return ret; - if (!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), + if (!gc && + 
!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx), GFP_KERNEL)) return -ENOMEM; #if 0 - ret = genradix_prealloc(&c->ec_stripes, idx, GFP_KERNEL); + ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL); #else for (i = 0; i < idx; i++) - if (!genradix_ptr_alloc(&c->ec_stripes, i, GFP_KERNEL)) + if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL)) return -ENOMEM; #endif return 0; } +int bch2_fs_ec_start(struct bch_fs *c) +{ + return bch2_ec_mem_alloc(c, false); +} + void bch2_fs_ec_exit(struct bch_fs *c) { struct ec_stripe_head *h; @@ -1270,7 +1261,7 @@ void bch2_fs_ec_exit(struct bch_fs *c) } free_heap(&c->ec_stripes_heap); - genradix_free(&c->ec_stripes); + genradix_free(&c->stripes[0]); bioset_exit(&c->ec_bioset); } diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 13b875a..c728c52 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -4,13 +4,13 @@ #include "ec_types.h" #include "keylist_types.h" -const char *bch2_ec_key_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_ec_key_to_text(struct printbuf *, struct bch_fs *, +const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_ec_ops (struct bkey_ops) { \ - .key_invalid = bch2_ec_key_invalid, \ - .val_to_text = bch2_ec_key_to_text, \ +#define bch2_bkey_ops_stripe (struct bkey_ops) { \ + .key_invalid = bch2_stripe_invalid, \ + .val_to_text = bch2_stripe_to_text, \ } struct bch_read_bio; @@ -92,14 +92,16 @@ void bch2_ec_stripe_head_put(struct ec_stripe_head *); struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned, unsigned, unsigned); -void bch2_stripes_heap_update(struct bch_fs *, struct ec_stripe *, size_t); -void bch2_stripes_heap_del(struct bch_fs *, struct ec_stripe *, size_t); -void bch2_stripes_heap_insert(struct bch_fs *, struct ec_stripe *, size_t); +void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t); +void 
bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t); +void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_ec_flush_new_stripes(struct bch_fs *); +int bch2_ec_mem_alloc(struct bch_fs *, bool); + int bch2_fs_ec_start(struct bch_fs *); void bch2_fs_ec_exit(struct bch_fs *); diff --git a/libbcachefs/ec_types.h b/libbcachefs/ec_types.h index feb3601..d042981 100644 --- a/libbcachefs/ec_types.h +++ b/libbcachefs/ec_types.h @@ -5,7 +5,7 @@ #define EC_STRIPE_MAX 16 -struct ec_stripe { +struct stripe { size_t heap_idx; u16 sectors; @@ -15,7 +15,6 @@ struct ec_stripe { u8 nr_redundant; u8 alive; - atomic_t pin; atomic_t blocks_nonempty; atomic_t block_sectors[EC_STRIPE_MAX]; }; diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index ebaf390..dc3fbfb 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -27,209 +27,270 @@ #include -static void sort_key_next(struct btree_node_iter_large *iter, - struct btree *b, - struct btree_node_iter_set *i) +unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) { - i->k += __btree_node_offset_to_key(b, i->k)->u64s; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + unsigned nr_ptrs = 0; + + bkey_for_each_ptr(p, ptr) + nr_ptrs++; - if (i->k == i->end) - *i = iter->data[--iter->used]; + return nr_ptrs; } -/* - * Returns true if l > r - unless l == r, in which case returns true if l is - * older than r. - * - * Necessary for btree_sort_fixup() - if there are multiple keys that compare - * equal in different sets, we have to process them newest to oldest. 
- */ -#define key_sort_cmp(h, l, r) \ -({ \ - bkey_cmp_packed(b, \ - __btree_node_offset_to_key(b, (l).k), \ - __btree_node_offset_to_key(b, (r).k)) \ - \ - ?: (l).k - (r).k; \ -}) - -static inline bool should_drop_next_key(struct btree_node_iter_large *iter, - struct btree *b) +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k) { - struct btree_node_iter_set *l = iter->data, *r = iter->data + 1; - struct bkey_packed *k = __btree_node_offset_to_key(b, l->k); - - if (bkey_whiteout(k)) - return true; + unsigned nr_ptrs = 0; - if (iter->used < 2) - return false; + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: { + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; - if (iter->used > 2 && - key_sort_cmp(iter, r[0], r[1]) >= 0) - r++; + bkey_for_each_ptr(p, ptr) + nr_ptrs += !ptr->cached; + BUG_ON(!nr_ptrs); + break; + } + case KEY_TYPE_reservation: + nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas; + break; + } - /* - * key_sort_cmp() ensures that when keys compare equal the older key - * comes first; so if l->k compares equal to r->k then l->k is older and - * should be dropped. 
- */ - return !bkey_cmp_packed(b, - __btree_node_offset_to_key(b, l->k), - __btree_node_offset_to_key(b, r->k)); + return nr_ptrs; } -struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst, - struct btree *b, - struct btree_node_iter_large *iter) +static unsigned bch2_extent_ptr_durability(struct bch_fs *c, + struct extent_ptr_decoded p) { - struct bkey_packed *out = dst->start; - struct btree_nr_keys nr; + unsigned i, durability = 0; + struct bch_dev *ca; - memset(&nr, 0, sizeof(nr)); + if (p.ptr.cached) + return 0; - heap_resort(iter, key_sort_cmp, NULL); + ca = bch_dev_bkey_exists(c, p.ptr.dev); - while (!bch2_btree_node_iter_large_end(iter)) { - if (!should_drop_next_key(iter, b)) { - struct bkey_packed *k = - __btree_node_offset_to_key(b, iter->data->k); + if (ca->mi.state != BCH_MEMBER_STATE_FAILED) + durability = max_t(unsigned, durability, ca->mi.durability); - bkey_copy(out, k); - btree_keys_account_key_add(&nr, 0, out); - out = bkey_next(out); - } + for (i = 0; i < p.ec_nr; i++) { + struct stripe *s = + genradix_ptr(&c->stripes[0], p.idx); - sort_key_next(iter, b, iter->data); - heap_sift_down(iter, 0, key_sort_cmp, NULL); + if (WARN_ON(!s)) + continue; + + durability = max_t(unsigned, durability, s->nr_redundant); } - dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - return nr; + return durability; } -/* Common among btree and extent ptrs */ +unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + unsigned durability = 0; -const struct bch_extent_ptr * -bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + durability += bch2_extent_ptr_durability(c, p); + + return durability; +} + +static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, + unsigned dev) { - const struct bch_extent_ptr *ptr; + struct bch_dev_io_failures *i; - 
extent_for_each_ptr(e, ptr) - if (ptr->dev == dev) - return ptr; + for (i = f->devs; i < f->devs + f->nr; i++) + if (i->dev == dev) + return i; return NULL; } -void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev) +void bch2_mark_io_failure(struct bch_io_failures *failed, + struct extent_ptr_decoded *p) { - struct bch_extent_ptr *ptr; + struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); - bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev); + if (!f) { + BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); + + f = &failed->devs[failed->nr++]; + f->dev = p->ptr.dev; + f->idx = p->idx; + f->nr_failed = 1; + f->nr_retries = 0; + } else if (p->idx != f->idx) { + f->idx = p->idx; + f->nr_failed = 1; + f->nr_retries = 0; + } else { + f->nr_failed++; + } } -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group) +/* + * returns true if p1 is better than p2: + */ +static inline bool ptr_better(struct bch_fs *c, + const struct extent_ptr_decoded p1, + const struct extent_ptr_decoded p2) { - const struct bch_extent_ptr *ptr; + if (likely(!p1.idx && !p2.idx)) { + struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); + struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); - extent_for_each_ptr(e, ptr) { - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + u64 l1 = atomic64_read(&dev1->cur_latency[READ]); + u64 l2 = atomic64_read(&dev2->cur_latency[READ]); - if (ca->mi.group && - ca->mi.group - 1 == group) - return ptr; + /* Pick at random, biased in favor of the faster device: */ + + return bch2_rand_range(l1 + l2) > l1; } - return NULL; + if (force_reconstruct_read(c)) + return p1.idx > p2.idx; + + return p1.idx < p2.idx; } -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) +/* + * This picks a non-stale pointer, preferably from a device other than @avoid. + * Avoid can be NULL, meaning pick any. 
If there are no non-stale pointers to + * other devices, it will still pick a pointer from avoid. + */ +int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, + struct bch_io_failures *failed, + struct extent_ptr_decoded *pick) { - const struct bch_extent_ptr *ptr; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bch_dev_io_failures *f; + struct bch_dev *ca; + int ret = 0; - extent_for_each_ptr(e, ptr) - if (bch2_dev_in_target(c, ptr->dev, target) && - (!ptr->cached || - !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) - return ptr; + if (k.k->type == KEY_TYPE_error) + return -EIO; - return NULL; -} + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + ca = bch_dev_bkey_exists(c, p.ptr.dev); -unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e) -{ - const struct bch_extent_ptr *ptr; - unsigned nr_ptrs = 0; + /* + * If there are any dirty pointers it's an error if we can't + * read: + */ + if (!ret && !p.ptr.cached) + ret = -EIO; - extent_for_each_ptr(e, ptr) - nr_ptrs++; + if (p.ptr.cached && ptr_stale(ca, &p.ptr)) + continue; - return nr_ptrs; + f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; + if (f) + p.idx = f->nr_failed < f->nr_retries + ? 
f->idx + : f->idx + 1; + + if (!p.idx && + !bch2_dev_is_readable(ca)) + p.idx++; + + if (force_reconstruct_read(c) && + !p.idx && p.ec_nr) + p.idx++; + + if (p.idx >= p.ec_nr + 1) + continue; + + if (ret > 0 && !ptr_better(c, p, *pick)) + continue; + + *pick = p; + ret = 1; + } + + return ret; } -unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k) +void bch2_bkey_append_ptr(struct bkey_i *k, + struct bch_extent_ptr ptr) { - struct bkey_s_c_extent e; - const struct bch_extent_ptr *ptr; - unsigned nr_ptrs = 0; + EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev)); - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_c_to_extent(k); + switch (k->k.type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: + EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); - extent_for_each_ptr(e, ptr) - nr_ptrs += !ptr->cached; - break; + ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; - case BCH_RESERVATION: - nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas; + memcpy((void *) &k->v + bkey_val_bytes(&k->k), + &ptr, + sizeof(ptr)); + k->u64s++; break; + default: + BUG(); } - - return nr_ptrs; } -static unsigned bch2_extent_ptr_durability(struct bch_fs *c, - struct extent_ptr_decoded p) +void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) { - unsigned i, durability = 0; - struct bch_dev *ca; + struct bch_extent_ptr *ptr; - if (p.ptr.cached) - return 0; + bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); +} - ca = bch_dev_bkey_exists(c, p.ptr.dev); +/* extent specific utility code */ - if (ca->mi.state != BCH_MEMBER_STATE_FAILED) - durability = max_t(unsigned, durability, ca->mi.durability); +const struct bch_extent_ptr * +bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev) +{ + const struct bch_extent_ptr *ptr; - for (i = 0; i < p.ec_nr; i++) { - struct ec_stripe *s = - genradix_ptr(&c->ec_stripes, p.idx); + extent_for_each_ptr(e, ptr) + if (ptr->dev == dev) + return ptr; - if (WARN_ON(!s)) - continue; + return NULL; +} - durability = 
max_t(unsigned, durability, s->nr_redundant); +const struct bch_extent_ptr * +bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + + if (ca->mi.group && + ca->mi.group - 1 == group) + return ptr; } - return durability; + return NULL; } -unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e) +const struct bch_extent_ptr * +bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) { - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - unsigned durability = 0; + const struct bch_extent_ptr *ptr; - extent_for_each_ptr_decode(e, p, entry) - durability += bch2_extent_ptr_durability(c, p); + extent_for_each_ptr(e, ptr) + if (bch2_dev_in_target(c, ptr->dev, target) && + (!ptr->cached || + !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) + return ptr; - return durability; + return NULL; } unsigned bch2_extent_is_compressed(struct bkey_s_c k) @@ -237,8 +298,7 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k) unsigned ret = 0; switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { + case KEY_TYPE_extent: { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -270,10 +330,10 @@ bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e, return false; } -static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e, +static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, union bch_extent_entry *entry) { - union bch_extent_entry *i = e.v->start; + union bch_extent_entry *i = ptrs.start; if (i == entry) return NULL; @@ -283,23 +343,24 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e, return i; } -union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e, - struct bch_extent_ptr *ptr) +union 
bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k, + struct bch_extent_ptr *ptr) { + struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *dst, *src, *prev; bool drop_crc = true; - EBUG_ON(ptr < &e.v->start->ptr || - ptr >= &extent_entry_last(e)->ptr); + EBUG_ON(ptr < &ptrs.start->ptr || + ptr >= &ptrs.end->ptr); EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); src = extent_entry_next(to_entry(ptr)); - if (src != extent_entry_last(e) && + if (src != ptrs.end && !extent_entry_is_crc(src)) drop_crc = false; dst = to_entry(ptr); - while ((prev = extent_entry_prev(e, dst))) { + while ((prev = extent_entry_prev(ptrs, dst))) { if (extent_entry_is_ptr(prev)) break; @@ -313,8 +374,8 @@ union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e, } memmove_u64s_down(dst, src, - (u64 *) extent_entry_last(e) - (u64 *) src); - e.k->u64s -= (u64 *) src - (u64 *) dst; + (u64 *) ptrs.end - (u64 *) src); + k.k->u64s -= (u64 *) src - (u64 *) dst; return dst; } @@ -381,7 +442,7 @@ found: restart_narrow_pointers: extent_for_each_ptr_decode(extent_i_to_s(e), p, i) if (can_narrow_crc(p.crc, n)) { - bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr); + bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr); p.ptr.offset += p.crc.offset; p.crc = n; bch2_extent_ptr_decoded_append(e, &p); @@ -406,66 +467,47 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, bch2_crc_cmp(l.csum, r.csum)); } -static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) -{ - struct bch_extent_ptr *ptr; - - bch2_extent_drop_ptrs(e, ptr, - ptr->cached && - ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -} - -bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k) -{ - return bch2_extent_normalize(c, k); -} - void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) { - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - union bch_extent_entry *entry; - u64 *d = (u64 *) bkeyp_val(f, k); - unsigned i; + 
union bch_extent_entry *entry; + u64 *d = (u64 *) bkeyp_val(f, k); + unsigned i; - for (i = 0; i < bkeyp_val_u64s(f, k); i++) - d[i] = swab64(d[i]); + for (i = 0; i < bkeyp_val_u64s(f, k); i++) + d[i] = swab64(d[i]); - for (entry = (union bch_extent_entry *) d; - entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); - entry = extent_entry_next(entry)) { - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - break; - case BCH_EXTENT_ENTRY_crc32: - entry->crc32.csum = swab32(entry->crc32.csum); - break; - case BCH_EXTENT_ENTRY_crc64: - entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); - entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); - break; - case BCH_EXTENT_ENTRY_crc128: - entry->crc128.csum.hi = (__force __le64) - swab64((__force u64) entry->crc128.csum.hi); - entry->crc128.csum.lo = (__force __le64) - swab64((__force u64) entry->crc128.csum.lo); - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; - } + for (entry = (union bch_extent_entry *) d; + entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); + entry = extent_entry_next(entry)) { + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + break; + case BCH_EXTENT_ENTRY_crc32: + entry->crc32.csum = swab32(entry->crc32.csum); + break; + case BCH_EXTENT_ENTRY_crc64: + entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); + entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); + break; + case BCH_EXTENT_ENTRY_crc128: + entry->crc128.csum.hi = (__force __le64) + swab64((__force u64) entry->crc128.csum.hi); + entry->crc128.csum.lo = (__force __le64) + swab64((__force u64) entry->crc128.csum.lo); + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + break; } - break; - } } } static const char *extent_ptr_invalid(const struct bch_fs *c, - struct bkey_s_c_extent e, + struct bkey_s_c k, const struct bch_extent_ptr *ptr, unsigned size_ondisk, bool metadata) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr2; struct bch_dev *ca; @@ -477,7 
+519,7 @@ static const char *extent_ptr_invalid(const struct bch_fs *c, if (!ca) return "pointer to invalid device"; - extent_for_each_ptr(e, ptr2) + bkey_for_each_ptr(ptrs, ptr2) if (ptr != ptr2 && ptr->dev == ptr2->dev) return "multiple pointers to same device"; @@ -494,9 +536,10 @@ static const char *extent_ptr_invalid(const struct bch_fs *c, return NULL; } -static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c_extent e) +static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct bch_extent_crc_unpacked crc; const struct bch_extent_ptr *ptr; @@ -504,7 +547,7 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca; bool first = true; - extent_for_each_entry(e, entry) { + bkey_extent_entry_for_each(ptrs, entry) { if (!first) pr_buf(out, " "); @@ -524,184 +567,65 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c, case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); + crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", crc.compressed_size, crc.uncompressed_size, - crc.offset, crc.nonce, - crc.csum_type, - crc.compression_type); - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - ec = &entry->stripe_ptr; - - pr_buf(out, "ec: idx %llu block %u", - (u64) ec->idx, ec->block); - break; - default: - pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); - goto out; - } - - first = false; - } -out: - if (bkey_extent_is_cached(e.k)) - pr_buf(out, " cached"); -} - -static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, - unsigned dev) -{ - struct bch_dev_io_failures *i; - - for (i = f->devs; i < f->devs + f->nr; i++) - if (i->dev == dev) - return i; - - return 
NULL; -} - -void bch2_mark_io_failure(struct bch_io_failures *failed, - struct extent_ptr_decoded *p) -{ - struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); - - if (!f) { - BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); - - f = &failed->devs[failed->nr++]; - f->dev = p->ptr.dev; - f->idx = p->idx; - f->nr_failed = 1; - f->nr_retries = 0; - } else if (p->idx != f->idx) { - f->idx = p->idx; - f->nr_failed = 1; - f->nr_retries = 0; - } else { - f->nr_failed++; - } -} - -/* - * returns true if p1 is better than p2: - */ -static inline bool ptr_better(struct bch_fs *c, - const struct extent_ptr_decoded p1, - const struct extent_ptr_decoded p2) -{ - if (likely(!p1.idx && !p2.idx)) { - struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); - struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); - - u64 l1 = atomic64_read(&dev1->cur_latency[READ]); - u64 l2 = atomic64_read(&dev2->cur_latency[READ]); - - /* Pick at random, biased in favor of the faster device: */ - - return bch2_rand_range(l1 + l2) > l1; - } - - if (force_reconstruct_read(c)) - return p1.idx > p2.idx; - - return p1.idx < p2.idx; -} - -static int extent_pick_read_device(struct bch_fs *c, - struct bkey_s_c_extent e, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bch_dev_io_failures *f; - struct bch_dev *ca; - int ret = 0; - - extent_for_each_ptr_decode(e, p, entry) { - ca = bch_dev_bkey_exists(c, p.ptr.dev); - - if (p.ptr.cached && ptr_stale(ca, &p.ptr)) - continue; - - f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; - if (f) - p.idx = f->nr_failed < f->nr_retries - ? 
f->idx - : f->idx + 1; - - if (!p.idx && - !bch2_dev_is_readable(ca)) - p.idx++; - - if (force_reconstruct_read(c) && - !p.idx && p.ec_nr) - p.idx++; - - if (p.idx >= p.ec_nr + 1) - continue; + crc.offset, crc.nonce, + crc.csum_type, + crc.compression_type); + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + ec = &entry->stripe_ptr; - if (ret && !ptr_better(c, p, *pick)) - continue; + pr_buf(out, "ec: idx %llu block %u", + (u64) ec->idx, ec->block); + break; + default: + pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); + return; + } - *pick = p; - ret = 1; + first = false; } - - return ret; } /* Btree ptrs */ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (bkey_extent_is_cached(k.k)) - return "cached"; - - if (k.k->size) - return "nonzero key size"; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + const struct bch_extent_ptr *ptr; + const char *reason; if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) return "value too big"; - switch (k.k->type) { - case BCH_EXTENT: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - const struct bch_extent_ptr *ptr; - const char *reason; - - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; - - if (!extent_entry_is_ptr(entry)) - return "has non ptr field"; - } - - extent_for_each_ptr(e, ptr) { - reason = extent_ptr_invalid(c, e, ptr, - c->opts.btree_node_size, - true); - if (reason) - return reason; - } + bkey_extent_entry_for_each(ptrs, entry) { + if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) + return "invalid extent entry type"; - return NULL; + if (!extent_entry_is_ptr(entry)) + return "has non ptr field"; } - default: - return "invalid value type"; + bkey_for_each_ptr(ptrs, ptr) { + reason = extent_ptr_invalid(c, k, ptr, + c->opts.btree_node_size, + true); + if (reason) + return reason; } + + return 
NULL; } void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; unsigned seq; const char *err; @@ -711,7 +635,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, unsigned replicas = 0; bool bad; - extent_for_each_ptr(e, ptr) { + bkey_for_each_ptr(ptrs, ptr) { ca = bch_dev_bkey_exists(c, ptr->dev); replicas++; @@ -737,9 +661,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, } if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, btree_node_type(b), - e.s_c, false)) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + !bch2_bkey_replicas_marked(c, k, false)) { + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "btree key bad (replicas not marked in superblock):\n%s", buf); @@ -748,7 +671,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, return; err: - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", err, buf, PTR_BUCKET_NR(ca, ptr), mark.gen, (unsigned) mark.v.counter); @@ -759,25 +682,16 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, { const char *invalid; - if (bkey_extent_is_data(k.k)) - extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); + bkey_ptrs_to_text(out, c, k); invalid = bch2_btree_ptr_invalid(c, k); if (invalid) pr_buf(out, " invalid: %s", invalid); } -int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key), - failed, pick); -} - /* Extents */ -static bool __bch2_cut_front(struct bpos where, struct bkey_s k) +bool __bch2_cut_front(struct bpos where, struct bkey_s k) { u64 len = 0; @@ -795,7 
+709,7 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k) * cause offset to point to the next bucket: */ if (!len) - k.k->type = KEY_TYPE_DELETED; + k.k->type = KEY_TYPE_deleted; else if (bkey_extent_is_data(k.k)) { struct bkey_s_extent e = bkey_s_to_extent(k); union bch_extent_entry *entry; @@ -830,11 +744,6 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k) return true; } -bool bch2_cut_front(struct bpos where, struct bkey_i *k) -{ - return __bch2_cut_front(where, bkey_i_to_s(k)); -} - bool bch2_cut_back(struct bpos where, struct bkey *k) { u64 len = 0; @@ -852,7 +761,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k) k->size = len; if (!len) - k->type = KEY_TYPE_DELETED; + k->type = KEY_TYPE_deleted; return true; } @@ -870,24 +779,6 @@ void bch2_key_resize(struct bkey *k, k->size = new_size; } -/* - * In extent_sort_fix_overlapping(), insert_fixup_extent(), - * extent_merge_inline() - we're modifying keys in place that are packed. To do - * that we have to unpack the key, modify the unpacked key - then this - * copies/repacks the unpacked to the original as necessary. - */ -static void extent_save(struct btree *b, struct bkey_packed *dst, - struct bkey *src) -{ - struct bkey_format *f = &b->format; - struct bkey_i *dst_unpacked; - - if ((dst_unpacked = packed_to_bkey(dst))) - dst_unpacked->k = *src; - else - BUG_ON(!bch2_bkey_pack_key(dst, src, f)); -} - static bool extent_i_save(struct btree *b, struct bkey_packed *dst, struct bkey_i *src) { @@ -906,170 +797,6 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst, return true; } -/* - * If keys compare equal, compare by pointer order: - * - * Necessary for sort_fix_overlapping() - if there are multiple keys that - * compare equal in different sets, we have to process them newest to oldest. 
- */ -#define extent_sort_cmp(h, l, r) \ -({ \ - struct bkey _ul = bkey_unpack_key(b, \ - __btree_node_offset_to_key(b, (l).k)); \ - struct bkey _ur = bkey_unpack_key(b, \ - __btree_node_offset_to_key(b, (r).k)); \ - \ - bkey_cmp(bkey_start_pos(&_ul), \ - bkey_start_pos(&_ur)) ?: (r).k - (l).k; \ -}) - -static inline void extent_sort_sift(struct btree_node_iter_large *iter, - struct btree *b, size_t i) -{ - heap_sift_down(iter, i, extent_sort_cmp, NULL); -} - -static inline void extent_sort_next(struct btree_node_iter_large *iter, - struct btree *b, - struct btree_node_iter_set *i) -{ - sort_key_next(iter, b, i); - heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL); -} - -static void extent_sort_append(struct bch_fs *c, - struct btree *b, - struct btree_nr_keys *nr, - struct bkey_packed *start, - struct bkey_packed **prev, - struct bkey_packed *k) -{ - struct bkey_format *f = &b->format; - BKEY_PADDED(k) tmp; - - if (bkey_whiteout(k)) - return; - - bch2_bkey_unpack(b, &tmp.k, k); - - if (*prev && - bch2_extent_merge(c, b, (void *) *prev, &tmp.k)) - return; - - if (*prev) { - bch2_bkey_pack(*prev, (void *) *prev, f); - - btree_keys_account_key_add(nr, 0, *prev); - *prev = bkey_next(*prev); - } else { - *prev = start; - } - - bkey_copy(*prev, &tmp.k); -} - -struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, - struct bset *dst, - struct btree *b, - struct btree_node_iter_large *iter) -{ - struct bkey_format *f = &b->format; - struct btree_node_iter_set *_l = iter->data, *_r; - struct bkey_packed *prev = NULL, *out, *lk, *rk; - struct bkey l_unpacked, r_unpacked; - struct bkey_s l, r; - struct btree_nr_keys nr; - - memset(&nr, 0, sizeof(nr)); - - heap_resort(iter, extent_sort_cmp, NULL); - - while (!bch2_btree_node_iter_large_end(iter)) { - lk = __btree_node_offset_to_key(b, _l->k); - - if (iter->used == 1) { - extent_sort_append(c, b, &nr, dst->start, &prev, lk); - extent_sort_next(iter, b, _l); - continue; - } - - _r = iter->data + 1; - 
if (iter->used > 2 && - extent_sort_cmp(iter, _r[0], _r[1]) >= 0) - _r++; - - rk = __btree_node_offset_to_key(b, _r->k); - - l = __bkey_disassemble(b, lk, &l_unpacked); - r = __bkey_disassemble(b, rk, &r_unpacked); - - /* If current key and next key don't overlap, just append */ - if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) { - extent_sort_append(c, b, &nr, dst->start, &prev, lk); - extent_sort_next(iter, b, _l); - continue; - } - - /* Skip 0 size keys */ - if (!r.k->size) { - extent_sort_next(iter, b, _r); - continue; - } - - /* - * overlap: keep the newer key and trim the older key so they - * don't overlap. comparing pointers tells us which one is - * newer, since the bsets are appended one after the other. - */ - - /* can't happen because of comparison func */ - BUG_ON(_l->k < _r->k && - !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k))); - - if (_l->k > _r->k) { - /* l wins, trim r */ - if (bkey_cmp(l.k->p, r.k->p) >= 0) { - sort_key_next(iter, b, _r); - } else { - __bch2_cut_front(l.k->p, r); - extent_save(b, rk, r.k); - } - - extent_sort_sift(iter, b, _r - iter->data); - } else if (bkey_cmp(l.k->p, r.k->p) > 0) { - BKEY_PADDED(k) tmp; - - /* - * r wins, but it overlaps in the middle of l - split l: - */ - bkey_reassemble(&tmp.k, l.s_c); - bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k); - - __bch2_cut_front(r.k->p, l); - extent_save(b, lk, l.k); - - extent_sort_sift(iter, b, 0); - - extent_sort_append(c, b, &nr, dst->start, &prev, - bkey_to_packed(&tmp.k)); - } else { - bch2_cut_back(bkey_start_pos(r.k), l.k); - extent_save(b, lk, l.k); - } - } - - if (prev) { - bch2_bkey_pack(prev, (void *) prev, f); - btree_keys_account_key_add(&nr, 0, prev); - out = bkey_next(prev); - } else { - out = dst->start; - } - - dst->u64s = cpu_to_le16((u64 *) out - dst->_data); - return nr; -} - struct extent_insert_state { struct btree_insert *trans; struct btree_insert_entry *insert; @@ -1098,13 +825,13 @@ static void verify_extent_nonoverlapping(struct btree *b, struct 
bkey uk; iter = *_iter; - k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); BUG_ON(k && (uk = bkey_unpack_key(b, k), bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); iter = *_iter; - k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard); #if 0 BUG_ON(k && (uk = bkey_unpack_key(b, k), @@ -1150,13 +877,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, verify_extent_nonoverlapping(l->b, &l->iter, insert); node_iter = l->iter; - k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard); if (k && !bkey_written(l->b, k) && bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true)) return; node_iter = l->iter; - k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_discard); if (k && !bkey_written(l->b, k) && bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false)) return; @@ -1180,7 +907,7 @@ static void extent_insert_committed(struct extent_insert_state *s) bkey_copy(&split.k, insert); if (s->deleting) - split.k.k.type = KEY_TYPE_DISCARD; + split.k.k.type = KEY_TYPE_discard; bch2_cut_back(s->committed, &split.k.k); @@ -1202,7 +929,7 @@ static void extent_insert_committed(struct extent_insert_state *s) if (s->update_journal) { bkey_copy(&split.k, !s->deleting ? 
insert : &s->whiteout); if (s->deleting) - split.k.k.type = KEY_TYPE_DISCARD; + split.k.k.type = KEY_TYPE_discard; bch2_cut_back(s->committed, &split.k.k); @@ -1214,7 +941,6 @@ static void extent_insert_committed(struct extent_insert_state *s) bch2_cut_front(s->committed, insert); insert->k.needs_whiteout = false; - s->trans->did_work = true; } void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter) @@ -1254,7 +980,7 @@ bch2_extent_can_insert(struct btree_insert *trans, *u64s += BKEY_U64s; _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, - KEY_TYPE_DISCARD); + KEY_TYPE_discard); if (!_k) return BTREE_INSERT_OK; @@ -1331,7 +1057,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, btree_account_key_drop(l->b, _k); k.k->size = 0; - k.k->type = KEY_TYPE_DELETED; + k.k->type = KEY_TYPE_deleted; if (_k >= btree_bset_last(l->b)->start) { unsigned u64s = _k->u64s; @@ -1392,7 +1118,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s) while (bkey_cmp(s->committed, insert->k.p) < 0 && (_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, - KEY_TYPE_DISCARD))) { + KEY_TYPE_discard))) { struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k); @@ -1424,7 +1150,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s) !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { if (!bkey_whiteout(k.k)) { btree_account_key_drop(l->b, _k); - _k->type = KEY_TYPE_DISCARD; + _k->type = KEY_TYPE_discard; reserve_whiteout(l->b, _k); } break; @@ -1555,88 +1281,66 @@ bch2_insert_fixup_extent(struct btree_insert *trans, const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX) - return "value too big"; - - if (!k.k->size) - return "zero key size"; + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; + struct 
bch_extent_crc_unpacked crc; + const struct bch_extent_ptr *ptr; + unsigned size_ondisk = e.k->size; + const char *reason; + unsigned nonce = UINT_MAX; - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - unsigned size_ondisk = e.k->size; - const char *reason; - unsigned nonce = UINT_MAX; + if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX) + return "value too big"; - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; + extent_for_each_entry(e, entry) { + if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) + return "invalid extent entry type"; - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + ptr = entry_to_ptr(entry); - reason = extent_ptr_invalid(c, e, &entry->ptr, - size_ondisk, false); - if (reason) - return reason; - break; - case BCH_EXTENT_ENTRY_crc32: - case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); + reason = extent_ptr_invalid(c, e.s_c, &entry->ptr, + size_ondisk, false); + if (reason) + return reason; + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - if (crc.offset + e.k->size > - crc.uncompressed_size) - return "checksum offset + key size > uncompressed size"; + if (crc.offset + e.k->size > + crc.uncompressed_size) + return "checksum offset + key size > uncompressed size"; - size_ondisk = crc.compressed_size; + size_ondisk = crc.compressed_size; - if (!bch2_checksum_type_valid(c, crc.csum_type)) - return "invalid checksum type"; + if (!bch2_checksum_type_valid(c, crc.csum_type)) + return "invalid 
checksum type"; - if (crc.compression_type >= BCH_COMPRESSION_NR) - return "invalid compression type"; + if (crc.compression_type >= BCH_COMPRESSION_NR) + return "invalid compression type"; - if (bch2_csum_type_is_encryption(crc.csum_type)) { - if (nonce == UINT_MAX) - nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) - return "incorrect nonce"; - } - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; + if (bch2_csum_type_is_encryption(crc.csum_type)) { + if (nonce == UINT_MAX) + nonce = crc.offset + crc.nonce; + else if (nonce != crc.offset + crc.nonce) + return "incorrect nonce"; } + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + break; } - - return NULL; - } - - case BCH_RESERVATION: { - struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); - - if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) - return "incorrect value size"; - - if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) - return "invalid nr_replicas"; - - return NULL; } - default: - return "invalid value type"; - } + return NULL; } -static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, - struct bkey_s_c_extent e) +void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, + struct bkey_s_c k) { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const struct bch_extent_ptr *ptr; struct bch_dev *ca; struct bucket_mark mark; @@ -1698,8 +1402,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (replicas > BCH_REPLICAS_MAX) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent key bad (too many replicas: %u): %s", replicas, buf); @@ -1707,10 +1410,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, btree_node_type(b), - e.s_c, false)) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), 
- e.s_c); + !bch2_bkey_replicas_marked(c, e.s_c, false)) { + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent key bad (replicas not marked in superblock):\n%s", buf); @@ -1720,34 +1421,18 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, return; bad_ptr: - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu " "gen %i type %u", buf, PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type); } -void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) -{ - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - bch2_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k)); - break; - case BCH_RESERVATION: - break; - default: - BUG(); - } -} - void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { const char *invalid; - if (bkey_extent_is_data(k.k)) - extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); + bkey_ptrs_to_text(out, c, k); invalid = bch2_extent_invalid(c, k); if (invalid) @@ -1843,41 +1528,17 @@ found: */ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) { - struct bkey_s_extent e; - - switch (k.k->type) { - case KEY_TYPE_ERROR: - return false; - - case KEY_TYPE_DELETED: - return true; - case KEY_TYPE_DISCARD: - return bversion_zero(k.k->version); - case KEY_TYPE_COOKIE: - return false; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_to_extent(k); + struct bch_extent_ptr *ptr; - bch2_extent_drop_stale(c, e); + bch2_bkey_drop_ptrs(k, ptr, + ptr->cached && + ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); - if (!bkey_val_u64s(e.k)) { - if (bkey_extent_is_cached(e.k)) { - k.k->type = KEY_TYPE_DISCARD; - if (bversion_zero(k.k->version)) - return true; - } else { - k.k->type = KEY_TYPE_ERROR; - } - } + /* will only happen if all pointers were cached: */ + if (!bkey_val_u64s(k.k)) + k.k->type = KEY_TYPE_deleted; - 
return false; - case BCH_RESERVATION: - return false; - default: - BUG(); - } + return false; } void bch2_extent_mark_replicas_cached(struct bch_fs *c, @@ -1887,7 +1548,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, { union bch_extent_entry *entry; struct extent_ptr_decoded p; - int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas; + int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas; if (target && extra > 0) extent_for_each_ptr_decode(e, p, entry) { @@ -1911,106 +1572,40 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, } } -/* - * This picks a non-stale pointer, preferably from a device other than @avoid. - * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to - * other devices, it will still pick a pointer from avoid. - */ -int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - int ret; - - switch (k.k->type) { - case KEY_TYPE_ERROR: - return -EIO; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - ret = extent_pick_read_device(c, bkey_s_c_to_extent(k), - failed, pick); - - if (!ret && !bkey_extent_is_cached(k.k)) - ret = -EIO; - - return ret; - - default: - return 0; - } -} - -enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b, +enum merge_result bch2_extent_merge(struct bch_fs *c, struct bkey_i *l, struct bkey_i *r) { - struct bkey_s_extent el, er; + struct bkey_s_extent el = bkey_i_to_s_extent(l); + struct bkey_s_extent er = bkey_i_to_s_extent(r); union bch_extent_entry *en_l, *en_r; - if (key_merging_disabled(c)) - return BCH_MERGE_NOMERGE; - - /* - * Generic header checks - * Assumes left and right are in order - * Left and right must be exactly aligned - */ - - if (l->k.u64s != r->k.u64s || - l->k.type != r->k.type || - bversion_cmp(l->k.version, r->k.version) || - bkey_cmp(l->k.p, bkey_start_pos(&r->k))) + if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k)) return BCH_MERGE_NOMERGE; - 
switch (l->k.type) { - case KEY_TYPE_DISCARD: - case KEY_TYPE_ERROR: - /* These types are mergeable, and no val to check */ - break; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - el = bkey_i_to_s_extent(l); - er = bkey_i_to_s_extent(r); - - extent_for_each_entry(el, en_l) { - struct bch_extent_ptr *lp, *rp; - struct bch_dev *ca; - - en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - - if ((extent_entry_type(en_l) != - extent_entry_type(en_r)) || - !extent_entry_is_ptr(en_l)) - return BCH_MERGE_NOMERGE; + extent_for_each_entry(el, en_l) { + struct bch_extent_ptr *lp, *rp; + struct bch_dev *ca; - lp = &en_l->ptr; - rp = &en_r->ptr; + en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - if (lp->offset + el.k->size != rp->offset || - lp->dev != rp->dev || - lp->gen != rp->gen) - return BCH_MERGE_NOMERGE; + if ((extent_entry_type(en_l) != + extent_entry_type(en_r)) || + !extent_entry_is_ptr(en_l)) + return BCH_MERGE_NOMERGE; - /* We don't allow extents to straddle buckets: */ - ca = bch_dev_bkey_exists(c, lp->dev); + lp = &en_l->ptr; + rp = &en_r->ptr; - if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) - return BCH_MERGE_NOMERGE; - } + if (lp->offset + el.k->size != rp->offset || + lp->dev != rp->dev || + lp->gen != rp->gen) + return BCH_MERGE_NOMERGE; - break; - case BCH_RESERVATION: { - struct bkey_i_reservation *li = bkey_i_to_reservation(l); - struct bkey_i_reservation *ri = bkey_i_to_reservation(r); + /* We don't allow extents to straddle buckets: */ + ca = bch_dev_bkey_exists(c, lp->dev); - if (li->v.generation != ri->v.generation || - li->v.nr_replicas != ri->v.nr_replicas) + if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) return BCH_MERGE_NOMERGE; - break; - } - default: - return BCH_MERGE_NOMERGE; } l->k.needs_whiteout |= r->k.needs_whiteout; @@ -2060,7 +1655,7 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, bch2_bkey_unpack(b, &li.k, l); bch2_bkey_unpack(b, &ri.k, r); - ret = bch2_extent_merge(c, b, &li.k, &ri.k); + ret = 
bch2_bkey_merge(c, &li.k, &ri.k); if (ret == BCH_MERGE_NOMERGE) return false; @@ -2128,3 +1723,54 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size) return ret; } + +/* KEY_TYPE_reservation: */ + +const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + + if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) + return "incorrect value size"; + + if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) + return "invalid nr_replicas"; + + return NULL; +} + +void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + + pr_buf(out, "generation %u replicas %u", + le32_to_cpu(r.v->generation), + r.v->nr_replicas); +} + +enum merge_result bch2_reservation_merge(struct bch_fs *c, + struct bkey_i *l, struct bkey_i *r) +{ + struct bkey_i_reservation *li = bkey_i_to_reservation(l); + struct bkey_i_reservation *ri = bkey_i_to_reservation(r); + + if (li->v.generation != ri->v.generation || + li->v.nr_replicas != ri->v.nr_replicas) + return BCH_MERGE_NOMERGE; + + l->k.needs_whiteout |= r->k.needs_whiteout; + + /* Keys with no pointers aren't restricted to one bucket and could + * overflow KEY_SIZE + */ + if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { + bch2_key_resize(&l->k, KEY_SIZE_MAX); + bch2_cut_front(l->k.p, r); + return BCH_MERGE_PARTIAL; + } + + bch2_key_resize(&l->k, l->k.size + r->k.size); + + return BCH_MERGE_MERGE; +} diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 307abd2..e6e9c30 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -6,141 +6,37 @@ #include "extents_types.h" struct bch_fs; -struct journal_res; -struct btree_node_iter; -struct btree_node_iter_large; struct btree_insert; struct btree_insert_entry; -struct bch_devs_mask; -union bch_extent_crc; -const char *bch2_btree_ptr_invalid(const struct bch_fs 
*, struct bkey_s_c); -void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, - struct bkey_s_c); -void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); -void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); - -#define bch2_bkey_btree_ops (struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ - .key_debugcheck = bch2_btree_ptr_debugcheck, \ - .val_to_text = bch2_btree_ptr_to_text, \ - .swab = bch2_ptr_swab, \ -} - -const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s); -enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *, - struct bkey_i *, struct bkey_i *); - -#define bch2_bkey_extent_ops (struct bkey_ops) { \ - .key_invalid = bch2_extent_invalid, \ - .key_debugcheck = bch2_extent_debugcheck, \ - .val_to_text = bch2_extent_to_text, \ - .swab = bch2_ptr_swab, \ - .key_normalize = bch2_ptr_normalize, \ - .key_merge = bch2_extent_merge, \ - .is_extents = true, \ -} - -struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *, - struct btree *, - struct btree_node_iter_large *); -struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, - struct bset *, - struct btree *, - struct btree_node_iter_large *); - -void bch2_mark_io_failure(struct bch_io_failures *, - struct extent_ptr_decoded *); -int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *, - struct bch_io_failures *, - struct extent_ptr_decoded *); -int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c, - struct bch_io_failures *, - struct extent_ptr_decoded *); - -void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); - -static inline bool bch2_extent_is_atomic(struct bkey *k, - struct btree_iter *iter) -{ - struct btree *b = iter->l[0].b; 
- - return bkey_cmp(k->p, b->key.k.p) <= 0 && - bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0; -} - -enum btree_insert_ret -bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *, - unsigned *); -enum btree_insert_ret -bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); - -bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, - unsigned, unsigned); - -const struct bch_extent_ptr * -bch2_extent_has_device(struct bkey_s_c_extent, unsigned); -void bch2_extent_drop_device(struct bkey_s_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); - -unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent); -unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c); -unsigned bch2_extent_is_compressed(struct bkey_s_c); - -unsigned bch2_extent_durability(struct bch_fs *, struct bkey_s_c_extent); - -bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, - struct bch_extent_ptr, u64); - -static inline bool bkey_extent_is_data(const struct bkey *k) -{ - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return true; - default: - return false; - } -} +/* extent entries: */ -static inline bool bkey_extent_is_allocation(const struct bkey *k) -{ - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - case BCH_RESERVATION: - return true; - default: - return false; - } -} +#define extent_entry_last(_e) bkey_val_end(_e) -static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) -{ - return bkey_extent_is_allocation(k.k) && - !bch2_extent_is_compressed(k); -} - -static inline bool bkey_extent_is_cached(const struct bkey *k) -{ - return k->type == BCH_EXTENT_CACHED; -} - -static inline void bkey_extent_set_cached(struct bkey *k, bool cached) 
-{ - EBUG_ON(k->type != BCH_EXTENT && - k->type != BCH_EXTENT_CACHED); +#define entry_to_ptr(_entry) \ +({ \ + EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ + \ + __builtin_choose_expr( \ + type_is_exact(_entry, const union bch_extent_entry *), \ + (const struct bch_extent_ptr *) (_entry), \ + (struct bch_extent_ptr *) (_entry)); \ +}) - k->type = cached ? BCH_EXTENT_CACHED : BCH_EXTENT; -} +/* downcast, preserves const */ +#define to_entry(_entry) \ +({ \ + BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ + !type_is(_entry, struct bch_extent_ptr *) && \ + !type_is(_entry, struct bch_extent_stripe_ptr *)); \ + \ + __builtin_choose_expr( \ + (type_is_exact(_entry, const union bch_extent_crc *) || \ + type_is_exact(_entry, const struct bch_extent_ptr *) ||\ + type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ + (const union bch_extent_entry *) (_entry), \ + (union bch_extent_entry *) (_entry)); \ +}) static inline unsigned __extent_entry_type(const union bch_extent_entry *e) @@ -205,21 +101,6 @@ union bch_extent_crc { struct bch_extent_crc128 crc128; }; -/* downcast, preserves const */ -#define to_entry(_entry) \ -({ \ - BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ - !type_is(_entry, struct bch_extent_ptr *) && \ - !type_is(_entry, struct bch_extent_stripe_ptr *)); \ - \ - __builtin_choose_expr( \ - (type_is_exact(_entry, const union bch_extent_crc *) || \ - type_is_exact(_entry, const struct bch_extent_ptr *) ||\ - type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ - (const union bch_extent_entry *) (_entry), \ - (union bch_extent_entry *) (_entry)); \ -}) - #define __entry_to_crc(_entry) \ __builtin_choose_expr( \ type_is_exact(_entry, const union bch_extent_entry *), \ @@ -233,18 +114,6 @@ union bch_extent_crc { __entry_to_crc(_entry); \ }) -#define entry_to_ptr(_entry) \ -({ \ - EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ - \ - __builtin_choose_expr( \ - type_is_exact(_entry, const union 
bch_extent_entry *), \ - (const struct bch_extent_ptr *) (_entry), \ - (struct bch_extent_ptr *) (_entry)); \ -}) - -/* checksum entries: */ - static inline struct bch_extent_crc_unpacked bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) { @@ -302,71 +171,64 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) #undef common_fields } -/* Extent entry iteration: */ - -#define extent_entry_next(_entry) \ - ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) +/* bkey_ptrs: generically over any key type that has ptrs */ -#define extent_entry_last(_e) \ - vstruct_idx((_e).v, bkey_val_u64s((_e).k)) +struct bkey_ptrs_c { + const union bch_extent_entry *start; + const union bch_extent_entry *end; +}; -/* Iterate over all entries: */ +struct bkey_ptrs { + union bch_extent_entry *start; + union bch_extent_entry *end; +}; -#define extent_for_each_entry_from(_e, _entry, _start) \ - for ((_entry) = _start; \ - (_entry) < extent_entry_last(_e); \ - (_entry) = extent_entry_next(_entry)) +/* iterate over bkey ptrs */ -#define extent_for_each_entry(_e, _entry) \ - extent_for_each_entry_from(_e, _entry, (_e).v->start) +#define extent_entry_next(_entry) \ + ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -/* Iterate over pointers only: */ +#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ + for ((_entry) = (_start); \ + (_entry) < (_end); \ + (_entry) = extent_entry_next(_entry)) -#define extent_ptr_next(_e, _ptr) \ +#define __bkey_ptr_next(_ptr, _end) \ ({ \ - typeof(&(_e).v->start[0]) _entry; \ + typeof(_end) _entry; \ \ - extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \ + __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ if (extent_entry_is_ptr(_entry)) \ break; \ \ - _entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL; \ + _entry < (_end) ? 
entry_to_ptr(_entry) : NULL; \ }) -#define extent_for_each_ptr(_e, _ptr) \ - for ((_ptr) = &(_e).v->start->ptr; \ - ((_ptr) = extent_ptr_next(_e, _ptr)); \ - (_ptr)++) +#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ + __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -/* Iterate over crcs only: */ +#define bkey_extent_entry_for_each(_p, _entry) \ + bkey_extent_entry_for_each_from(_p, _entry, _p.start) -#define extent_crc_next(_e, _crc, _iter) \ -({ \ - extent_for_each_entry_from(_e, _iter, _iter) \ - if (extent_entry_is_crc(_iter)) { \ - (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ - break; \ - } \ - \ - (_iter) < extent_entry_last(_e); \ -}) +#define __bkey_for_each_ptr(_start, _end, _ptr) \ + for ((_ptr) = (_start); \ + ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ + (_ptr)++) -#define extent_for_each_crc(_e, _crc, _iter) \ - for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ - (_iter) = (_e).v->start; \ - extent_crc_next(_e, _crc, _iter); \ - (_iter) = extent_entry_next(_iter)) +#define bkey_ptr_next(_p, _ptr) \ + __bkey_ptr_next(_ptr, (_p).end) -/* Iterate over pointers, with crcs: */ +#define bkey_for_each_ptr(_p, _ptr) \ + __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -#define __extent_ptr_next_decode(_e, _ptr, _entry) \ +#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ ({ \ __label__ out; \ \ (_ptr).idx = 0; \ (_ptr).ec_nr = 0; \ \ - extent_for_each_entry_from(_e, _entry, _entry) \ + __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ switch (extent_entry_type(_entry)) { \ case BCH_EXTENT_ENTRY_ptr: \ (_ptr).ptr = _entry->ptr; \ @@ -374,7 +236,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) case BCH_EXTENT_ENTRY_crc32: \ case BCH_EXTENT_ENTRY_crc64: \ case BCH_EXTENT_ENTRY_crc128: \ - (_ptr).crc = bch2_extent_crc_unpack((_e).k, \ + (_ptr).crc = bch2_extent_crc_unpack(_k, \ entry_to_crc(_entry)); \ break; \ case BCH_EXTENT_ENTRY_stripe_ptr: \ @@ -382,134 
+244,310 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) break; \ } \ out: \ - _entry < extent_entry_last(_e); \ + _entry < (_end); \ }) -#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ - for ((_ptr).crc = bch2_extent_crc_unpack((_e).k, NULL), \ - (_entry) = (_e).v->start; \ - __extent_ptr_next_decode(_e, _ptr, _entry); \ +#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ + for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ + (_entry) = _start; \ + __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ (_entry) = extent_entry_next(_entry)) -/* Iterate over pointers backwards: */ +#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ + __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ + _ptr, _entry) -void bch2_extent_crc_append(struct bkey_i_extent *, - struct bch_extent_crc_unpacked); -void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, - struct extent_ptr_decoded *); +/* utility code common to all keys with pointers: */ -static inline void __extent_entry_insert(struct bkey_i_extent *e, - union bch_extent_entry *dst, - union bch_extent_entry *new) +static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) { - union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e)); - - memmove_u64s_up((u64 *) dst + extent_entry_u64s(new), - dst, (u64 *) end - (u64 *) dst); - e->k.u64s += extent_entry_u64s(new); - memcpy(dst, new, extent_entry_bytes(new)); + switch (k.k->type) { + case KEY_TYPE_btree_ptr: { + struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); + return (struct bkey_ptrs_c) { + to_entry(&e.v->start[0]), + to_entry(bkey_val_end(e)) + }; + } + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + return (struct bkey_ptrs_c) { + e.v->start, + extent_entry_last(e) + }; + } + case KEY_TYPE_stripe: { + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + return (struct bkey_ptrs_c) { + to_entry(&s.v->ptrs[0]), + to_entry(&s.v->ptrs[s.v->nr_blocks]), + 
}; + } + default: + return (struct bkey_ptrs_c) { NULL, NULL }; + } } -static inline void __extent_entry_push(struct bkey_i_extent *e) +static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) { - union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); - - EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > - BKEY_EXTENT_VAL_U64s_MAX); + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); - e->k.u64s += extent_entry_u64s(entry); + return (struct bkey_ptrs) { + (void *) p.start, + (void *) p.end + }; } -static inline void extent_ptr_append(struct bkey_i_extent *e, - struct bch_extent_ptr ptr) -{ - ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; - extent_entry_last(extent_i_to_s(e))->ptr = ptr; - __extent_entry_push(e); -} - -static inline struct bch_devs_list bch2_extent_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) if (!ptr->cached) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) if (ptr->cached) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct 
bch_devs_list bch2_bkey_devs(struct bkey_s_c k) +static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_devs(bkey_s_c_to_extent(k)); - default: - return (struct bch_devs_list) { .nr = 0 }; - } + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + bkey_for_each_ptr(p, ptr) + if (ptr->dev == dev) + return ptr; + + return NULL; } -static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) +unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c); +unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); + +void bch2_mark_io_failure(struct bch_io_failures *, + struct extent_ptr_decoded *); +int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, + struct bch_io_failures *, + struct extent_ptr_decoded *); + +/* bch_btree_ptr: */ + +const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, + struct bkey_s_c); +void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); +void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); + +#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ + .key_invalid = bch2_btree_ptr_invalid, \ + .key_debugcheck = bch2_btree_ptr_debugcheck, \ + .val_to_text = bch2_btree_ptr_to_text, \ + .swab = bch2_ptr_swab, \ +} + +/* bch_extent: */ + +const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); +void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +enum merge_result bch2_extent_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); + +#define bch2_bkey_ops_extent (struct bkey_ops) { \ + .key_invalid = bch2_extent_invalid, \ + 
.key_debugcheck = bch2_extent_debugcheck, \ + .val_to_text = bch2_extent_to_text, \ + .swab = bch2_ptr_swab, \ + .key_normalize = bch2_extent_normalize, \ + .key_merge = bch2_extent_merge, \ +} + +/* bch_reservation: */ + +const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +enum merge_result bch2_reservation_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); + +#define bch2_bkey_ops_reservation (struct bkey_ops) { \ + .key_invalid = bch2_reservation_invalid, \ + .val_to_text = bch2_reservation_to_text, \ + .key_merge = bch2_reservation_merge, \ +} + +void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); + +static inline bool bch2_extent_is_atomic(struct bkey *k, + struct btree_iter *iter) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_dirty_devs(bkey_s_c_to_extent(k)); + struct btree *b = iter->l[0].b; + + return bkey_cmp(k->p, b->key.k.p) <= 0 && + bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0; +} + +enum btree_insert_ret +bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *, + unsigned *); +enum btree_insert_ret +bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); + +void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, + unsigned, unsigned); + +const struct bch_extent_ptr * +bch2_extent_has_device(struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); + +unsigned bch2_extent_is_compressed(struct bkey_s_c); + +bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, + struct bch_extent_ptr, u64); + +static inline bool bkey_extent_is_data(const struct bkey *k) +{ + switch (k->type) { + case KEY_TYPE_btree_ptr: + 
case KEY_TYPE_extent: + return true; default: - return (struct bch_devs_list) { .nr = 0 }; + return false; } } -static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) +static inline bool bkey_extent_is_allocation(const struct bkey *k) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_cached_devs(bkey_s_c_to_extent(k)); + switch (k->type) { + case KEY_TYPE_extent: + case KEY_TYPE_reservation: + return true; default: - return (struct bch_devs_list) { .nr = 0 }; + return false; } } +static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) +{ + return bkey_extent_is_allocation(k.k) && + !bch2_extent_is_compressed(k); +} + +void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); +void bch2_bkey_drop_device(struct bkey_s, unsigned); + +/* Extent entry iteration: */ + +#define extent_for_each_entry_from(_e, _entry, _start) \ + __bkey_extent_entry_for_each_from(_start, \ + extent_entry_last(_e),_entry) + +#define extent_for_each_entry(_e, _entry) \ + extent_for_each_entry_from(_e, _entry, (_e).v->start) + +#define extent_ptr_next(_e, _ptr) \ + __bkey_ptr_next(_ptr, extent_entry_last(_e)) + +#define extent_for_each_ptr(_e, _ptr) \ + __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) + +#define extent_crc_next(_e, _crc, _iter) \ +({ \ + extent_for_each_entry_from(_e, _iter, _iter) \ + if (extent_entry_is_crc(_iter)) { \ + (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ + break; \ + } \ + \ + (_iter) < extent_entry_last(_e); \ +}) + +#define extent_for_each_crc(_e, _crc, _iter) \ + for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ + (_iter) = (_e).v->start; \ + extent_crc_next(_e, _crc, _iter); \ + (_iter) = extent_entry_next(_iter)) + +#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ + __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ + extent_entry_last(_e), _ptr, _entry) + +void bch2_extent_crc_append(struct bkey_i_extent *, + struct 
bch_extent_crc_unpacked); +void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, + struct extent_ptr_decoded *); + +static inline void __extent_entry_insert(struct bkey_i_extent *e, + union bch_extent_entry *dst, + union bch_extent_entry *new) +{ + union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e)); + + memmove_u64s_up((u64 *) dst + extent_entry_u64s(new), + dst, (u64 *) end - (u64 *) dst); + e->k.u64s += extent_entry_u64s(new); + memcpy(dst, new, extent_entry_bytes(new)); +} + +static inline void __extent_entry_push(struct bkey_i_extent *e) +{ + union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); + + EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > + BKEY_EXTENT_VAL_U64s_MAX); + + e->k.u64s += extent_entry_u64s(entry); +} + bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent, struct bch_extent_crc_unpacked); bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked); -union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent , - struct bch_extent_ptr *); +union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, + struct bch_extent_ptr *); -#define bch2_extent_drop_ptrs(_e, _ptr, _cond) \ +#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ do { \ - _ptr = &(_e).v->start->ptr; \ + struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ + \ + _ptr = &_ptrs.start->ptr; \ \ - while ((_ptr = extent_ptr_next(e, _ptr))) { \ + while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ if (_cond) { \ - _ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \ + _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ + _ptrs = bch2_bkey_ptrs(_k); \ continue; \ } \ \ @@ -517,10 +555,34 @@ do { \ } \ } while (0) -bool bch2_cut_front(struct bpos, struct bkey_i *); +bool __bch2_cut_front(struct bpos, struct bkey_s); + +static inline bool bch2_cut_front(struct bpos where, struct bkey_i *k) +{ + return __bch2_cut_front(where, bkey_i_to_s(k)); +} + bool bch2_cut_back(struct bpos, struct bkey *); void bch2_key_resize(struct bkey *, 
unsigned); +/* + * In extent_sort_fix_overlapping(), insert_fixup_extent(), + * extent_merge_inline() - we're modifying keys in place that are packed. To do + * that we have to unpack the key, modify the unpacked key - then this + * copies/repacks the unpacked to the original as necessary. + */ +static inline void extent_save(struct btree *b, struct bkey_packed *dst, + struct bkey *src) +{ + struct bkey_format *f = &b->format; + struct bkey_i *dst_unpacked; + + if ((dst_unpacked = packed_to_bkey(dst))) + dst_unpacked->k = *src; + else + BUG_ON(!bch2_bkey_pack_key(dst, src, f)); +} + int bch2_check_range_allocated(struct bch_fs *, struct bpos, u64); #endif /* _BCACHEFS_EXTENTS_H */ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 34cfd5d..41ac5d4 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -121,7 +121,7 @@ static void bch2_quota_reservation_put(struct bch_fs *c, BUG_ON(res->sectors > inode->ei_quota_reserved); bch2_quota_acct(c, inode->ei_qid, Q_SPC, - -((s64) res->sectors), BCH_QUOTA_PREALLOC); + -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); inode->ei_quota_reserved -= res->sectors; mutex_unlock(&inode->ei_quota_lock); @@ -138,7 +138,7 @@ static int bch2_quota_reservation_add(struct bch_fs *c, mutex_lock(&inode->ei_quota_lock); ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, - check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK); + check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); if (likely(!ret)) { inode->ei_quota_reserved += sectors; res->sectors += sectors; @@ -220,7 +220,7 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, quota_res->sectors -= sectors; inode->ei_quota_reserved -= sectors; } else { - bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN); + bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); } #endif inode->v.i_blocks += sectors; @@ -242,9 +242,15 @@ static s64 sum_sector_overwrites(struct bkey_i *new, struct btree_iter *_iter, bch2_btree_iter_link(_iter, &iter); bch2_btree_iter_copy(&iter, _iter); - for_each_btree_key_continue(&iter, BTREE_ITER_SLOTS, old) { - if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0) - break; + old = bch2_btree_iter_peek_slot(&iter); + + while (1) { + /* + * should not be possible to get an error here, since we're + * carefully not advancing past @new and thus whatever leaf node + * @_iter currently points to: + */ + BUG_ON(btree_iter_err(old)); if (allocating && !bch2_extent_is_fully_allocated(old)) @@ -256,6 +262,11 @@ static s64 sum_sector_overwrites(struct bkey_i *new, struct btree_iter *_iter, bkey_start_offset(old.k))) * (bkey_extent_is_allocation(&new->k) - bkey_extent_is_allocation(old.k)); + + if (bkey_cmp(old.k->p, new->k.p) >= 0) + break; + + old = bch2_btree_iter_next_slot(&iter); } bch2_btree_iter_unlink(&iter); @@ -848,7 +859,7 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) struct bvec_iter iter; struct bio_vec bv; unsigned nr_ptrs = !bch2_extent_is_compressed(k) - ? bch2_extent_nr_dirty_ptrs(k) + ? 
bch2_bkey_nr_dirty_ptrs(k) : 0; bio_for_each_segment(bv, bio, iter) { @@ -2397,7 +2408,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode, BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k))); ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size, - bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)), + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)), BCH_DISK_RESERVATION_NOFAIL); BUG_ON(ret); @@ -2504,7 +2515,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, goto btree_iter_err; /* already reserved */ - if (k.k->type == BCH_RESERVATION && + if (k.k->type == KEY_TYPE_reservation && bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { bch2_btree_iter_next_slot(iter); continue; @@ -2517,7 +2528,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, } bkey_reservation_init(&reservation.k_i); - reservation.k.type = BCH_RESERVATION; + reservation.k.type = KEY_TYPE_reservation; reservation.k.p = k.k->p; reservation.k.size = k.k->size; @@ -2525,7 +2536,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, bch2_cut_back(end_pos, &reservation.k); sectors = reservation.k.size; - reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k); + reservation.v.nr_replicas = bch2_bkey_nr_dirty_ptrs(k); if (!bkey_extent_is_allocation(k.k)) { ret = bch2_quota_reservation_add(c, inode, diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 9bda621..67b0dd3 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -281,7 +281,7 @@ __bch2_create(struct bch_inode_info *dir, struct dentry *dentry, if (tmpfile) inode_u.bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC); + ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC); if (ret) return ERR_PTR(ret); @@ -394,7 +394,7 @@ err_trans: make_bad_inode(&inode->v); iput(&inode->v); err: - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN); + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN); inode = ERR_PTR(ret); goto out; } @@ -999,7 +999,7 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, } return 0; - } else if (k->k.type == BCH_RESERVATION) { + } else if (k->k.type == KEY_TYPE_reservation) { return fiemap_fill_next_extent(info, bkey_start_offset(&k->k) << 9, 0, k->k.size << 9, @@ -1028,7 +1028,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(ei->v.i_ino, start >> 9), 0, k) if (bkey_extent_is_data(k.k) || - k.k->type == BCH_RESERVATION) { + k.k->type == KEY_TYPE_reservation) { if (bkey_cmp(bkey_start_pos(k.k), POS(ei->v.i_ino, (start + len) >> 9)) >= 0) break; @@ -1329,9 +1329,9 @@ static void bch2_evict_inode(struct inode *vinode) if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks), - BCH_QUOTA_WARN); + KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, - BCH_QUOTA_WARN); + KEY_TYPE_QUOTA_WARN); bch2_inode_rm(c, inode->v.i_ino); WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0, diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 53ee1b0..5525af8 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -234,7 +234,6 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, !desc.cmp_bkey(k, k2), c, "duplicate hash table keys:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, desc.btree_id), k), buf))) { ret = fsck_hash_delete_at(desc, &h->info, k_iter); if (ret) @@ -254,7 +253,7 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc, { u64 hash; - if (k.k->type != desc.whiteout_type && + if (k.k->type != KEY_TYPE_whiteout && k.k->type != desc.key_type) return true; @@ -279,7 +278,7 @@ static int hash_check_key(const struct bch_hash_desc desc, u64 hashed; int ret = 0; - if (k.k->type != desc.whiteout_type && + if (k.k->type != KEY_TYPE_whiteout && k.k->type != desc.key_type) return 0; @@ -299,7 +298,6 
@@ static int hash_check_key(const struct bch_hash_desc desc, desc.btree_id, k.k->p.offset, hashed, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, desc.btree_id), k), buf))) { ret = hash_redo_key(desc, h, c, k_iter, k, hashed); if (ret) { @@ -369,7 +367,7 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c, *k = bch2_btree_iter_peek(iter); - BUG_ON(k->k->type != BCH_DIRENT); + BUG_ON(k->k->type != KEY_TYPE_dirent); } err: fsck_err: @@ -384,7 +382,6 @@ err_redo: buf, strlen(buf), BTREE_ID_DIRENTS, k->k->p.offset, hash, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, BTREE_ID_DIRENTS), *k), buf))) { ret = hash_redo_key(bch2_dirent_hash_desc, h, c, iter, *k, hash); @@ -470,7 +467,7 @@ static int check_extents(struct bch_fs *c) if (fsck_err_on(w.have_inode && !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != BCH_RESERVATION && + k.k->type != KEY_TYPE_reservation && k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { @@ -528,13 +525,11 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!w.have_inode, c, "dirent in nonexisting directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - BTREE_ID_DIRENTS, k), buf)) || fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", mode_to_type(w.inode.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, - BTREE_ID_DIRENTS, k), buf))) { ret = bch2_btree_delete_at(iter, 0); if (ret) @@ -556,7 +551,7 @@ static int check_dirents(struct bch_fs *c) if (ret) goto fsck_err; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; d = bkey_s_c_to_dirent(k); @@ -585,7 +580,6 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(d_inum == d.k->p.inode, c, "dirent points to own directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - BTREE_ID_DIRENTS, k), buf))) 
{ ret = remove_dirent(c, iter, d); if (ret) @@ -603,7 +597,6 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!have_target, c, "dirent points to missing inode:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - BTREE_ID_DIRENTS, k), buf))) { ret = remove_dirent(c, iter, d); if (ret) @@ -617,7 +610,6 @@ static int check_dirents(struct bch_fs *c) "incorrect d_type: should be %u:\n%s", mode_to_type(target.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, - BTREE_ID_DIRENTS, k), buf))) { struct bkey_i_dirent *n; @@ -898,7 +890,7 @@ next: e->offset = k.k->p.offset; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); @@ -941,7 +933,7 @@ up: } for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS) + if (k.k->type != KEY_TYPE_inode) continue; if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) @@ -1029,7 +1021,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { switch (k.k->type) { - case BCH_DIRENT: + case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); @@ -1309,7 +1301,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (iter.pos.inode < nlinks_pos || !link) link = &zero_links; - if (k.k && k.k->type == BCH_INODE_FS) { + if (k.k && k.k->type == KEY_TYPE_inode) { /* * Avoid potential deadlocks with iter for * truncate/rm/etc.: @@ -1391,7 +1383,7 @@ static int check_inodes_fast(struct bch_fs *c) int ret = 0; for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS) + if (k.k->type != KEY_TYPE_inode) continue; inode = bkey_s_c_to_inode(k); diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 207ca36..8c3d443 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -177,76 +177,69 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, const char *bch2_inode_invalid(const struct 
bch_fs *c, struct bkey_s_c k) { - if (k.k->p.offset) - return "nonzero offset"; - - switch (k.k->type) { - case BCH_INODE_FS: { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) - return "incorrect value size"; - - if (k.k->p.inode < BLOCKDEV_INODE_MAX) - return "fs inode in blockdev range"; + if (k.k->p.offset) + return "nonzero offset"; - if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) - return "invalid str hash type"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) + return "incorrect value size"; - if (bch2_inode_unpack(inode, &unpacked)) - return "invalid variable length fields"; + if (k.k->p.inode < BLOCKDEV_INODE_MAX) + return "fs inode in blockdev range"; - if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) - return "invalid data checksum type"; + if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) + return "invalid str hash type"; - if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) - return "invalid data checksum type"; + if (bch2_inode_unpack(inode, &unpacked)) + return "invalid variable length fields"; - if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && - unpacked.bi_nlink != 0) - return "flagged as unlinked but bi_nlink != 0"; + if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) + return "invalid data checksum type"; - return NULL; - } - case BCH_INODE_BLOCKDEV: - if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev)) - return "incorrect value size"; + if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) + return "invalid data checksum type"; - if (k.k->p.inode >= BLOCKDEV_INODE_MAX) - return "blockdev inode in fs range"; + if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && + unpacked.bi_nlink != 0) + return "flagged as unlinked but bi_nlink != 0"; - return NULL; - case BCH_INODE_GENERATION: - if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) - return "incorrect value size"; - - return NULL; - default: - return "invalid 
type"; - } + return NULL; } void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_inode inode; + struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - switch (k.k->type) { - case BCH_INODE_FS: - inode = bkey_s_c_to_inode(k); - if (bch2_inode_unpack(inode, &unpacked)) { - pr_buf(out, "(unpack error)"); - break; - } + if (bch2_inode_unpack(inode, &unpacked)) { + pr_buf(out, "(unpack error)"); + return; + } #define BCH_INODE_FIELD(_name, _bits) \ - pr_buf(out, #_name ": %llu ", (u64) unpacked._name); - BCH_INODE_FIELDS() + pr_buf(out, #_name ": %llu ", (u64) unpacked._name); + BCH_INODE_FIELDS() #undef BCH_INODE_FIELD - break; - } +} + +const char *bch2_inode_generation_invalid(const struct bch_fs *c, + struct bkey_s_c k) +{ + if (k.k->p.offset) + return "nonzero offset"; + + if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) + return "incorrect value size"; + + return NULL; +} + +void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ } void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, @@ -282,10 +275,9 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, static inline u32 bkey_generation(struct bkey_s_c k) { switch (k.k->type) { - case BCH_INODE_BLOCKDEV: - case BCH_INODE_FS: + case KEY_TYPE_inode: BUG(); - case BCH_INODE_GENERATION: + case KEY_TYPE_inode_generation: return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); default: return 0; @@ -331,8 +323,7 @@ again: return ret; switch (k.k->type) { - case BCH_INODE_BLOCKDEV: - case BCH_INODE_FS: + case KEY_TYPE_inode: /* slot used */ if (iter->pos.inode >= max) goto out; @@ -406,19 +397,19 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) return ret; } - bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c, + bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, "inode %llu not found when deleting", 
inode_nr); switch (k.k->type) { - case BCH_INODE_FS: { + case KEY_TYPE_inode: { struct bch_inode_unpacked inode_u; if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) bi_generation = inode_u.bi_generation + 1; break; } - case BCH_INODE_GENERATION: { + case KEY_TYPE_inode_generation: { struct bkey_s_c_inode_generation g = bkey_s_c_to_inode_generation(k); bi_generation = le32_to_cpu(g.v->bi_generation); @@ -456,7 +447,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, POS(inode_nr, 0), BTREE_ITER_SLOTS, k) { switch (k.k->type) { - case BCH_INODE_FS: + case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); break; default: @@ -465,7 +456,6 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, } break; - } return bch2_btree_iter_unlock(&iter) ?: ret; diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index e4495a4..44855e1 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -8,11 +8,21 @@ const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_inode_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_inode (struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ } +const char *bch2_inode_generation_invalid(const struct bch_fs *, + struct bkey_s_c); +void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); + +#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \ + .key_invalid = bch2_inode_generation_invalid, \ + .val_to_text = bch2_inode_generation_to_text, \ +} + struct bch_inode_unpacked { u64 bi_inum; __le64 bi_hash_seed; diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 12d77ec..98eca9a 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -202,20 +202,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, enum bch_data_type type, const struct bkey_i *k) { - struct bkey_s_c_extent e = 
bkey_i_to_s_c_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); const struct bch_extent_ptr *ptr; struct bch_write_bio *n; struct bch_dev *ca; BUG_ON(c->opts.nochanges); - extent_for_each_ptr(e, ptr) { + bkey_for_each_ptr(ptrs, ptr) { BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || !c->devs[ptr->dev]); ca = bch_dev_bkey_exists(c, ptr->dev); - if (ptr + 1 < &extent_entry_last(e)->ptr) { + if (to_entry(ptr + 1) < ptrs.end) { n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO, &ca->replica_set)); @@ -300,7 +300,6 @@ static void __bch2_write_index(struct bch_write_op *op) { struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; - struct bkey_s_extent e; struct bch_extent_ptr *ptr; struct bkey_i *src, *dst = keys->keys, *n, *k; unsigned dev; @@ -310,12 +309,10 @@ static void __bch2_write_index(struct bch_write_op *op) n = bkey_next(src); bkey_copy(dst, src); - e = bkey_i_to_s_extent(dst); - - bch2_extent_drop_ptrs(e, ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr, test_bit(ptr->dev, op->failed.d)); - if (!bch2_extent_nr_ptrs(e.c)) { + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) { ret = -EIO; goto err; } @@ -416,10 +413,10 @@ static void init_append_extent(struct bch_write_op *op, e->k.p = op->pos; e->k.size = crc.uncompressed_size; e->k.version = version; - bkey_extent_set_cached(&e->k, op->flags & BCH_WRITE_CACHED); bch2_extent_crc_append(e, crc); - bch2_alloc_sectors_append_ptrs(op->c, wp, e, crc.compressed_size); + bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i, + crc.compressed_size); bch2_keylist_push(&op->insert_keys); } @@ -1589,7 +1586,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, struct bpos pos = bkey_start_pos(k.k); int pick_ret; - pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick); + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); /* hole or reservation - just zero fill: */ if (!pick_ret) diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index ac1219f..47cfd50 100644 --- 
a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -462,7 +462,7 @@ u64 bch2_journal_last_unwritten_seq(struct journal *j) int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - bool need_reclaim = false; + int ret; retry: spin_lock(&j->lock); @@ -490,14 +490,11 @@ retry: BUG_ON(journal_cur_seq(j) < seq); - if (!journal_entry_open(j)) { - need_reclaim = true; - goto blocked; + ret = journal_entry_open(j); + if (ret) { + spin_unlock(&j->lock); + return ret < 0 ? ret : 0; } - - spin_unlock(&j->lock); - - return 0; blocked: if (!j->res_get_blocked_start) j->res_get_blocked_start = local_clock() ?: 1; @@ -505,8 +502,7 @@ blocked: closure_wait(&j->async_wait, cl); spin_unlock(&j->lock); - if (need_reclaim) - bch2_journal_reclaim_work(&j->reclaim_work.work); + bch2_journal_reclaim_work(&j->reclaim_work.work); return -EAGAIN; } diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 3840764..05500bf 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -141,11 +141,12 @@ static void journal_entry_null_range(void *start, void *end) static int journal_validate_key(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, - struct bkey_i *k, enum bkey_type key_type, + struct bkey_i *k, enum btree_node_type key_type, const char *type, int write) { void *next = vstruct_next(entry); const char *invalid; + unsigned version = le32_to_cpu(jset->version); int ret = 0; if (journal_entry_err_on(!k->k.u64s, c, @@ -174,14 +175,17 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, } if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) - bch2_bkey_swab(key_type, NULL, bkey_to_packed(k)); + bch2_bkey_swab(NULL, bkey_to_packed(k)); - invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k)); + if (!write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(key_type, bkey_to_packed(k), write); + + invalid = 
bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, key_type, - bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", type, invalid, buf); @@ -190,6 +194,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, journal_entry_null_range(vstruct_next(entry), next); return 0; } + + if (write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(key_type, bkey_to_packed(k), write); fsck_err: return ret; } @@ -203,8 +211,8 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c, vstruct_for_each(entry, k) { int ret = journal_validate_key(c, jset, entry, k, - bkey_type(entry->level, - entry->btree_id), + __btree_node_type(entry->level, + entry->btree_id), "key", write); if (ret) return ret; @@ -351,14 +359,17 @@ static int jset_validate(struct bch_fs *c, { size_t bytes = vstruct_bytes(jset); struct bch_csum csum; + unsigned version; int ret = 0; if (le64_to_cpu(jset->magic) != jset_magic(c)) return JOURNAL_ENTRY_NONE; - if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) { - bch_err(c, "unknown journal entry version %u", - le32_to_cpu(jset->version)); + version = le32_to_cpu(jset->version); + if ((version != BCH_JSET_VERSION_OLD && + version < bcachefs_metadata_version_min) || + version >= bcachefs_metadata_version_max) { + bch_err(c, "unknown journal entry version %u", jset->version); return BCH_FSCK_UNKNOWN_VERSION; } @@ -929,7 +940,6 @@ static void __journal_write_alloc(struct journal *j, unsigned replicas_want) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bkey_i_extent *e = bkey_i_to_extent(&w->key); struct journal_device *ja; struct bch_dev *ca; unsigned i; @@ -951,13 +961,14 @@ static void __journal_write_alloc(struct journal *j, if (!ca->mi.durability || ca->mi.state != BCH_MEMBER_STATE_RW || !ja->nr || - 
bch2_extent_has_device(extent_i_to_s_c(e), ca->dev_idx) || + bch2_bkey_has_device(bkey_i_to_s_c(&w->key), + ca->dev_idx) || sectors > ja->sectors_free) continue; bch2_dev_stripe_increment(c, ca, &j->wp.stripe); - extent_ptr_append(e, + bch2_bkey_append_ptr(&w->key, (struct bch_extent_ptr) { .offset = bucket_to_sector(ca, ja->buckets[ja->cur_idx]) + @@ -1096,7 +1107,7 @@ static void journal_write_done(struct closure *cl) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *w = journal_prev_buf(j); struct bch_devs_list devs = - bch2_extent_devs(bkey_i_to_s_c_extent(&w->key)); + bch2_bkey_devs(bkey_i_to_s_c(&w->key)); u64 seq = le64_to_cpu(w->data->seq); u64 last_seq = le64_to_cpu(w->data->last_seq); @@ -1158,7 +1169,7 @@ static void journal_write_endio(struct bio *bio) unsigned long flags; spin_lock_irqsave(&j->err_lock, flags); - bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx); + bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx); spin_unlock_irqrestore(&j->err_lock, flags); } @@ -1175,6 +1186,7 @@ void bch2_journal_write(struct closure *cl) struct jset *jset; struct bio *bio; struct bch_extent_ptr *ptr; + bool validate_before_checksum = false; unsigned i, sectors, bytes; journal_buf_realloc(j, w); @@ -1196,12 +1208,22 @@ void bch2_journal_write(struct closure *cl) jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); jset->magic = cpu_to_le64(jset_magic(c)); - jset->version = cpu_to_le32(BCACHE_JSET_VERSION); + + jset->version = c->sb.version < bcachefs_metadata_version_new_versioning + ? 
cpu_to_le32(BCH_JSET_VERSION_OLD) + : cpu_to_le32(c->sb.version); SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); - if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) + validate_before_checksum = true; + + if (le32_to_cpu(jset->version) < + bcachefs_metadata_version_bkey_renumber) + validate_before_checksum = true; + + if (validate_before_checksum && jset_validate_entries(c, jset, WRITE)) goto err; @@ -1212,7 +1234,7 @@ void bch2_journal_write(struct closure *cl) jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); - if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + if (!validate_before_checksum && jset_validate_entries(c, jset, WRITE)) goto err; diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c index df4fbae..3f26f45 100644 --- a/libbcachefs/migrate.c +++ b/libbcachefs/migrate.c @@ -14,7 +14,7 @@ #include "replicas.h" #include "super-io.h" -static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, +static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, unsigned dev_idx, int flags, bool metadata) { unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; @@ -22,9 +22,9 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, unsigned degraded = metadata ? 
BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; unsigned nr_good; - bch2_extent_drop_device(e, dev_idx); + bch2_bkey_drop_device(k, dev_idx); - nr_good = bch2_extent_durability(c, e.c); + nr_good = bch2_bkey_durability(c, k.s_c); if ((!nr_good && !(flags & lost)) || (nr_good < replicas && !(flags & degraded))) return -EINVAL; @@ -35,7 +35,6 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct bkey_s_c k; - struct bkey_s_extent e; BKEY_PADDED(key) tmp; struct btree_iter iter; int ret = 0; @@ -50,7 +49,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) !(ret = btree_iter_err(k))) { if (!bkey_extent_is_data(k.k) || !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k); + ret = bch2_mark_bkey_replicas(c, k); if (ret) break; bch2_btree_iter_next(&iter); @@ -58,18 +57,18 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) } bkey_reassemble(&tmp.key, k); - e = bkey_i_to_s_extent(&tmp.key); - ret = drop_dev_ptrs(c, e, dev_idx, flags, false); + ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key), + dev_idx, flags, false); if (ret) break; /* * If the new extent no longer has any pointers, bch2_extent_normalize() * will do the appropriate thing with it (turning it into a - * KEY_TYPE_ERROR key, or just a discard if it was a cached extent) + * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize(c, e.s); + bch2_extent_normalize(c, bkey_i_to_s(&tmp.key)); iter.pos = bkey_start_pos(&tmp.key.k); @@ -117,7 +116,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct bkey_i_extent *new_key; + struct 
bkey_i_btree_ptr *new_key; retry: if (!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key), dev_idx)) { @@ -129,15 +128,14 @@ retry: */ bch2_btree_iter_downgrade(&iter); - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) goto err; } else { bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_extent(&tmp.k); + new_key = bkey_i_to_btree_ptr(&tmp.k); - ret = drop_dev_ptrs(c, extent_i_to_s(new_key), + ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i), dev_idx, flags, true); if (ret) goto err; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 885792b..8c95aa9 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -100,8 +100,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_cut_back(insert->k.p, &new->k); if (m->data_cmd == DATA_REWRITE) - bch2_extent_drop_device(extent_i_to_s(insert), - m->data_opts.rewrite_dev); + bch2_bkey_drop_device(extent_i_to_s(insert).s, + m->data_opts.rewrite_dev); extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) { @@ -132,8 +132,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) * has fewer replicas than when we last looked at it - meaning * we need to get a disk reservation here: */ - nr = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - - (bch2_extent_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); + nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - + (bch2_bkey_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); if (nr > 0) { /* * can't call bch2_disk_reservation_add() with btree @@ -243,7 +243,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, switch (data_cmd) { case DATA_ADD_REPLICAS: { int nr = (int) io_opts.data_replicas - - bch2_extent_nr_dirty_ptrs(k); + bch2_bkey_nr_dirty_ptrs(k); if (nr > 0) { m->op.nr_replicas = m->nr_ptrs_reserved = nr; @@ -478,7 +478,6 @@ int bch2_move_data(struct bch_fs 
*c, struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); BKEY_PADDED(k) tmp; struct bkey_s_c k; - struct bkey_s_c_extent e; struct data_opts data_opts; enum data_cmd data_cmd; u64 delay, cur_inum = U64_MAX; @@ -531,8 +530,6 @@ peek: if (!bkey_extent_is_data(k.k)) goto next_nondata; - e = bkey_s_c_to_extent(k); - if (cur_inum != k.k->p.inode) { struct bch_inode_unpacked inode; @@ -546,8 +543,7 @@ peek: goto peek; } - switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e, - &io_opts, &data_opts))) { + switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -582,7 +578,7 @@ peek: if (rate) bch2_ratelimit_increment(rate, k.k->size); next: - atomic64_add(k.k->size * bch2_extent_nr_dirty_ptrs(k), + atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k), &stats->sectors_seen); next_nondata: bch2_btree_iter_next(&stats->iter); @@ -614,7 +610,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c) for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k); + ret = bch2_mark_bkey_replicas(c, k); if (ret) break; } @@ -638,8 +634,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c) for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); bch2_btree_iter_cond_resched(&iter); } @@ -669,10 +664,9 @@ static int bch2_move_btree(struct bch_fs *c, for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE, - bkey_i_to_s_c_extent(&b->key), - &io_opts, - &data_opts))) { + switch ((cmd = pred(c, arg, + bkey_i_to_s_c(&b->key), + &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -698,8 +692,7 @@ next: #if 0 static enum data_cmd 
scrub_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { @@ -708,33 +701,38 @@ static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, #endif static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { - unsigned nr_good = bch2_extent_durability(c, e); - unsigned replicas = type == BKEY_TYPE_BTREE - ? c->opts.metadata_replicas - : io_opts->data_replicas; + unsigned nr_good = bch2_bkey_durability(c, k); + unsigned replicas = 0; + + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + replicas = c->opts.metadata_replicas; + break; + case KEY_TYPE_extent: + replicas = io_opts->data_replicas; + break; + } if (!nr_good || nr_good >= replicas) return DATA_SKIP; data_opts->target = 0; - data_opts->btree_insert_flags = 0; + data_opts->btree_insert_flags = 0; return DATA_ADD_REPLICAS; } static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { struct bch_ioctl_data *op = arg; - if (!bch2_extent_has_device(e, op->migrate.dev)) + if (!bch2_bkey_has_device(k, op->migrate.dev)) return DATA_SKIP; data_opts->target = 0; diff --git a/libbcachefs/move.h b/libbcachefs/move.h index bc87e06..b3bee07 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -46,7 +46,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, struct bkey_s_c); typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, - enum bkey_type, struct bkey_s_c_extent, + struct bkey_s_c, struct bch_io_opts *, struct data_opts *); int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 70318f2..d689082 100644 --- a/libbcachefs/movinggc.c 
+++ b/libbcachefs/movinggc.c @@ -65,36 +65,42 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) } static bool __copygc_pred(struct bch_dev *ca, - struct bkey_s_c_extent e) + struct bkey_s_c k) { copygc_heap *h = &ca->copygc_heap; - const struct bch_extent_ptr *ptr = - bch2_extent_has_device(e, ca->dev_idx); - if (ptr) { - struct copygc_heap_entry search = { .offset = ptr->offset }; + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const struct bch_extent_ptr *ptr = + bch2_extent_has_device(e, ca->dev_idx); - ssize_t i = eytzinger0_find_le(h->data, h->used, - sizeof(h->data[0]), - bucket_offset_cmp, &search); + if (ptr) { + struct copygc_heap_entry search = { .offset = ptr->offset }; - return (i >= 0 && - ptr->offset < h->data[i].offset + ca->mi.bucket_size && - ptr->gen == h->data[i].gen); + ssize_t i = eytzinger0_find_le(h->data, h->used, + sizeof(h->data[0]), + bucket_offset_cmp, &search); + + return (i >= 0 && + ptr->offset < h->data[i].offset + ca->mi.bucket_size && + ptr->gen == h->data[i].gen); + } + break; + } } return false; } static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { struct bch_dev *ca = arg; - if (!__copygc_pred(ca, e)) + if (!__copygc_pred(ca, k)) return DATA_SKIP; data_opts->target = dev_to_target(ca->dev_idx); diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 8ffae3d..5c74401 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -180,6 +180,9 @@ enum opt_type { OPT_BOOL(), \ NO_SB_OPT, false) \ BCH_OPT(nostart, u8, OPT_INTERNAL, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(version_upgrade, u8, OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false) diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 75104ea..95ff0ca 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -21,23 +21,13 @@ const struct 
bch_sb_field_ops bch_sb_field_ops_quota = { const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_quota dq; - if (k.k->p.inode >= QTYP_NR) return "invalid quota type"; - switch (k.k->type) { - case BCH_QUOTA: { - dq = bkey_s_c_to_quota(k); - - if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) - return "incorrect value size"; + if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) + return "incorrect value size"; - return NULL; - } - default: - return "invalid type"; - } + return NULL; } static const char * const bch2_quota_counters[] = { @@ -48,20 +38,14 @@ static const char * const bch2_quota_counters[] = { void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_quota dq; + struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); unsigned i; - switch (k.k->type) { - case BCH_QUOTA: - dq = bkey_s_c_to_quota(k); - - for (i = 0; i < Q_COUNTERS; i++) - pr_buf(out, "%s hardlimit %llu softlimit %llu", - bch2_quota_counters[i], - le64_to_cpu(dq.v->c[i].hardlimit), - le64_to_cpu(dq.v->c[i].softlimit)); - break; - } + for (i = 0; i < Q_COUNTERS; i++) + pr_buf(out, "%s hardlimit %llu softlimit %llu", + bch2_quota_counters[i], + le64_to_cpu(dq.v->c[i].hardlimit), + le64_to_cpu(dq.v->c[i].softlimit)); } #ifdef CONFIG_BCACHEFS_QUOTA @@ -177,7 +161,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, BUG_ON((s64) n < 0); - if (mode == BCH_QUOTA_NOCHECK) + if (mode == KEY_TYPE_QUOTA_NOCHECK) return 0; if (v <= 0) { @@ -200,7 +184,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, if (qc->hardlimit && qc->hardlimit < n && !ignore_hardlimit(q)) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, HARDWARN); @@ -211,7 +195,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, qc->timer && ktime_get_real_seconds() >= qc->timer && !ignore_hardlimit(q)) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == 
KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN); @@ -220,7 +204,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, if (qc->softlimit && qc->softlimit < n && qc->timer == 0) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, SOFTWARN); @@ -311,13 +295,13 @@ int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_SPC, dst_q[i]->c[Q_SPC].v + space, - BCH_QUOTA_PREALLOC); + KEY_TYPE_QUOTA_PREALLOC); if (ret) goto err; ret = bch2_quota_check_limit(c, i, dst_q[i], &msgs, Q_INO, dst_q[i]->c[Q_INO].v + 1, - BCH_QUOTA_PREALLOC); + KEY_TYPE_QUOTA_PREALLOC); if (ret) goto err; } @@ -346,7 +330,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k) BUG_ON(k.k->p.inode >= QTYP_NR); switch (k.k->type) { - case BCH_QUOTA: + case KEY_TYPE_quota: dq = bkey_s_c_to_quota(k); q = &c->quotas[k.k->p.inode]; @@ -446,15 +430,15 @@ int bch2_fs_quota_read(struct bch_fs *c) for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, BTREE_ITER_PREFETCH, k) { switch (k.k->type) { - case BCH_INODE_FS: + case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u); if (ret) return ret; bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors, - BCH_QUOTA_NOCHECK); + KEY_TYPE_QUOTA_NOCHECK); bch2_quota_acct(c, bch_qid(&u), Q_INO, 1, - BCH_QUOTA_NOCHECK); + KEY_TYPE_QUOTA_NOCHECK); } } return bch2_btree_iter_unlock(&iter) ?: ret; @@ -699,22 +683,19 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, struct bch_fs *c = sb->s_fs_info; struct bch_memquota_type *q = &c->quotas[kqid->type]; qid_t qid = from_kqid(&init_user_ns, *kqid); - struct genradix_iter iter = genradix_iter_init(&q->table, qid); + struct genradix_iter iter; struct bch_memquota *mq; int ret = 0; mutex_lock(&q->lock); - while ((mq = genradix_iter_peek(&iter, &q->table))) { + 
genradix_for_each_from(&q->table, iter, mq, qid) if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { __bch2_quota_get(qdq, mq); *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); goto found; } - genradix_iter_advance(&iter, &q->table); - } - ret = -ENOENT; found: mutex_unlock(&q->lock); @@ -745,7 +726,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, return ret; switch (k.k->type) { - case BCH_QUOTA: + case KEY_TYPE_quota: new_quota.v = *bkey_s_c_to_quota(k).v; break; } diff --git a/libbcachefs/quota.h b/libbcachefs/quota.h index 325b9fc..0c3eb69 100644 --- a/libbcachefs/quota.h +++ b/libbcachefs/quota.h @@ -9,15 +9,15 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_quota; const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_quota_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_quota (struct bkey_ops) { \ .key_invalid = bch2_quota_invalid, \ .val_to_text = bch2_quota_to_text, \ } enum quota_acct_mode { - BCH_QUOTA_PREALLOC, - BCH_QUOTA_WARN, - BCH_QUOTA_NOCHECK, + KEY_TYPE_QUOTA_PREALLOC, + KEY_TYPE_QUOTA_WARN, + KEY_TYPE_QUOTA_NOCHECK, }; static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u) diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index 25d72de..dc6ca94 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -69,28 +69,34 @@ void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors) } static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; - /* Make sure we have room to add a 
new pointer: */ - if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX > - BKEY_EXTENT_VAL_U64s_MAX) - return DATA_SKIP; + /* Make sure we have room to add a new pointer: */ + if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX > + BKEY_EXTENT_VAL_U64s_MAX) + return DATA_SKIP; - extent_for_each_ptr_decode(e, p, entry) - if (rebalance_ptr_pred(c, p, io_opts)) - goto found; + extent_for_each_ptr_decode(e, p, entry) + if (rebalance_ptr_pred(c, p, io_opts)) + goto found; - return DATA_SKIP; + return DATA_SKIP; found: - data_opts->target = io_opts->background_target; - data_opts->btree_insert_flags = 0; - return DATA_ADD_REPLICAS; + data_opts->target = io_opts->background_target; + data_opts->btree_insert_flags = 0; + return DATA_ADD_REPLICAS; + } + default: + return DATA_SKIP; + } } struct rebalance_work { diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 0e3c321..e9e4a1a 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -146,6 +146,10 @@ int bch2_fs_recovery(struct bch_fs *c) mutex_unlock(&c->sb_lock); goto err; } + + if (le16_to_cpu(c->disk_sb.sb->version) < + bcachefs_metadata_version_bkey_renumber) + bch2_sb_clean_renumber(clean, READ); } mutex_unlock(&c->sb_lock); @@ -264,12 +268,18 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; - if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; - mutex_unlock(&c->sb_lock); + mutex_lock(&c->sb_lock); + if (c->opts.version_upgrade) { + if (c->sb.version < bcachefs_metadata_version_new_versioning) + c->disk_sb.sb->version_min = + le16_to_cpu(bcachefs_metadata_version_min); + c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current); } + if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; + mutex_unlock(&c->sb_lock); + if (enabled_qtypes(c)) { bch_verbose(c, "reading quotas:"); ret = bch2_fs_quota_read(c); @@ 
-304,6 +314,9 @@ int bch2_fs_initialize(struct bch_fs *c) set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags); + for (i = 0; i < BTREE_ID_NR; i++) + bch2_btree_root_alloc(c, i); + ret = bch2_initial_gc(c, &journal); if (ret) goto err; @@ -315,9 +328,6 @@ int bch2_fs_initialize(struct bch_fs *c) goto err; } - for (i = 0; i < BTREE_ID_NR; i++) - bch2_btree_root_alloc(c, i); - /* * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: @@ -378,9 +388,12 @@ int bch2_fs_initialize(struct bch_fs *c) goto err; mutex_lock(&c->sb_lock); + c->disk_sb.sb->version = c->disk_sb.sb->version_min = + le16_to_cpu(bcachefs_metadata_version_current); + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; + SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK; bch2_write_super(c); mutex_unlock(&c->sb_lock); diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index 0ba5ce5..6ab4e36 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -72,64 +72,57 @@ void bch2_cpu_replicas_to_text(struct printbuf *out, static void extent_to_replicas(struct bkey_s_c k, struct bch_replicas_entry *r) { - if (bkey_extent_is_data(k.k)) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; - r->nr_required = 1; + r->nr_required = 1; - extent_for_each_ptr_decode(e, p, entry) { - if (p.ptr.cached) - continue; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.ptr.cached) + continue; - if (p.ec_nr) { - r->nr_devs = 0; - break; - } - - r->devs[r->nr_devs++] = p.ptr.dev; + if (p.ec_nr) { + r->nr_devs = 0; + break; } + + r->devs[r->nr_devs++] = p.ptr.dev; } } static void stripe_to_replicas(struct bkey_s_c k, struct bch_replicas_entry *r) { - if 
(k.k->type == BCH_STRIPE) { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - const struct bch_extent_ptr *ptr; + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + const struct bch_extent_ptr *ptr; - r->nr_required = s.v->nr_blocks - s.v->nr_redundant; + r->nr_required = s.v->nr_blocks - s.v->nr_redundant; - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) - r->devs[r->nr_devs++] = ptr->dev; - } + for (ptr = s.v->ptrs; + ptr < s.v->ptrs + s.v->nr_blocks; + ptr++) + r->devs[r->nr_devs++] = ptr->dev; } -static void bkey_to_replicas(enum bkey_type type, - struct bkey_s_c k, +static void bkey_to_replicas(struct bkey_s_c k, struct bch_replicas_entry *e) { e->nr_devs = 0; - switch (type) { - case BKEY_TYPE_BTREE: + switch (k.k->type) { + case KEY_TYPE_btree_ptr: e->data_type = BCH_DATA_BTREE; extent_to_replicas(k, e); break; - case BKEY_TYPE_EXTENTS: + case KEY_TYPE_extent: e->data_type = BCH_DATA_USER; extent_to_replicas(k, e); break; - case BKEY_TYPE_EC: + case KEY_TYPE_stripe: e->data_type = BCH_DATA_USER; stripe_to_replicas(k, e); break; - default: - break; } replicas_entry_sort(e); @@ -295,26 +288,21 @@ int bch2_mark_replicas(struct bch_fs *c, return __bch2_mark_replicas(c, &search.e); } -int bch2_mark_bkey_replicas(struct bch_fs *c, - enum bkey_type type, - struct bkey_s_c k) +int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) { struct bch_replicas_entry_padded search; + struct bch_devs_list cached = bch2_bkey_cached_devs(k); + unsigned i; int ret; memset(&search, 0, sizeof(search)); - if (type == BKEY_TYPE_EXTENTS) { - struct bch_devs_list cached = bch2_bkey_cached_devs(k); - unsigned i; + for (i = 0; i < cached.nr; i++) + if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED, + bch2_dev_list_single(cached.devs[i])))) + return ret; - for (i = 0; i < cached.nr; i++) - if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED, - bch2_dev_list_single(cached.devs[i])))) - return ret; - } - - bkey_to_replicas(type, k, &search.e); + 
bkey_to_replicas(k, &search.e); return search.e.nr_devs ? __bch2_mark_replicas(c, &search.e) @@ -718,26 +706,22 @@ bool bch2_replicas_marked(struct bch_fs *c, } bool bch2_bkey_replicas_marked(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, bool check_gc_replicas) { struct bch_replicas_entry_padded search; + struct bch_devs_list cached = bch2_bkey_cached_devs(k); + unsigned i; memset(&search, 0, sizeof(search)); - if (type == BKEY_TYPE_EXTENTS) { - struct bch_devs_list cached = bch2_bkey_cached_devs(k); - unsigned i; - - for (i = 0; i < cached.nr; i++) - if (!bch2_replicas_marked(c, BCH_DATA_CACHED, - bch2_dev_list_single(cached.devs[i]), - check_gc_replicas)) - return false; - } + for (i = 0; i < cached.nr; i++) + if (!bch2_replicas_marked(c, BCH_DATA_CACHED, + bch2_dev_list_single(cached.devs[i]), + check_gc_replicas)) + return false; - bkey_to_replicas(type, k, &search.e); + bkey_to_replicas(k, &search.e); return search.e.nr_devs ? replicas_has_entry(c, &search.e, check_gc_replicas) diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h index 7fee927..87246a0 100644 --- a/libbcachefs/replicas.h +++ b/libbcachefs/replicas.h @@ -5,12 +5,11 @@ bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type, struct bch_devs_list, bool); -bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type, +bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c, bool); int bch2_mark_replicas(struct bch_fs *, enum bch_data_type, struct bch_devs_list); -int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type, - struct bkey_s_c); +int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *); diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 7eff5a4..032b34a 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -117,7 +117,6 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx, struct bch_hash_desc { enum btree_id btree_id; 
u8 key_type; - u8 whiteout_type; u64 (*hash_key)(const struct bch_hash_info *, const void *); u64 (*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c); @@ -148,7 +147,7 @@ bch2_hash_lookup(struct btree_trans *trans, if (k.k->type == desc.key_type) { if (!desc.cmp_key(k, key)) return iter; - } else if (k.k->type == desc.whiteout_type) { + } else if (k.k->type == KEY_TYPE_whiteout) { ; } else { /* hole, not found */ @@ -201,7 +200,7 @@ static inline int bch2_hash_needs_whiteout(struct btree_trans *trans, for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) { if (k.k->type != desc.key_type && - k.k->type != desc.whiteout_type) + k.k->type != KEY_TYPE_whiteout) return false; if (k.k->type == desc.key_type && @@ -244,7 +243,7 @@ static inline int __bch2_hash_set(struct btree_trans *trans, return PTR_ERR(slot); } - if (k.k->type != desc.whiteout_type) + if (k.k->type != KEY_TYPE_whiteout) goto not_found; } @@ -294,7 +293,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans, bkey_init(&delete->k); delete->k.p = iter->pos; - delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED; + delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted; bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete)); return 0; diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 7192007..c5eaf15 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -232,21 +232,25 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) struct bch_sb_field *f; struct bch_sb_field_members *mi; const char *err; + u32 version, version_min; u16 block_size; - if (le64_to_cpu(sb->version) < BCH_SB_VERSION_MIN || - le64_to_cpu(sb->version) > BCH_SB_VERSION_MAX) - return"Unsupported superblock version"; + version = le16_to_cpu(sb->version); + version_min = version >= bcachefs_metadata_version_new_versioning + ? 
le16_to_cpu(sb->version_min) + : version; + + if (version >= bcachefs_metadata_version_max || + version_min < bcachefs_metadata_version_min) + return "Unsupported superblock version"; + + if (version_min > version) + return "Bad minimum version"; if (sb->features[1] || (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) return "Filesystem has incompatible features"; - if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) { - SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7); - SET_BCH_SB_POSIX_ACL(sb, 1); - } - block_size = le16_to_cpu(sb->block_size); if (!is_power_of_2(block_size) || @@ -333,13 +337,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb) return err; } - if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_NONCE_V1 && - bch2_sb_get_crypt(sb) && - BCH_SB_INITIALIZED(sb)) - return "Incompatible extent nonces"; - - sb->version = cpu_to_le64(BCH_SB_VERSION_MAX); - return NULL; } @@ -356,6 +353,7 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.uuid = src->uuid; c->sb.user_uuid = src->user_uuid; + c->sb.version = le16_to_cpu(src->version); c->sb.nr_devices = src->nr_devices; c->sb.clean = BCH_SB_CLEAN(src); c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src); @@ -377,6 +375,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) unsigned i; dst->version = src->version; + dst->version_min = src->version_min; dst->seq = src->seq; dst->uuid = src->uuid; dst->user_uuid = src->user_uuid; @@ -476,8 +475,8 @@ reread: if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC)) return "Not a bcachefs superblock"; - if (le64_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN || - le64_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX) + if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min || + le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max) return "Unsupported superblock version"; bytes = vstruct_bytes(sb->sb); @@ -843,12 +842,6 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb, return "bucket size 
smaller than btree node size"; } - if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) - for (m = mi->members; - m < mi->members + sb->nr_devices; - m++) - SET_BCH_MEMBER_DATA_ALLOWED(m, ~0); - return NULL; } @@ -878,6 +871,16 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = { /* BCH_SB_FIELD_clean: */ +void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) +{ + struct jset_entry *entry; + + for (entry = clean->start; + entry < (struct jset_entry *) vstruct_end(&clean->field); + entry = vstruct_next(entry)) + bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); +} + void bch2_fs_mark_clean(struct bch_fs *c, bool clean) { struct bch_sb_field_clean *sb_clean; @@ -932,6 +935,10 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean) BUG_ON(entry != vstruct_end(&sb_clean->field)); + if (le16_to_cpu(c->disk_sb.sb->version) < + bcachefs_metadata_version_bkey_renumber) + bch2_sb_clean_renumber(sb_clean, WRITE); + mutex_unlock(&c->btree_root_lock); write_super: bch2_write_super(c); diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index c66fd97..b493d62 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -134,6 +134,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) /* BCH_SB_FIELD_clean: */ +void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); + void bch2_fs_mark_clean(struct bch_fs *, bool); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 0eb6b7e..b33117d 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -9,6 +9,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "bkey_sort.h" #include "btree_cache.h" #include "btree_gc.h" #include "btree_update_interior.h" @@ -580,7 +581,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_LIST_HEAD(&c->ec_new_stripe_list); 
mutex_init(&c->ec_new_stripe_lock); - mutex_init(&c->ec_stripes_lock); + mutex_init(&c->ec_stripe_create_lock); spin_lock_init(&c->ec_stripes_heap_lock); seqcount_init(&c->gc_pos_lock); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 0c3bdcd..42e09f5 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -276,7 +276,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) return -EPERM; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) - if (k.k->type == BCH_EXTENT) { + if (k.k->type == KEY_TYPE_extent) { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 31f3b98..85d8bdd 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -61,8 +61,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) const struct bch_hash_desc bch2_xattr_hash_desc = { .btree_id = BTREE_ID_XATTRS, - .key_type = BCH_XATTR, - .whiteout_type = BCH_XATTR_WHITEOUT, + .key_type = KEY_TYPE_xattr, .hash_key = xattr_hash_key, .hash_bkey = xattr_hash_bkey, .cmp_key = xattr_cmp_key, @@ -72,71 +71,50 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) { const struct xattr_handler *handler; - struct bkey_s_c_xattr xattr; - - switch (k.k->type) { - case BCH_XATTR: - if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) - return "value too small"; - - xattr = bkey_s_c_to_xattr(k); + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); - if (bkey_val_u64s(k.k) < - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len))) - return "value too small"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) + return "value too small"; - if (bkey_val_u64s(k.k) > - xattr_val_u64s(xattr.v->x_name_len, - le16_to_cpu(xattr.v->x_val_len) + 4)) - return "value too big"; + if (bkey_val_u64s(k.k) < + xattr_val_u64s(xattr.v->x_name_len, + 
le16_to_cpu(xattr.v->x_val_len))) + return "value too small"; - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (!handler) - return "invalid type"; + if (bkey_val_u64s(k.k) > + xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len) + 4)) + return "value too big"; - if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) - return "xattr name has invalid characters"; + handler = bch2_xattr_type_to_handler(xattr.v->x_type); + if (!handler) + return "invalid type"; - return NULL; - case BCH_XATTR_WHITEOUT: - return bkey_val_bytes(k.k) != 0 - ? "value size should be zero" - : NULL; + if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) + return "xattr name has invalid characters"; - default: - return "invalid type"; - } + return NULL; } void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { const struct xattr_handler *handler; - struct bkey_s_c_xattr xattr; + struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); - switch (k.k->type) { - case BCH_XATTR: - xattr = bkey_s_c_to_xattr(k); + handler = bch2_xattr_type_to_handler(xattr.v->x_type); + if (handler && handler->prefix) + pr_buf(out, "%s", handler->prefix); + else if (handler) + pr_buf(out, "(type %u)", xattr.v->x_type); + else + pr_buf(out, "(unknown type %u)", xattr.v->x_type); - handler = bch2_xattr_type_to_handler(xattr.v->x_type); - if (handler && handler->prefix) - pr_buf(out, "%s", handler->prefix); - else if (handler) - pr_buf(out, "(type %u)", xattr.v->x_type); - else - pr_buf(out, "(unknown type %u)", xattr.v->x_type); - - bch_scnmemcpy(out, xattr.v->x_name, - xattr.v->x_name_len); - pr_buf(out, ":"); - bch_scnmemcpy(out, xattr_val(xattr.v), - le16_to_cpu(xattr.v->x_val_len)); - break; - case BCH_XATTR_WHITEOUT: - pr_buf(out, "whiteout"); - break; - } + bch_scnmemcpy(out, xattr.v->x_name, + xattr.v->x_name_len); + pr_buf(out, ":"); + bch_scnmemcpy(out, xattr_val(xattr.v), + le16_to_cpu(xattr.v->x_val_len)); } int bch2_xattr_get(struct bch_fs *c, 
struct bch_inode_info *inode, @@ -260,7 +238,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) if (k.k->p.inode > inum) break; - if (k.k->type != BCH_XATTR) + if (k.k->type != KEY_TYPE_xattr) continue; xattr = bkey_s_c_to_xattr(k).v; @@ -313,7 +291,7 @@ static const struct xattr_handler bch_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .get = bch2_xattr_get_handler, .set = bch2_xattr_set_handler, - .flags = BCH_XATTR_INDEX_USER, + .flags = KEY_TYPE_XATTR_INDEX_USER, }; static bool bch2_xattr_trusted_list(struct dentry *dentry) @@ -326,14 +304,14 @@ static const struct xattr_handler bch_xattr_trusted_handler = { .list = bch2_xattr_trusted_list, .get = bch2_xattr_get_handler, .set = bch2_xattr_set_handler, - .flags = BCH_XATTR_INDEX_TRUSTED, + .flags = KEY_TYPE_XATTR_INDEX_TRUSTED, }; static const struct xattr_handler bch_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = bch2_xattr_get_handler, .set = bch2_xattr_set_handler, - .flags = BCH_XATTR_INDEX_SECURITY, + .flags = KEY_TYPE_XATTR_INDEX_SECURITY, }; #ifndef NO_BCACHEFS_FS @@ -471,13 +449,13 @@ const struct xattr_handler *bch2_xattr_handlers[] = { }; static const struct xattr_handler *bch_xattr_handler_map[] = { - [BCH_XATTR_INDEX_USER] = &bch_xattr_user_handler, - [BCH_XATTR_INDEX_POSIX_ACL_ACCESS] = + [KEY_TYPE_XATTR_INDEX_USER] = &bch_xattr_user_handler, + [KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, - [BCH_XATTR_INDEX_POSIX_ACL_DEFAULT] = + [KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler, - [BCH_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, - [BCH_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, + [KEY_TYPE_XATTR_INDEX_TRUSTED] = &bch_xattr_trusted_handler, + [KEY_TYPE_XATTR_INDEX_SECURITY] = &bch_xattr_security_handler, }; static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type) diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h index 42b7ba3..e9b2776 100644 
--- a/libbcachefs/xattr.h +++ b/libbcachefs/xattr.h @@ -8,7 +8,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_xattr_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_xattr (struct bkey_ops) { \ .key_invalid = bch2_xattr_invalid, \ .val_to_text = bch2_xattr_to_text, \ } -- 2.39.2