From 1251ea58a84e365db3754e6f5f57bd442484dc89 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Oct 2023 17:23:13 -0400 Subject: [PATCH] Update bcachefs sources to 7bf1ac0d46 bcachefs: Correctly initialize new buckets on device resize Signed-off-by: Hunter Shaffer --- .bcachefs_revision | 2 +- cmd_device.c | 22 +- cmd_format.c | 11 +- libbcachefs.c | 14 +- libbcachefs/alloc_background.c | 27 ++- libbcachefs/alloc_background.h | 1 + libbcachefs/bcachefs.h | 2 +- libbcachefs/bcachefs_format.h | 48 +++- libbcachefs/buckets.c | 2 - libbcachefs/buckets.h | 7 +- libbcachefs/checksum.c | 37 ++- libbcachefs/disk_groups.c | 24 +- libbcachefs/errcode.h | 1 + libbcachefs/fs.c | 6 +- libbcachefs/inode.c | 10 +- libbcachefs/journal_sb.c | 20 +- libbcachefs/opts.c | 5 + libbcachefs/opts.h | 1 + libbcachefs/recovery.c | 11 +- libbcachefs/replicas.c | 3 +- libbcachefs/sb-members.c | 416 +++++++++++++++++++++++---------- libbcachefs/sb-members.h | 8 +- libbcachefs/super-io.c | 26 +-- libbcachefs/super-io.h | 11 +- libbcachefs/super.c | 103 ++++---- libbcachefs/sysfs.c | 4 +- 26 files changed, 530 insertions(+), 292 deletions(-) diff --git a/.bcachefs_revision b/.bcachefs_revision index a507788..48318a7 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -a1b6677dca574a8bf904d9eea2b108474dc378d1 +7bf1ac0d46ebede68561e0476f7af9c07ac21de8 diff --git a/cmd_device.c b/cmd_device.c index 1914629..d0a3e72 100644 --- a/cmd_device.c +++ b/cmd_device.c @@ -414,7 +414,7 @@ int cmd_device_set_state(int argc, char *argv[]) if (ret) die("error opening %s: %s", dev_str, bch2_err_str(ret)); - struct bch_member *m = bch2_sb_get_members(sb.sb)->members + sb.sb->dev_idx; + struct bch_member *m = bch2_members_v2_get_mut(sb.sb, sb.sb->dev_idx); SET_BCH_MEMBER_STATE(m, new_state); @@ -510,16 +510,11 @@ int cmd_device_resize(int argc, char *argv[]) if (idx >= sb->nr_devices) die("error reading superblock: dev idx >= sb->nr_devices"); - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); - if (!mi) - die("error reading superblock: no member info"); + struct bch_member m = bch2_sb_member_get(sb, idx); - /* could also just read this out of sysfs... meh */ - struct bch_member *m = mi->members + idx; + u64 nbuckets = size / le16_to_cpu(m.bucket_size); - u64 nbuckets = size / le16_to_cpu(m->bucket_size); - - if (nbuckets < le64_to_cpu(m->nbuckets)) + if (nbuckets < le64_to_cpu(m.nbuckets)) die("Shrinking not supported yet"); printf("resizing %s to %llu buckets\n", dev, nbuckets); @@ -616,14 +611,9 @@ int cmd_device_resize_journal(int argc, char *argv[]) if (idx >= sb->nr_devices) die("error reading superblock: dev idx >= sb->nr_devices"); - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); - if (!mi) - die("error reading superblock: no member info"); - - /* could also just read this out of sysfs... meh */ - struct bch_member *m = mi->members + idx; + struct bch_member m = bch2_sb_member_get(sb, idx); - u64 nbuckets = size / le16_to_cpu(m->bucket_size); + u64 nbuckets = size / le16_to_cpu(m.bucket_size); printf("resizing journal on %s to %llu buckets\n", dev, nbuckets); bchu_disk_resize_journal(fs, idx, nbuckets); diff --git a/cmd_format.c b/cmd_format.c index 26a1cd9..4b1dcbe 100644 --- a/cmd_format.c +++ b/cmd_format.c @@ -245,7 +245,7 @@ int cmd_format(int argc, char *argv[]) buf.human_readable_units = true; - bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members); + bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members_v2); printf("%s", buf.buf); printbuf_exit(&buf); @@ -305,8 +305,9 @@ int cmd_show_super(int argc, char *argv[]) { "help", 0, NULL, 'h' }, { NULL } }; - unsigned fields = 1 << BCH_SB_FIELD_members; + unsigned fields = 0; bool print_layout = false; + bool print_default_fields = true; int opt; while ((opt = getopt_long(argc, argv, "f:lh", longopts, NULL)) != -1) @@ -316,6 +317,7 @@ int cmd_show_super(int argc, char *argv[]) ? ~0 : read_flag_list_or_die(optarg, bch2_sb_fields, "superblock field"); + print_default_fields = false; break; case 'l': print_layout = true; @@ -342,6 +344,11 @@ int cmd_show_super(int argc, char *argv[]) if (ret) die("Error opening %s: %s", dev, bch2_err_str(ret)); + if (print_default_fields) + fields = bch2_sb_get_members_v2(sb.sb) + ? 1 << BCH_SB_FIELD_members_v2 + : 1 << BCH_SB_FIELD_members_v1; + struct printbuf buf = PRINTBUF; buf.human_readable_units = true; diff --git a/libbcachefs.c b/libbcachefs.c index 44a5fbf..aac5084 100644 --- a/libbcachefs.c +++ b/libbcachefs.c @@ -150,7 +150,6 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, { struct bch_sb_handle sb = { NULL }; struct dev_opts *i; - struct bch_sb_field_members *mi; unsigned max_dev_block_size = 0; unsigned opt_id; @@ -222,12 +221,13 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, sb.sb->time_precision = cpu_to_le32(1); /* Member info: */ - mi = bch2_sb_resize_members(&sb, + struct bch_sb_field_members_v2 *mi = + bch2_sb_resize_members_v2(&sb, (sizeof(*mi) + sizeof(struct bch_member) * nr_devs) / sizeof(u64)); - + mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); for (i = devs; i < devs + nr_devs; i++) { - struct bch_member *m = mi->members + (i - devs); + struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs)); uuid_generate(m->uuid.b); m->nbuckets = cpu_to_le64(i->nbuckets); @@ -255,9 +255,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, * Recompute mi and m after each sb modification: its location * in memory may have changed due to reallocation. */ - mi = bch2_sb_get_members(sb.sb); - m = mi->members + (i - devs); - + m = bch2_members_v2_get_mut(sb.sb, (i - devs)); SET_BCH_MEMBER_GROUP(m, idx + 1); } @@ -279,6 +277,8 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, SET_BCH_SB_ENCRYPTION_TYPE(sb.sb, 1); } + bch_members_cpy_v2_v1(&sb); + for (i = devs; i < devs + nr_devs; i++) { u64 size_sectors = i->size >> 9; diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 19ef7a4..d1c323f 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -1831,29 +1831,33 @@ void bch2_do_invalidates(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } -static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, - unsigned long *last_updated) +int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, + u64 bucket_start, u64 bucket_end) { struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bkey hole; - struct bpos end = POS(ca->dev_idx, ca->mi.nbuckets); - struct bch_member *m; + struct bpos end = POS(ca->dev_idx, bucket_end); + struct bch_member m; + unsigned long last_updated = jiffies; int ret; + BUG_ON(bucket_start > bucket_end); + BUG_ON(bucket_end > ca->mi.nbuckets); + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - BTREE_ITER_PREFETCH); + POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)), + BTREE_ITER_PREFETCH); /* * Scan the alloc btree for every bucket on @ca, and add buckets to the * freespace/need_discard/need_gc_gens btrees as needed: */ while (1) { - if (*last_updated + HZ * 10 < jiffies) { + if (last_updated + HZ * 10 < jiffies) { bch_info(ca, "%s: currently at %llu/%llu", __func__, iter.pos.offset, ca->mi.nbuckets); - *last_updated = jiffies; + last_updated = jiffies; } bch2_trans_begin(trans); @@ -1922,8 +1926,8 @@ bkey_err: } mutex_lock(&c->sb_lock); - m = bch2_sb_get_members(c->disk_sb.sb)->members + ca->dev_idx; - SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); + m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_FREESPACE_INITIALIZED(&m, true); mutex_unlock(&c->sb_lock); return 0; @@ -1935,7 +1939,6 @@ int bch2_fs_freespace_init(struct bch_fs *c) unsigned i; int ret = 0; bool doing_init = false; - unsigned long last_updated = jiffies; /* * We can crash during the device add path, so we need to check this on @@ -1951,7 +1954,7 @@ int bch2_fs_freespace_init(struct bch_fs *c) doing_init = true; } - ret = bch2_dev_freespace_init(c, ca, &last_updated); + ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); if (ret) { percpu_ref_put(&ca->ref); bch_err_fn(c, ret); diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index c0914fe..9704206 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -245,6 +245,7 @@ static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); } +int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64); int bch2_fs_freespace_init(struct bch_fs *); void bch2_recalc_capacity(struct bch_fs *); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index ef5c078..e9d07f9 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -379,7 +379,7 @@ BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM #ifndef CONFIG_BCACHEFS_DEBUG -#define BCH_DEBUG_PARAM(name, description) static const bool bch2_##name; +#define BCH_DEBUG_PARAM(name, description) static const __maybe_unused bool bch2_##name; BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM #endif diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index f0d1304..99749f3 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1222,7 +1222,7 @@ struct bch_sb_field { #define BCH_SB_FIELDS() \ x(journal, 0) \ - x(members, 1) \ + x(members_v1, 1) \ x(crypt, 2) \ x(replicas_v0, 3) \ x(quota, 4) \ @@ -1231,7 +1231,8 @@ struct bch_sb_field { x(replicas, 7) \ x(journal_seq_blacklist, 8) \ x(journal_v2, 9) \ - x(counters, 10) + x(counters, 10) \ + x(members_v2, 11) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1264,10 +1265,23 @@ struct bch_sb_field_journal_v2 { } d[]; }; -/* BCH_SB_FIELD_members: */ +/* BCH_SB_FIELD_members_v1: */ #define BCH_MIN_NR_NBUCKETS (1 << 6) +#define BCH_IOPS_MEASUREMENTS() \ + x(seqread, 0) \ + x(seqwrite, 1) \ + x(randread, 2) \ + x(randwrite, 3) + +enum bch_iops_measurement { +#define x(t, n) BCH_IOPS_##t = n, + BCH_IOPS_MEASUREMENTS() +#undef x + BCH_IOPS_NR +}; + struct bch_member { __uuid_t uuid; __le64 nbuckets; /* device size */ @@ -1276,17 +1290,20 @@ struct bch_member { __le32 pad; __le64 last_mount; /* time_t */ - __le64 flags[2]; + __le64 flags; + __le32 iops[4]; }; -LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4) +#define BCH_MEMBER_V1_BYTES 56 + +LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4) /* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */ -LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15) -LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags[0], 15, 20) -LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags[0], 20, 28) -LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags[0], 28, 30) +LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15) +LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20) +LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28) +LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30) LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED, - struct bch_member, flags[0], 30, 31) + struct bch_member, flags, 30, 31) #if 0 LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); @@ -1306,9 +1323,16 @@ enum bch_member_state { BCH_MEMBER_STATE_NR }; -struct bch_sb_field_members { +struct bch_sb_field_members_v1 { + struct bch_sb_field field; + struct bch_member _members[]; //Members are now variable size +}; + +struct bch_sb_field_members_v2 { struct bch_sb_field field; - struct bch_member members[]; + __le16 member_bytes; //size of single member entry + u8 pad[6]; + struct bch_member _members[]; }; /* BCH_SB_FIELD_crypt: */ diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index e7f4506..46b6406 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -367,7 +367,6 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, struct printbuf buf = PRINTBUF; percpu_down_read(&c->mark_lock); - buf.atomic++; idx = bch2_replicas_entry_idx(c, r); if (idx < 0 && @@ -795,7 +794,6 @@ static int mark_stripe_bucket(struct btree_trans *trans, /* * XXX doesn't handle deletion */ percpu_down_read(&c->mark_lock); - buf.atomic++; g = PTR_GC_BUCKET(ca, ptr); if (g->dirty_sectors || diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index ecbeb72..bf8d7f4 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -70,12 +70,15 @@ union ulong_byte_assert { static inline void bucket_unlock(struct bucket *b) { BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); - bit_spin_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + + clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); } static inline void bucket_lock(struct bucket *b) { - bit_spin_lock(BUCKET_LOCK_BITNR, (void *) &b->lock); + wait_on_bit_lock((void *) &b->lock, BUCKET_LOCK_BITNR, + TASK_UNINTERRUPTIBLE); } static inline struct bucket_array *gc_bucket_array(struct bch_dev *ca) diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c index 1948119..839f00d 100644 --- a/libbcachefs/checksum.c +++ b/libbcachefs/checksum.c @@ -362,7 +362,7 @@ struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, state.type = type; bch2_checksum_init(&state); - state.seed = (u64 __force) a.lo; + state.seed = le64_to_cpu(a.lo); BUG_ON(!bch2_checksum_mergeable(type)); @@ -373,7 +373,7 @@ struct bch_csum bch2_checksum_merge(unsigned type, struct bch_csum a, page_address(ZERO_PAGE(0)), page_len); b_len -= page_len; } - a.lo = (__le64 __force) bch2_checksum_final(&state); + a.lo = cpu_to_le64(bch2_checksum_final(&state)); a.lo ^= b.lo; a.hi ^= b.hi; return a; @@ -534,16 +534,31 @@ static int __bch2_request_key(char *key_description, struct bch_key *key) { key_serial_t key_id; + key_id = request_key("user", key_description, NULL, + KEY_SPEC_SESSION_KEYRING); + if (key_id >= 0) + goto got_key; + key_id = request_key("user", key_description, NULL, KEY_SPEC_USER_KEYRING); - if (key_id < 0) - return -errno; + if (key_id >= 0) + goto got_key; + + key_id = request_key("user", key_description, NULL, + KEY_SPEC_USER_SESSION_KEYRING); + if (key_id >= 0) + goto got_key; + + return -errno; +got_key: if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key)) return -1; return 0; } + +#include "../crypto.h" #endif int bch2_request_key(struct bch_sb *sb, struct bch_key *key) @@ -556,6 +571,20 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) ret = __bch2_request_key(key_description.buf, key); printbuf_exit(&key_description); + +#ifndef __KERNEL__ + if (ret) { + char *passphrase = read_passphrase("Enter passphrase: "); + struct bch_encrypted_key sb_key; + + bch2_passphrase_check(sb, passphrase, + key, &sb_key); + ret = 0; + } +#endif + + /* stash with memfd, pass memfd fd to mount */ + return ret; } diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c index b292dbe..43aad8b 100644 --- a/libbcachefs/disk_groups.c +++ b/libbcachefs/disk_groups.c @@ -25,19 +25,18 @@ static int bch2_sb_disk_groups_validate(struct bch_sb *sb, struct bch_sb_field_disk_groups *groups = field_to_type(f, disk_groups); struct bch_disk_group *g, *sorted = NULL; - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); unsigned nr_groups = disk_groups_nr(groups); unsigned i, len; int ret = 0; for (i = 0; i < sb->nr_devices; i++) { - struct bch_member *m = mi->members + i; + struct bch_member m = bch2_sb_member_get(sb, i); unsigned group_id; - if (!BCH_MEMBER_GROUP(m)) + if (!BCH_MEMBER_GROUP(&m)) continue; - group_id = BCH_MEMBER_GROUP(m) - 1; + group_id = BCH_MEMBER_GROUP(&m) - 1; if (group_id >= nr_groups) { prt_printf(err, "disk %u has invalid label %u (have %u)", @@ -152,14 +151,12 @@ const struct bch_sb_field_ops bch_sb_field_ops_disk_groups = { int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) { - struct bch_sb_field_members *mi; struct bch_sb_field_disk_groups *groups; struct bch_disk_groups_cpu *cpu_g, *old_g; unsigned i, g, nr_groups; lockdep_assert_held(&c->sb_lock); - mi = bch2_sb_get_members(c->disk_sb.sb); groups = bch2_sb_get_disk_groups(c->disk_sb.sb); nr_groups = disk_groups_nr(groups); @@ -182,13 +179,13 @@ int bch2_sb_disk_groups_to_cpu(struct bch_fs *c) } for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { - struct bch_member *m = mi->members + i; + struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, i); struct bch_disk_group_cpu *dst; - if (!bch2_member_exists(m)) + if (!bch2_member_exists(&m)) continue; - g = BCH_MEMBER_GROUP(m); + g = BCH_MEMBER_GROUP(&m); while (g) { dst = &cpu_g->entries[g - 1]; __set_bit(i, dst->devs.d); @@ -443,7 +440,7 @@ int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) if (ret) return ret; - mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; + mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); SET_BCH_MEMBER_GROUP(mi, v + 1); return 0; } @@ -528,12 +525,11 @@ void bch2_opt_target_to_text(struct printbuf *out, rcu_read_unlock(); } else { - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); - struct bch_member *m = mi->members + t.dev; + struct bch_member m = bch2_sb_member_get(sb, t.dev); - if (bch2_dev_exists(sb, mi, t.dev)) { + if (bch2_dev_exists(sb, t.dev)) { prt_printf(out, "Device "); - pr_uuid(out, m->uuid.b); + pr_uuid(out, m.uuid.b); prt_printf(out, " (%u)", t.dev); } else { prt_printf(out, "Bad device %u", t.dev); diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 64f7176..7cc0837 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -91,6 +91,7 @@ x(ENOSPC, ENOSPC_sb_quota) \ x(ENOSPC, ENOSPC_sb_replicas) \ x(ENOSPC, ENOSPC_sb_members) \ + x(ENOSPC, ENOSPC_sb_members_v2) \ x(ENOSPC, ENOSPC_sb_crypt) \ x(ENOSPC, ENOSPC_btree_slot) \ x(ENOSPC, ENOSPC_snapshot_tree) \ diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 1354af2..89759e6 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -1595,7 +1595,7 @@ static struct bch_fs *bch2_path_to_fs(const char *path) static char **split_devs(const char *_dev_name, unsigned *nr) { char *dev_name = NULL, **devs = NULL, *s; - size_t i, nr_devs = 0; + size_t i = 0, nr_devs = 0; dev_name = kstrdup(_dev_name, GFP_KERNEL); if (!dev_name) @@ -1610,9 +1610,7 @@ static char **split_devs(const char *_dev_name, unsigned *nr) return NULL; } - for (i = 0, s = dev_name; - s; - (s = strchr(s, ':')) && (*s++ = '\0')) + while ((s = strsep(&dev_name, ":"))) devs[i++] = s; *nr = nr_devs; diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 4a695a8..bb3f443 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -780,6 +780,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c k; struct bkey_i delete; + struct bpos end = POS(inum.inum, U64_MAX); u32 snapshot; int ret = 0; @@ -788,7 +789,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, * extent iterator: */ bch2_trans_iter_init(trans, &iter, id, POS(inum.inum, 0), - BTREE_ITER_INTENT|BTREE_ITER_NOT_EXTENTS); + BTREE_ITER_INTENT); while (1) { bch2_trans_begin(trans); @@ -799,7 +800,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, bch2_btree_iter_set_snapshot(&iter, snapshot); - k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); + k = bch2_btree_iter_peek_upto(&iter, end); ret = bkey_err(k); if (ret) goto err; @@ -810,6 +811,11 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, bkey_init(&delete.k); delete.k.p = iter.pos; + if (iter.flags & BTREE_ITER_IS_EXTENTS) + bch2_key_resize(&delete.k, + bpos_min(end, k.k->p).offset - + iter.pos.offset); + ret = bch2_trans_update(trans, &iter, &delete, 0) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); diff --git a/libbcachefs/journal_sb.c b/libbcachefs/journal_sb.c index cc41bff..3c5a95e 100644 --- a/libbcachefs/journal_sb.c +++ b/libbcachefs/journal_sb.c @@ -21,7 +21,7 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, struct printbuf *err) { struct bch_sb_field_journal *journal = field_to_type(f, journal); - struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; + struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); int ret = -BCH_ERR_invalid_sb_journal; unsigned nr; unsigned i; @@ -45,15 +45,15 @@ static int bch2_sb_journal_validate(struct bch_sb *sb, goto err; } - if (b[0] < le16_to_cpu(m->first_bucket)) { + if (b[0] < le16_to_cpu(m.first_bucket)) { prt_printf(err, "journal bucket %llu before first bucket %u", - b[0], le16_to_cpu(m->first_bucket)); + b[0], le16_to_cpu(m.first_bucket)); goto err; } - if (b[nr - 1] >= le64_to_cpu(m->nbuckets)) { + if (b[nr - 1] >= le64_to_cpu(m.nbuckets)) { prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)", - b[nr - 1], le64_to_cpu(m->nbuckets)); + b[nr - 1], le64_to_cpu(m.nbuckets)); goto err; } @@ -104,7 +104,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct printbuf *err) { struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); - struct bch_member *m = bch2_sb_get_members(sb)->members + sb->dev_idx; + struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); int ret = -BCH_ERR_invalid_sb_journal; unsigned nr; unsigned i; @@ -130,15 +130,15 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, goto err; } - if (b[0].start < le16_to_cpu(m->first_bucket)) { + if (b[0].start < le16_to_cpu(m.first_bucket)) { prt_printf(err, "journal bucket %llu before first bucket %u", - b[0].start, le16_to_cpu(m->first_bucket)); + b[0].start, le16_to_cpu(m.first_bucket)); goto err; } - if (b[nr - 1].end > le64_to_cpu(m->nbuckets)) { + if (b[nr - 1].end > le64_to_cpu(m.nbuckets)) { prt_printf(err, "journal bucket %llu past end of device (nbuckets %llu)", - b[nr - 1].end - 1, le64_to_cpu(m->nbuckets)); + b[nr - 1].end - 1, le64_to_cpu(m.nbuckets)); goto err; } diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c index 739a2ef..232f50c 100644 --- a/libbcachefs/opts.c +++ b/libbcachefs/opts.c @@ -12,6 +12,11 @@ #define x(t, n, ...) [n] = #t, +const char * const bch2_iops_measurements[] = { + BCH_IOPS_MEASUREMENTS() + NULL +}; + const char * const bch2_error_actions[] = { BCH_ERROR_ACTIONS() NULL diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index c21c258..5501433 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -10,6 +10,7 @@ struct bch_fs; +extern const char * const bch2_iops_measurements[]; extern const char * const bch2_error_actions[]; extern const char * const bch2_fsck_fix_opts[]; extern const char * const bch2_version_upgrade_opts[]; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 1dceb7e..cd037f2 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -374,13 +374,12 @@ static int read_btree_roots(struct bch_fs *c) ret = bch2_btree_root_read(c, i, &r->key, r->level); if (ret) { - __fsck_err(c, - btree_id_is_alloc(i) - ? FSCK_CAN_IGNORE : 0, - "error reading btree root %s", - bch2_btree_ids[i]); + fsck_err(c, + "error reading btree root %s", + bch2_btree_ids[i]); if (btree_id_is_alloc(i)) c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); + ret = 0; } } @@ -645,7 +644,7 @@ int bch2_fs_recovery(struct bch_fs *c) { struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; - u64 last_seq, blacklist_seq, journal_seq; + u64 last_seq = 0, blacklist_seq, journal_seq; bool write_sb = false; int ret = 0; diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c index dbef41c..a9a694f 100644 --- a/libbcachefs/replicas.c +++ b/libbcachefs/replicas.c @@ -805,7 +805,6 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_sb *sb, struct printbuf *err) { - struct bch_sb_field_members *mi = bch2_sb_get_members(sb); unsigned i, j; sort_cmp_size(cpu_r->entries, @@ -837,7 +836,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, } for (j = 0; j < e->nr_devs; j++) - if (!bch2_dev_exists(sb, mi, e->devs[j])) { + if (!bch2_dev_exists(sb, e->devs[j])) { prt_printf(err, "invalid device %u in entry ", e->devs[j]); bch2_replicas_entry_to_text(err, e); return -BCH_ERR_invalid_sb_replicas; diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index 16a2b33..04bde1a 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -2,172 +2,338 @@ #include "bcachefs.h" #include "disk_groups.h" +#include "opts.h" #include "replicas.h" #include "sb-members.h" #include "super-io.h" -/* Code for bch_sb_field_members: */ +/* Code for bch_sb_field_members_v1: */ -static int bch2_sb_members_validate(struct bch_sb *sb, - struct bch_sb_field *f, - struct printbuf *err) +static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i) { - struct bch_sb_field_members *mi = field_to_type(f, members); - unsigned i; + return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes)); +} - if ((void *) (mi->members + sb->nr_devices) > - vstruct_end(&mi->field)) { - prt_printf(err, "too many devices for section size"); - return -BCH_ERR_invalid_sb_members; - } +struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) +{ + return members_v2_get_mut(bch2_sb_get_members_v2(sb), i); +} - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member *m = mi->members + i; +static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) +{ + struct bch_member ret, *p = members_v2_get_mut(mi, i); + memset(&ret, 0, sizeof(ret)); + memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); + return ret; +} - if (!bch2_member_exists(m)) - continue; +static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) +{ + return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); +} - if (le64_to_cpu(m->nbuckets) > LONG_MAX) { - prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", - i, le64_to_cpu(m->nbuckets), LONG_MAX); - return -BCH_ERR_invalid_sb_members; - } +static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) +{ + struct bch_member ret, *p = members_v1_get_mut(mi, i); + memset(&ret, 0, sizeof(ret)); + memcpy(&ret, p, min_t(size_t, sizeof(struct bch_member), sizeof(ret))); return ret; +} - if (le64_to_cpu(m->nbuckets) - - le16_to_cpu(m->first_bucket) < BCH_MIN_NR_NBUCKETS) { - prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", - i, le64_to_cpu(m->nbuckets), BCH_MIN_NR_NBUCKETS); - return -BCH_ERR_invalid_sb_members; - } +struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) +{ + struct bch_sb_field_members_v2 *mi2 = bch2_sb_get_members_v2(sb); + if (mi2) + return members_v2_get(mi2, i); + struct bch_sb_field_members_v1 *mi1 = bch2_sb_get_members_v1(sb); + return members_v1_get(mi1, i); +} - if (le16_to_cpu(m->bucket_size) < - le16_to_cpu(sb->block_size)) { - prt_printf(err, "device %u: bucket size %u smaller than block size %u", - i, le16_to_cpu(m->bucket_size), le16_to_cpu(sb->block_size)); - return -BCH_ERR_invalid_sb_members; - } +static int sb_members_v2_resize_entries(struct bch_fs *c) +{ + struct bch_sb_field_members_v2 *mi = bch2_sb_get_members_v2(c->disk_sb.sb); + + if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) { + unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) * + c->disk_sb.sb->nr_devices), 8); + + mi = bch2_sb_resize_members_v2(&c->disk_sb, u64s); + if (!mi) + return -BCH_ERR_ENOSPC_sb_members_v2; - if (le16_to_cpu(m->bucket_size) < - BCH_SB_BTREE_NODE_SIZE(sb)) { - prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", - i, le16_to_cpu(m->bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); - return -BCH_ERR_invalid_sb_members; + for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { + void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); + memmove(dst, members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); + memset(dst + le16_to_cpu(mi->member_bytes), + 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); } + mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); } + return 0; +} + +int bch2_members_v2_init(struct bch_fs *c) +{ + struct bch_sb_field_members_v1 *mi1; + struct bch_sb_field_members_v2 *mi2; + + if (!bch2_sb_get_members_v2(c->disk_sb.sb)) { + mi2 = bch2_sb_resize_members_v2(&c->disk_sb, + DIV_ROUND_UP(sizeof(*mi2) + + sizeof(struct bch_member) * c->sb.nr_devices, + sizeof(u64))); + mi1 = bch2_sb_get_members_v1(c->disk_sb.sb); + memcpy(&mi2->_members[0], &mi1->_members[0], + BCH_MEMBER_V1_BYTES * c->sb.nr_devices); + memset(&mi2->pad[0], 0, sizeof(mi2->pad)); + mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); + } + + return sb_members_v2_resize_entries(c); +} + +int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) +{ + struct bch_sb_field_members_v1 *mi1; + struct bch_sb_field_members_v2 *mi2; + + mi1 = bch2_sb_resize_members_v1(disk_sb, + DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * + disk_sb->sb->nr_devices, sizeof(u64))); + if (!mi1) + return -BCH_ERR_ENOSPC_sb_members; + + mi2 = bch2_sb_get_members_v2(disk_sb->sb); + + for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) + memcpy(members_v1_get_mut(mi1, i), members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); return 0; } -static void bch2_sb_members_to_text(struct printbuf *out, struct bch_sb *sb, - struct bch_sb_field *f) +static int validate_member(struct printbuf *err, + struct bch_member m, + struct bch_sb *sb, + int i) { - struct bch_sb_field_members *mi = field_to_type(f, members); - struct bch_sb_field_disk_groups *gi = bch2_sb_get_disk_groups(sb); - unsigned i; + if (le64_to_cpu(m.nbuckets) > LONG_MAX) { + prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", + i, le64_to_cpu(m.nbuckets), LONG_MAX); + return -BCH_ERR_invalid_sb_members; + } - for (i = 0; i < sb->nr_devices; i++) { - struct bch_member *m = mi->members + i; - unsigned data_have = bch2_sb_dev_has_data(sb, i); - u64 bucket_size = le16_to_cpu(m->bucket_size); - u64 device_size = le64_to_cpu(m->nbuckets) * bucket_size; + if (le64_to_cpu(m.nbuckets) - + le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { + prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", + i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); + return -BCH_ERR_invalid_sb_members; + } - if (!bch2_member_exists(m)) - continue; + if (le16_to_cpu(m.bucket_size) < + le16_to_cpu(sb->block_size)) { + prt_printf(err, "device %u: bucket size %u smaller than block size %u", + i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); + return -BCH_ERR_invalid_sb_members; + } - prt_printf(out, "Device:"); - prt_tab(out); - prt_printf(out, "%u", i); - prt_newline(out); + if (le16_to_cpu(m.bucket_size) < + BCH_SB_BTREE_NODE_SIZE(sb)) { + prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", + i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); + return -BCH_ERR_invalid_sb_members; + } - printbuf_indent_add(out, 2); + return 0; +} - prt_printf(out, "UUID:"); - prt_tab(out); - pr_uuid(out, m->uuid.b); - prt_newline(out); +static void member_to_text(struct printbuf *out, + struct bch_member m, + struct bch_sb_field_disk_groups *gi, + struct bch_sb *sb, + int i) +{ + unsigned data_have = bch2_sb_dev_has_data(sb, i); + u64 bucket_size = le16_to_cpu(m.bucket_size); + u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; - prt_printf(out, "Size:"); - prt_tab(out); - prt_units_u64(out, device_size << 9); - prt_newline(out); - prt_printf(out, "Bucket size:"); - prt_tab(out); - prt_units_u64(out, bucket_size << 9); - prt_newline(out); + prt_printf(out, "Device:"); + prt_tab(out); + prt_printf(out, "%u", i); + prt_newline(out); - prt_printf(out, "First bucket:"); - prt_tab(out); - prt_printf(out, "%u", le16_to_cpu(m->first_bucket)); - prt_newline(out); + printbuf_indent_add(out, 2); - prt_printf(out, "Buckets:"); - prt_tab(out); - prt_printf(out, "%llu", le64_to_cpu(m->nbuckets)); - prt_newline(out); + prt_printf(out, "UUID:"); + prt_tab(out); + pr_uuid(out, m.uuid.b); + prt_newline(out); - prt_printf(out, "Last mount:"); - prt_tab(out); - if (m->last_mount) - pr_time(out, le64_to_cpu(m->last_mount)); - else - prt_printf(out, "(never)"); - prt_newline(out); + prt_printf(out, "Size:"); + prt_tab(out); + prt_units_u64(out, device_size << 9); + prt_newline(out); - prt_printf(out, "State:"); + for (unsigned i = 0; i < BCH_IOPS_NR; i++) { + prt_printf(out, "%s iops:", bch2_iops_measurements[i]); prt_tab(out); - prt_printf(out, "%s", - BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR - ? bch2_member_states[BCH_MEMBER_STATE(m)] - : "unknown"); + prt_printf(out, "%u", le32_to_cpu(m.iops[i])); prt_newline(out); + } - prt_printf(out, "Label:"); - prt_tab(out); - if (BCH_MEMBER_GROUP(m)) { - unsigned idx = BCH_MEMBER_GROUP(m) - 1; - - if (idx < disk_groups_nr(gi)) - prt_printf(out, "%s (%u)", - gi->entries[idx].label, idx); - else - prt_printf(out, "(bad disk labels section)"); - } else { - prt_printf(out, "(none)"); - } - prt_newline(out); + prt_printf(out, "Bucket size:"); + prt_tab(out); + prt_units_u64(out, bucket_size << 9); + prt_newline(out); - prt_printf(out, "Data allowed:"); - prt_tab(out); - if (BCH_MEMBER_DATA_ALLOWED(m)) - prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(m)); - else - prt_printf(out, "(none)"); - prt_newline(out); + prt_printf(out, "First bucket:"); + prt_tab(out); + prt_printf(out, "%u", le16_to_cpu(m.first_bucket)); + prt_newline(out); - prt_printf(out, "Has data:"); - prt_tab(out); - if (data_have) - prt_bitflags(out, bch2_data_types, data_have); + prt_printf(out, "Buckets:"); + prt_tab(out); + prt_printf(out, "%llu", le64_to_cpu(m.nbuckets)); + prt_newline(out); + + prt_printf(out, "Last mount:"); + prt_tab(out); + if (m.last_mount) + pr_time(out, le64_to_cpu(m.last_mount)); + else + prt_printf(out, "(never)"); + prt_newline(out); + + prt_printf(out, "State:"); + prt_tab(out); + prt_printf(out, "%s", + BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR + ? bch2_member_states[BCH_MEMBER_STATE(&m)] + : "unknown"); + prt_newline(out); + + prt_printf(out, "Label:"); + prt_tab(out); + if (BCH_MEMBER_GROUP(&m)) { + unsigned idx = BCH_MEMBER_GROUP(&m) - 1; + + if (idx < disk_groups_nr(gi)) + prt_printf(out, "%s (%u)", + gi->entries[idx].label, idx); else - prt_printf(out, "(none)"); - prt_newline(out); + prt_printf(out, "(bad disk labels section)"); + } else { + prt_printf(out, "(none)"); + } + prt_newline(out); - prt_printf(out, "Discard:"); - prt_tab(out); - prt_printf(out, "%llu", BCH_MEMBER_DISCARD(m)); - prt_newline(out); + prt_printf(out, "Data allowed:"); + prt_tab(out); + if (BCH_MEMBER_DATA_ALLOWED(&m)) + prt_bitflags(out, bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); + else + prt_printf(out, "(none)"); + prt_newline(out); - prt_printf(out, "Freespace initialized:"); - prt_tab(out); - prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(m)); - prt_newline(out); + prt_printf(out, "Has data:"); + prt_tab(out); + if (data_have) + prt_bitflags(out, bch2_data_types, data_have); + else + prt_printf(out, "(none)"); + prt_newline(out); + + prt_printf(out, "Discard:"); + prt_tab(out); + prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m)); + prt_newline(out); + + prt_printf(out, "Freespace initialized:"); + prt_tab(out); + prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); + prt_newline(out); + + printbuf_indent_sub(out, 2); +} + +static int bch2_sb_members_v1_validate(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); + unsigned i; - printbuf_indent_sub(out, 2); + if ((void *) members_v1_get_mut(mi, sb->nr_devices) > + vstruct_end(&mi->field)) { + prt_printf(err, "too many devices for section size"); + return -BCH_ERR_invalid_sb_members; } + + for (i = 0; i < sb->nr_devices; i++) { + struct bch_member m = members_v1_get(mi, i); + + int ret = validate_member(err, m, sb, i); + if (ret) + return ret; + } + + return 0; +} + +static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); + struct bch_sb_field_disk_groups *gi = bch2_sb_get_disk_groups(sb); + unsigned i; + + for (i = 0; i < sb->nr_devices; i++) { + struct bch_member m = members_v1_get(mi, i); + member_to_text(out, m, gi, sb, i); + } +} + +const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { + .validate = bch2_sb_members_v1_validate, + .to_text = bch2_sb_members_v1_to_text, +}; + +static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); + struct bch_sb_field_disk_groups *gi = bch2_sb_get_disk_groups(sb); + unsigned i; + + for (i = 0; i < sb->nr_devices; i++) { + struct bch_member m = members_v2_get(mi, i); + member_to_text(out, m, gi, sb, i); + } +} + +static int bch2_sb_members_v2_validate(struct bch_sb *sb, + struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); + size_t mi_bytes = (void *) members_v2_get_mut(mi, sb->nr_devices) - + (void *) mi; + + if (mi_bytes > vstruct_bytes(&mi->field)) { + prt_printf(err, "section too small (%zu > %zu)", + mi_bytes, vstruct_bytes(&mi->field)); + return -BCH_ERR_invalid_sb_members; + } + + for (unsigned i = 0; i < sb->nr_devices; i++) { + int ret = validate_member(err, members_v2_get(mi, i), sb, i); + if (ret) + return ret; + } + + return 0; } -const struct bch_sb_field_ops bch_sb_field_ops_members = { - .validate = bch2_sb_members_validate, - .to_text = bch2_sb_members_to_text, +const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { + .validate = bch2_sb_members_v2_validate, + .to_text = bch2_sb_members_v2_to_text, }; diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h index 34e1cf6..430f345 100644 --- a/libbcachefs/sb-members.h +++ b/libbcachefs/sb-members.h @@ -2,6 +2,11 @@ #ifndef _BCACHEFS_SB_MEMBERS_H #define _BCACHEFS_SB_MEMBERS_H +int bch2_members_v2_init(struct bch_fs *c); +int bch_members_cpy_v2_v1(struct bch_sb_handle *disk_sb); +struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i); +struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i); + static inline bool bch2_dev_is_online(struct bch_dev *ca) { return !percpu_ref_is_zero(&ca->io_ref); @@ -171,6 +176,7 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c) return devs; } -extern const struct bch_sb_field_ops bch_sb_field_ops_members; +extern const struct bch_sb_field_ops bch_sb_field_ops_members_v1; +extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2; #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index c9bf342..4498e24 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -355,7 +355,7 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, { struct bch_sb *sb = disk_sb->sb; struct bch_sb_field *f; - struct bch_sb_field_members *mi; + struct bch_sb_field_members_v1 *mi; enum bch_opt_id opt_id; u16 block_size; int ret; @@ -458,7 +458,7 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, } /* members must be validated first: */ - mi = bch2_sb_get_members(sb); + mi = bch2_sb_get_members_v1(sb); if (!mi) { prt_printf(out, "Invalid superblock: member info area missing"); return -BCH_ERR_invalid_sb_members_missing; @@ -469,7 +469,7 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, return ret; vstruct_for_each(sb, f) { - if (le32_to_cpu(f->type) == BCH_SB_FIELD_members) + if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1) continue; ret = bch2_sb_field_validate(sb, f, out); @@ -485,7 +485,6 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, static void bch2_sb_update(struct bch_fs *c) { struct bch_sb *src = c->disk_sb.sb; - struct bch_sb_field_members *mi = bch2_sb_get_members(src); struct bch_dev *ca; unsigned i; @@ -511,8 +510,10 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.features = le64_to_cpu(src->features[0]); c->sb.compat = le64_to_cpu(src->compat[0]); - for_each_member_device(ca, c, i) - ca->mi = bch2_mi_to_cpu(mi->members + i); + for_each_member_device(ca, c, i) { + struct bch_member m = bch2_sb_member_get(src, i); + ca->mi = bch2_mi_to_cpu(&m); + } } static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src) @@ -891,6 +892,7 @@ int bch2_write_super(struct bch_fs *c) SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN); bch2_sb_counters_from_cpu(c); + bch_members_cpy_v2_v1(&c->disk_sb); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -1125,7 +1127,6 @@ void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, bool print_layout, unsigned fields) { - struct bch_sb_field_members *mi; struct bch_sb_field *f; u64 fields_have = 0; unsigned nr_devices = 0; @@ -1133,15 +1134,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, if (!out->nr_tabstops) printbuf_tabstop_push(out, 44); - mi = bch2_sb_get_members(sb); - if (mi) { - struct bch_member *m; - - for (m = mi->members; - m < mi->members + sb->nr_devices; - m++) - nr_devices += bch2_member_exists(m); - } + for (int i = 0; i < sb->nr_devices; i++) + nr_devices += bch2_dev_exists(sb, i); prt_printf(out, "External UUID:"); prt_tab(out); diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index d51c0a1..f7e9099 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -6,6 +6,7 @@ #include "eytzinger.h" #include "super_types.h" #include "super.h" +#include "sb-members.h" #include @@ -89,7 +90,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -/* BCH_SB_FIELD_members: */ +/* BCH_SB_FIELD_members_v1: */ static inline bool bch2_member_exists(struct bch_member *m) { @@ -97,11 +98,13 @@ static inline bool bch2_member_exists(struct bch_member *m) } static inline bool bch2_dev_exists(struct bch_sb *sb, - struct bch_sb_field_members *mi, unsigned dev) { - return dev < sb->nr_devices && - bch2_member_exists(&mi->members[dev]); + if (dev < sb->nr_devices) { + struct bch_member m = bch2_sb_member_get(sb, dev); + return bch2_member_exists(&m); + } + return false; } static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 1347270..2014f78 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -49,6 +49,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-members.h" #include "snapshot.h" #include "subvolume.h" #include "super.h" @@ -399,6 +400,10 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch_info(c, "going read-write"); + ret = bch2_members_v2_init(c); + if (ret) + goto err; + ret = bch2_fs_mark_dirty(c); if (ret) goto err; @@ -662,7 +667,6 @@ err: static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { - struct bch_sb_field_members *mi; struct bch_fs *c; struct printbuf name = PRINTBUF; unsigned i, iter_size; @@ -858,9 +862,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; - mi = bch2_sb_get_members(c->disk_sb.sb); for (i = 0; i < c->sb.nr_devices; i++) - if (bch2_dev_exists(c->disk_sb.sb, mi, i) && + if (bch2_dev_exists(c->disk_sb.sb, i) && bch2_dev_alloc(c, i)) { ret = -EEXIST; goto err; @@ -925,7 +928,6 @@ static void print_mount_opts(struct bch_fs *c) int bch2_fs_start(struct bch_fs *c) { - struct bch_sb_field_members *mi; struct bch_dev *ca; time64_t now = ktime_get_real_seconds(); unsigned i; @@ -939,12 +941,17 @@ int bch2_fs_start(struct bch_fs *c) mutex_lock(&c->sb_lock); + ret = bch2_members_v2_init(c); + if (ret) { + mutex_unlock(&c->sb_lock); + goto err; + } + for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); - mi = bch2_sb_get_members(c->disk_sb.sb); for_each_online_member(ca, c, i) - mi->members[ca->dev_idx].last_mount = cpu_to_le64(now); + bch2_members_v2_get_mut(c->disk_sb.sb, i)->last_mount = cpu_to_le64(now); mutex_unlock(&c->sb_lock); @@ -997,16 +1004,12 @@ err: static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c) { - struct bch_sb_field_members *sb_mi; - - sb_mi = bch2_sb_get_members(sb); - if (!sb_mi) - return -BCH_ERR_member_info_missing; + struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); if (le16_to_cpu(sb->block_size) != block_sectors(c)) return -BCH_ERR_mismatched_block_size; - if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) < + if (le16_to_cpu(m.bucket_size) < BCH_SB_BTREE_NODE_SIZE(c->disk_sb.sb)) return -BCH_ERR_bucket_size_too_small; @@ -1017,12 +1020,11 @@ static int bch2_dev_in_fs(struct bch_sb *fs, struct bch_sb *sb) { struct bch_sb *newest = le64_to_cpu(fs->seq) > le64_to_cpu(sb->seq) ? fs : sb; - struct bch_sb_field_members *mi = bch2_sb_get_members(newest); if (!uuid_equal(&fs->uuid, &sb->uuid)) return -BCH_ERR_device_not_a_member_of_filesystem; - if (!bch2_dev_exists(newest, mi, sb->dev_idx)) + if (!bch2_dev_exists(newest, sb->dev_idx)) return -BCH_ERR_device_has_been_removed; if (fs->block_size != sb->block_size) @@ -1192,15 +1194,14 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) { - struct bch_member *member = - bch2_sb_get_members(c->disk_sb.sb)->members + dev_idx; + struct bch_member member = bch2_sb_member_get(c->disk_sb.sb, dev_idx); struct bch_dev *ca = NULL; int ret = 0; if (bch2_fs_init_fault("dev_alloc")) goto err; - ca = __bch2_dev_alloc(c, member); + ca = __bch2_dev_alloc(c, &member); if (!ca) goto err; @@ -1335,7 +1336,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, static bool bch2_fs_may_start(struct bch_fs *c) { - struct bch_sb_field_members *mi; struct bch_dev *ca; unsigned i, flags = 0; @@ -1348,10 +1348,9 @@ static bool bch2_fs_may_start(struct bch_fs *c) if (!c->opts.degraded && !c->opts.very_degraded) { mutex_lock(&c->sb_lock); - mi = bch2_sb_get_members(c->disk_sb.sb); for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { - if (!bch2_dev_exists(c->disk_sb.sb, mi, i)) + if (!bch2_dev_exists(c->disk_sb.sb, i)) continue; ca = bch_dev_locked(c, i); @@ -1391,7 +1390,7 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { - struct bch_sb_field_members *mi; + struct bch_member *m; int ret = 0; if (ca->mi.state == new_state) @@ -1406,8 +1405,8 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, bch_notice(ca, "%s", bch2_member_states[new_state]); mutex_lock(&c->sb_lock); - mi = bch2_sb_get_members(c->disk_sb.sb); - SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state); + m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_STATE(m, new_state); bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -1463,7 +1462,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { - struct bch_sb_field_members *mi; + struct bch_member *m; unsigned dev_idx = ca->dev_idx, data; int ret; @@ -1551,8 +1550,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) * this device must be gone: */ mutex_lock(&c->sb_lock); - mi = bch2_sb_get_members(c->disk_sb.sb); - memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid)); + m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); + memset(&m->uuid, 0, sizeof(m->uuid)); bch2_write_super(c); @@ -1575,7 +1574,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb; struct bch_dev *ca = NULL; - struct bch_sb_field_members *mi; + struct bch_sb_field_members_v2 *mi; struct bch_member dev_mi; unsigned dev_idx, nr_devices, u64s; struct printbuf errbuf = PRINTBUF; @@ -1588,7 +1587,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err; } - dev_mi = bch2_sb_get_members(sb.sb)->members[sb.sb->dev_idx]; + dev_mi = bch2_sb_member_get(sb.sb, sb.sb->dev_idx); if (BCH_MEMBER_GROUP(&dev_mi)) { bch2_disk_path_to_text(&label, sb.sb, BCH_MEMBER_GROUP(&dev_mi) - 1); @@ -1631,9 +1630,9 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_unlock; } - mi = bch2_sb_get_members(ca->disk_sb.sb); + mi = bch2_sb_get_members_v2(ca->disk_sb.sb); - if (!bch2_sb_resize_members(&ca->disk_sb, + if (!bch2_sb_resize_members_v2(&ca->disk_sb, le32_to_cpu(mi->field.u64s) + sizeof(dev_mi) / sizeof(u64))) { ret = -BCH_ERR_ENOSPC_sb_members; @@ -1644,9 +1643,8 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (dynamic_fault("bcachefs:add:no_slot")) goto no_slot; - mi = bch2_sb_get_members(c->disk_sb.sb); for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) - if (!bch2_dev_exists(c->disk_sb.sb, mi, dev_idx)) + if (!bch2_dev_exists(c->disk_sb.sb, dev_idx)) goto have_slot; no_slot: ret = -BCH_ERR_ENOSPC_sb_members; @@ -1655,20 +1653,21 @@ no_slot: have_slot: nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); - u64s = (sizeof(struct bch_sb_field_members) + - sizeof(struct bch_member) * nr_devices) / sizeof(u64); + u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) + + le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64)); - mi = bch2_sb_resize_members(&c->disk_sb, u64s); + mi = bch2_sb_resize_members_v2(&c->disk_sb, u64s); if (!mi) { ret = -BCH_ERR_ENOSPC_sb_members; bch_err_msg(c, ret, "setting up new superblock"); goto err_unlock; } + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); /* success: */ - mi->members[dev_idx] = dev_mi; - mi->members[dev_idx].last_mount = cpu_to_le64(ktime_get_real_seconds()); + *m = dev_mi; + m->last_mount = cpu_to_le64(ktime_get_real_seconds()); c->disk_sb.sb->nr_devices = nr_devices; ca->disk_sb.sb->dev_idx = dev_idx; @@ -1728,7 +1727,6 @@ int bch2_dev_online(struct bch_fs *c, const char *path) { struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb = { NULL }; - struct bch_sb_field_members *mi; struct bch_dev *ca; unsigned dev_idx; int ret; @@ -1765,9 +1763,9 @@ int bch2_dev_online(struct bch_fs *c, const char *path) __bch2_dev_read_write(c, ca); mutex_lock(&c->sb_lock); - mi = bch2_sb_get_members(c->disk_sb.sb); + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - mi->members[ca->dev_idx].last_mount = + m->last_mount = cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); @@ -1809,10 +1807,12 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { - struct bch_member *mi; + struct bch_member *m; + u64 old_nbuckets; int ret = 0; down_write(&c->state_lock); + old_nbuckets = ca->mi.nbuckets; if (nbuckets < ca->mi.nbuckets) { bch_err(ca, "Cannot shrink yet"); @@ -1839,12 +1839,24 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) goto err; mutex_lock(&c->sb_lock); - mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; - mi->nbuckets = cpu_to_le64(nbuckets); + m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(nbuckets); bch2_write_super(c); mutex_unlock(&c->sb_lock); + if (ca->mi.freespace_initialized) { + ret = bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); + if (ret) + goto err; + + /* + * XXX: this is all wrong transactionally - we'll be able to do + * this correctly after the disk space accounting rewrite + */ + ca->usage_base->d[BCH_DATA_free].buckets += nbuckets - old_nbuckets; + } + bch2_recalc_capacity(c); err: up_write(&c->state_lock); @@ -1875,7 +1887,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, { struct bch_sb_handle *sb = NULL; struct bch_fs *c = NULL; - struct bch_sb_field_members *mi; unsigned i, best_sb = 0; struct printbuf errbuf = PRINTBUF; int ret = 0; @@ -1906,12 +1917,10 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, le64_to_cpu(sb[best_sb].sb->seq)) best_sb = i; - mi = bch2_sb_get_members(sb[best_sb].sb); - i = 0; while (i < nr_devices) { if (i != best_sb && - !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) { + !bch2_dev_exists(sb[best_sb].sb, sb[i].sb->dev_idx)) { pr_info("%pg has been removed, skipping", sb[i].bdev); bch2_free_super(&sb[i]); array_remove_item(sb, nr_devices, i); diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 1abc61c..9a85810 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -962,7 +962,7 @@ STORE(bch2_dev) bool v = strtoul_or_return(buf); mutex_lock(&c->sb_lock); - mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; + mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); if (v != BCH_MEMBER_DISCARD(mi)) { SET_BCH_MEMBER_DISCARD(mi, v); @@ -975,7 +975,7 @@ STORE(bch2_dev) u64 v = strtoul_or_return(buf); mutex_lock(&c->sb_lock); - mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx]; + mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); if (v + 1 != BCH_MEMBER_DURABILITY(mi)) { SET_BCH_MEMBER_DURABILITY(mi, v + 1); -- 2.39.2