From: Jonathan Carter
Date: Mon, 26 Oct 2020 07:07:13 +0000 (+0200)
Subject: New upstream snapshot
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;ds=sidebyside;h=04159ffa875ec3088d00408253db95669470b64c;p=bcachefs-tools-debian

New upstream snapshot
---

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eb1f1ee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+/result
+bcachefs
+.*
+*.o
+*.d
+*.a
+tags
+TAGS
+cscope*
+bcachefs-tools
+tests/test_helper
+tests/__pycache__/
+
+# dot-files that we don't want to ignore
+!.gitignore
+!.travis.yml
+
+mount/target
+mount.bcachefs
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..3b90b73
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,36 @@
+os: linux
+dist: bionic
+language: c
+arch:
+  - amd64
+#  - arm64
+
+addons:
+  apt:
+    packages:
+      - valgrind
+      - python3-pytest
+      - python3-pytest-xdist
+      - meson
+      - ninja-build
+      - pkg-config
+      - libaio-dev
+      - libblkid-dev
+      - libkeyutils-dev
+      - liblz4-dev
+      - libscrypt-dev
+      - libsodium-dev
+      - liburcu-dev
+      - libzstd-dev
+      - libudev-dev
+      - uuid-dev
+      - zlib1g-dev
+
+before_install:
+  - wget https://github.com/libfuse/libfuse/archive/fuse-3.7.0.tar.gz -O /tmp/fuse.tar.gz
+  - tar -C /tmp -zxvf /tmp/fuse.tar.gz
+  - mkdir /tmp/libfuse-fuse-3.7.0/build
+  - pushd /tmp/libfuse-fuse-3.7.0/build && meson .. && ninja && sudo ninja install && popd
+  - sudo ldconfig
+
+script: ./smoke_test
diff --git a/cmd_fsck.c b/cmd_fsck.c
index 5756ee7..9ef69ad 100644
--- a/cmd_fsck.c
+++ b/cmd_fsck.c
@@ -55,7 +55,7 @@ int cmd_fsck(int argc, char *argv[])
 			/* force check, even if filesystem marked clean: */
 			break;
 		case 'o':
-			ret = bch2_parse_mount_opts(&opts, optarg);
+			ret = bch2_parse_mount_opts(NULL, &opts, optarg);
 			if (ret)
 				return ret;
 			break;
diff --git a/debian/changelog b/debian/changelog
index 4945653..684da48 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+bcachefs-tools (0.1+git20201025.742dbbdb-1) unstable; urgency=medium
+
+  * New upstream snapshot
+
+ -- Jonathan Carter  Mon, 26 Oct 2020 08:45:37 +0200
+
 bcachefs-tools (0.1+git20201017.8a4408-1~exp1) unstable; urgency=medium
 
   * New upstream release
diff --git a/debian/files b/debian/files
index 11087b6..ba38766 100644
--- a/debian/files
+++ b/debian/files
@@ -1 +1 @@
-bcachefs-tools_0.1+git20201017.8a4408-1~exp1_source.buildinfo utils optional
+bcachefs-tools_0.1+git20201025.742dbbdb-1_source.buildinfo utils optional
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 54096e8..97508de 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -544,8 +544,6 @@ out:
  * commands to the newly free buckets, then puts them on the various freelists.
  */
 
-#define BUCKET_GC_GEN_MAX	96U
-
 /**
  * wait_buckets_available - wait on reclaimable buckets
  *
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 870714f..cbaff56 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -13,6 +13,9 @@ struct bkey_alloc_unpacked {
 #undef  x
 };
 
+/* How out of date a pointer gen is allowed to be: */
+#define BUCKET_GC_GEN_MAX	96U
+
 /* returns true if not equal */
 static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
 					   struct bkey_alloc_unpacked r)
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index d5a2230..2926c64 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -340,7 +340,8 @@ static inline void bkey_init(struct bkey *k)
 	x(reflink_p,		15)			\
 	x(reflink_v,		16)			\
 	x(inline_data,		17)			\
-	x(btree_ptr_v2,		18)
+	x(btree_ptr_v2,		18)			\
+	x(indirect_inline_data, 19)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name	= nr,
@@ -886,6 +887,12 @@ struct bch_reflink_v {
 	__u64			_data[0];
 };
 
+struct bch_indirect_inline_data {
+	struct bch_val		v;
+	__le64			refcount;
+	u8			data[0];
+};
+
 /* Inline data */
 
 struct bch_inline_data {
@@ -1032,7 +1039,8 @@ LE64_BITMASK(BCH_KDF_SCRYPT_P,	struct bch_sb_field_crypt, kdf_flags, 32, 48);
 	x(journal,	2)		\
 	x(btree,	3)		\
 	x(user,		4)		\
-	x(cached,	5)
+	x(cached,	5)		\
+	x(parity,	6)
 
 enum bch_data_type {
 #define x(t, n) BCH_DATA_##t,
@@ -1321,7 +1329,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE,	struct bch_sb, flags[3],  0, 16);
 	x(incompressible,	10)	\
 	x(btree_ptr_v2,		11)	\
 	x(extents_above_btree_updates, 12)	\
-	x(btree_updates_journalled, 13)
+	x(btree_updates_journalled, 13)	\
+	x(reflink_inline_data,	14)
 
 #define BCH_SB_FEATURES_ALL				\
 	((1ULL << BCH_FEATURE_new_siphash)|		\
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index cbcfbd2..80ea488 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -565,6 +565,7 @@ BKEY_VAL_ACCESSORS(reflink_p);
 BKEY_VAL_ACCESSORS(reflink_v);
 BKEY_VAL_ACCESSORS(inline_data);
 BKEY_VAL_ACCESSORS(btree_ptr_v2);
+BKEY_VAL_ACCESSORS(indirect_inline_data);
 
 /* byte order helpers */
 
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 36e0c51..3284922 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -72,7 +72,11 @@ static const char *key_type_inline_data_invalid(const struct bch_fs *c,
 static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c,
 					 struct bkey_s_c k)
 {
-	pr_buf(out, "(%zu bytes)", bkey_val_bytes(k.k));
+	struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
+	unsigned datalen = bkey_inline_data_bytes(k.k);
+
+	pr_buf(out, "datalen %u: %*phN",
+	       datalen, min(datalen, 32U), d.v->data);
 }
 
 #define bch2_bkey_ops_inline_data (struct bkey_ops) {		\
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 2f50972..682f599 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -750,7 +750,9 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
 
 		btree_err_on(bkey_cmp(bn->max_key, b->key.k.p),
 			     BTREE_ERR_MUST_RETRY, c, b, i,
-			     "incorrect max key");
+			     "incorrect max key %llu:%llu",
+			     bn->max_key.inode,
+			     bn->max_key.offset);
 
 		if (write)
 			compat_btree_node(b->c.level, b->c.btree_id, version,
@@ -930,7 +932,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 
 			btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
 				     BTREE_ERR_WANT_RETRY, c, b, i,
-				     "unknown checksum type");
+				     "unknown checksum type %llu",
+				     BSET_CSUM_TYPE(i));
 
 			nonce = btree_nonce(i, b->written << 9);
 			csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
@@ -957,7 +960,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 
 		btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
 			     BTREE_ERR_WANT_RETRY, c, b, i,
-			     "unknown checksum type");
+			     "unknown checksum type %llu",
+			     BSET_CSUM_TYPE(i));
 
 		nonce = btree_nonce(i, b->written << 9);
 		csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index c1717b7..cc01bae 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -591,6 +591,7 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
 #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS	\
 	((1U << BKEY_TYPE_EXTENTS)|		\
 	 (1U << BKEY_TYPE_INODES)|		\
+	 (1U << BKEY_TYPE_EC)|			\
 	 (1U << BKEY_TYPE_REFLINK))
 
 enum btree_trigger_flags {
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index cd699c2..49995cd 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -337,8 +337,9 @@ static inline bool iter_has_trans_triggers(struct btree_iter *iter)
 
 static inline bool iter_has_nontrans_triggers(struct btree_iter *iter)
 {
-	return (BTREE_NODE_TYPE_HAS_TRIGGERS &
-		~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS) &
+	return (((BTREE_NODE_TYPE_HAS_TRIGGERS &
+		  ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) |
+		(1U << BTREE_ID_EC)) &
 		(1U << iter->btree_id);
 }
 
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c3fc3ab..82f1cc4 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -77,6 +77,26 @@
 #include 
 #include 
 
+static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
+					      enum bch_data_type data_type,
+					      s64 sectors)
+{
+	switch (data_type) {
+	case BCH_DATA_btree:
+		fs_usage->btree += sectors;
+		break;
+	case BCH_DATA_user:
+	case BCH_DATA_parity:
+		fs_usage->data += sectors;
+		break;
+	case BCH_DATA_cached:
+		fs_usage->cached += sectors;
+		break;
+	default:
+		break;
+	}
+}
+
 /*
  * Clear journal_seq_valid for buckets for which it's not needed, to prevent
  * wraparound:
@@ -132,17 +152,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(&c->replicas, i);
 
-		switch (e->data_type) {
-		case BCH_DATA_btree:
-			usage->btree	+= usage->replicas[i];
-			break;
-		case BCH_DATA_user:
-			usage->data	+= usage->replicas[i];
-			break;
-		case BCH_DATA_cached:
-			usage->cached	+= usage->replicas[i];
-			break;
-		}
+		fs_usage_data_type_to_base(usage, e->data_type, usage->replicas[i]);
 	}
 
 	percpu_up_write(&c->mark_lock);
@@ -376,9 +386,14 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
 	return 0;
 }
 
+static inline int is_stripe_data_bucket(struct bucket_mark m)
+{
+	return m.stripe && m.data_type != BCH_DATA_parity;
+}
+
 static inline int bucket_stripe_sectors(struct bucket_mark m)
 {
-	return m.stripe ? m.dirty_sectors : 0;
+	return is_stripe_data_bucket(m) ? m.dirty_sectors : 0;
 }
 
 static inline enum bch_data_type bucket_type(struct bucket_mark m)
@@ -412,8 +427,8 @@ int bch2_fs_usage_apply(struct bch_fs *c,
 	 */
 	should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
 	if (WARN_ONCE(should_not_have_added > 0,
-		      "disk usage increased by %lli without a reservation",
-		      should_not_have_added)) {
+		      "disk usage increased by %lli more than reservation of %llu",
+		      added, disk_res ? disk_res->sectors : 0)) {
 		atomic64_sub(should_not_have_added, &c->sectors_available);
 		added -= should_not_have_added;
 		ret = -1;
@@ -522,17 +537,7 @@ static inline int update_replicas(struct bch_fs *c,
 	if (!fs_usage)
 		return 0;
 
-	switch (r->data_type) {
-	case BCH_DATA_btree:
-		fs_usage->btree		+= sectors;
-		break;
-	case BCH_DATA_user:
-		fs_usage->data		+= sectors;
-		break;
-	case BCH_DATA_cached:
-		fs_usage->cached	+= sectors;
-		break;
-	}
+	fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
 	fs_usage->replicas[idx]		+= sectors;
 	return 0;
 }
@@ -884,124 +889,155 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
 				    p.crc.uncompressed_size);
 }
 
-static void bucket_set_stripe(struct bch_fs *c,
-			      const struct bch_extent_ptr *ptr,
-			      struct bch_fs_usage *fs_usage,
-			      u64 journal_seq,
-			      unsigned flags,
-			      bool enabled)
+static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k,
+			    const struct bch_extent_ptr *ptr,
+			    s64 sectors, enum bch_data_type ptr_data_type,
+			    u8 bucket_gen, u8 bucket_data_type,
+			    u16 dirty_sectors, u16 cached_sectors)
 {
-	bool gc = flags & BTREE_TRIGGER_GC;
-	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-	struct bucket *g = PTR_BUCKET(ca, ptr, gc);
-	struct bucket_mark new, old;
-
-	old = bucket_cmpxchg(g, new, ({
-		new.stripe			= enabled;
-		if (journal_seq) {
-			new.journal_seq_valid	= 1;
-			new.journal_seq		= journal_seq;
-		}
-	}));
-
-	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
-
-	/*
-	 * XXX write repair code for these, flag stripe as possibly bad
-	 */
-	if (old.gen != ptr->gen)
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			      "stripe with stale pointer");
-#if 0
-	/*
-	 * We'd like to check for these, but these checks don't work
-	 * yet:
-	 */
-	if (old.stripe && enabled)
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			      "multiple stripes using same bucket");
-
-	if (!old.stripe && !enabled)
-		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-			      "deleting stripe but bucket not marked as stripe bucket");
-#endif
-}
-
-static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
-			  struct extent_ptr_decoded p,
-			  s64 sectors, enum bch_data_type ptr_data_type,
-			  u8 bucket_gen, u8 *bucket_data_type,
-			  u16 *dirty_sectors, u16 *cached_sectors)
-{
-	u16 *dst_sectors = !p.ptr.cached
+	size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr);
+	u16 bucket_sectors = !ptr->cached
 		? dirty_sectors
 		: cached_sectors;
-	u16 orig_sectors = *dst_sectors;
 	char buf[200];
 
-	if (gen_after(p.ptr.gen, bucket_gen)) {
+	if (gen_after(ptr->gen, bucket_gen)) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
 			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			p.ptr.gen,
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			ptr->gen,
 			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
 		return -EIO;
 	}
 
-	if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) {
+	if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
 			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			p.ptr.gen,
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			ptr->gen,
 			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
 		return -EIO;
 	}
 
-	if (bucket_gen != p.ptr.gen && !p.ptr.cached) {
+	if (bucket_gen != ptr->gen && !ptr->cached) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n"
 			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			p.ptr.gen,
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			ptr->gen,
 			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
 		return -EIO;
 	}
 
-	if (bucket_gen != p.ptr.gen)
+	if (bucket_gen != ptr->gen)
 		return 1;
 
-	if (*bucket_data_type && *bucket_data_type != ptr_data_type) {
+	if (bucket_data_type && ptr_data_type &&
+	    bucket_data_type != ptr_data_type) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
 			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type],
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type],
 			bch2_data_types[ptr_data_type],
 			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
 		return -EIO;
 	}
 
-	if (checked_add(*dst_sectors, sectors)) {
+	if ((unsigned) (bucket_sectors + sectors) > U16_MAX) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n"
 			"while marking %s",
-			p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr),
-			bucket_gen,
-			bch2_data_types[*bucket_data_type ?: ptr_data_type],
-			orig_sectors, sectors,
+			ptr->dev, bucket_nr, bucket_gen,
+			bch2_data_types[bucket_data_type ?: ptr_data_type],
+			bucket_sectors, sectors,
 			(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
 		return -EIO;
 	}
 
+	return 0;
+}
+
+static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k,
+			     unsigned ptr_idx,
+			     struct bch_fs_usage *fs_usage,
+			     u64 journal_seq, unsigned flags,
+			     bool enabled)
+{
+	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+	unsigned nr_data = s->nr_blocks - s->nr_redundant;
+	bool parity = ptr_idx >= nr_data;
+	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
+	bool gc = flags & BTREE_TRIGGER_GC;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	struct bucket *g = PTR_BUCKET(ca, ptr, gc);
+	struct bucket_mark new, old;
+	char buf[200];
+	int ret;
+
+	if (enabled)
+		g->ec_redundancy = s->nr_redundant;
+
+	old = bucket_cmpxchg(g, new, ({
+		ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type,
+				       new.dirty_sectors, new.cached_sectors);
+		if (ret)
+			return ret;
+
+		if (new.stripe && enabled)
+			bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+				      "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
+				      ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen,
+				      (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+
+		if (!new.stripe && !enabled)
+			bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+				      "bucket %u:%zu gen %u: deleting stripe but not marked\n%s",
+				      ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen,
+				      (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
+
+		new.stripe			= enabled;
+
+		if ((flags & BTREE_TRIGGER_GC) && parity) {
+			new.data_type = enabled ? BCH_DATA_parity : 0;
+			new.dirty_sectors = enabled ? le16_to_cpu(s->sectors): 0;
+		}
+
+		if (journal_seq) {
+			new.journal_seq_valid	= 1;
+			new.journal_seq		= journal_seq;
+		}
+	}));
+
+	if (!enabled)
+		g->ec_redundancy = 0;
+
+	bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+	return 0;
+}
+
+static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
+			  const struct bch_extent_ptr *ptr,
+			  s64 sectors, enum bch_data_type ptr_data_type,
+			  u8 bucket_gen, u8 *bucket_data_type,
+			  u16 *dirty_sectors, u16 *cached_sectors)
+{
+	u16 *dst_sectors = !ptr->cached
+		? dirty_sectors
+		: cached_sectors;
+	int ret = check_bucket_ref(c, k, ptr, sectors, ptr_data_type,
+				   bucket_gen, *bucket_data_type,
+				   *dirty_sectors, *cached_sectors);
+
+	if (ret)
+		return ret;
+
+	*dst_sectors += sectors;
 	*bucket_data_type = *dirty_sectors || *cached_sectors
 		? ptr_data_type : 0;
 	return 0;
@@ -1026,7 +1062,7 @@ static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k,
 		new.v.counter = old.v.counter = v;
 		bucket_data_type = new.data_type;
 
-		ret = __mark_pointer(c, k, p, sectors, data_type, new.gen,
+		ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, new.gen,
 				     &bucket_data_type,
 				     &new.dirty_sectors,
 				     &new.cached_sectors);
@@ -1059,12 +1095,10 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 			       struct bch_extent_stripe_ptr p,
 			       enum bch_data_type data_type,
 			       struct bch_fs_usage *fs_usage,
-			       s64 sectors, unsigned flags,
-			       struct bch_replicas_padded *r,
-			       unsigned *nr_data,
-			       unsigned *nr_parity)
+			       s64 sectors, unsigned flags)
 {
 	bool gc = flags & BTREE_TRIGGER_GC;
+	struct bch_replicas_padded r;
 	struct stripe *m;
 	unsigned i, blocks_nonempty = 0;
 
@@ -1079,14 +1113,10 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 		return -EIO;
 	}
 
-	BUG_ON(m->r.e.data_type != data_type);
-
-	*nr_data	= m->nr_blocks - m->nr_redundant;
-	*nr_parity	= m->nr_redundant;
-	*r = m->r;
-
 	m->block_sectors[p.block] += sectors;
 
+	r = m->r;
+
 	for (i = 0; i < m->nr_blocks; i++)
 		blocks_nonempty += m->block_sectors[i] != 0;
 
@@ -1098,6 +1128,9 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 
 	spin_unlock(&c->ec_stripes_heap_lock);
 
+	r.e.data_type = data_type;
+	update_replicas(c, fs_usage, &r.e, sectors);
+
 	return 0;
 }
 
@@ -1143,25 +1176,11 @@ static int bch2_mark_extent(struct bch_fs *c,
 			dirty_sectors	       += disk_sectors;
 			r.e.devs[r.e.nr_devs++]	= p.ptr.dev;
 		} else {
-			struct bch_replicas_padded ec_r;
-			unsigned nr_data, nr_parity;
-			s64 parity_sectors;
-
 			ret = bch2_mark_stripe_ptr(c, p.ec, data_type,
-					fs_usage, disk_sectors, flags,
-					&ec_r, &nr_data, &nr_parity);
+					fs_usage, disk_sectors, flags);
 			if (ret)
 				return ret;
 
-			parity_sectors =
-				__ptr_disk_sectors_delta(p.crc.live_size,
-					offset, sectors, flags,
-					p.crc.compressed_size * nr_parity,
-					p.crc.uncompressed_size * nr_data);
-
-			update_replicas(c, fs_usage, &ec_r.e,
-					disk_sectors + parity_sectors);
-
 			/*
 			 * There may be other dirty pointers in this extent, but
 			 * if so they're not required for mounting if we have an
@@ -1190,6 +1209,7 @@ static int bch2_mark_stripe(struct bch_fs *c,
 		? bkey_s_c_to_stripe(new).v : NULL;
 	struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
 	unsigned i;
+	int ret;
 
 	if (!m || (old_s && !m->alive)) {
 		bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
@@ -1199,9 +1219,12 @@ static int bch2_mark_stripe(struct bch_fs *c,
 
 	if (!new_s) {
 		/* Deleting: */
-		for (i = 0; i < old_s->nr_blocks; i++)
-			bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
-					  journal_seq, flags, false);
+		for (i = 0; i < old_s->nr_blocks; i++) {
+			ret = bucket_set_stripe(c, old, i, fs_usage,
+						journal_seq, flags, false);
+			if (ret)
+				return ret;
+		}
 
 		if (!gc && m->on_heap) {
 			spin_lock(&c->ec_stripes_heap_lock);
@@ -1209,6 +1232,10 @@ static int bch2_mark_stripe(struct bch_fs *c,
 			spin_unlock(&c->ec_stripes_heap_lock);
 		}
 
+		if (gc)
+			update_replicas(c, fs_usage, &m->r.e,
+					-((s64) m->sectors * m->nr_redundant));
+
 		memset(m, 0, sizeof(*m));
 	} else {
 		BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
@@ -1220,11 +1247,16 @@ static int bch2_mark_stripe(struct bch_fs *c,
 				   old_s->ptrs + i,
 				   sizeof(struct bch_extent_ptr))) {
 
-			if (old_s)
-				bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
+			if (old_s) {
+				bucket_set_stripe(c, old, i, fs_usage,
 						  journal_seq, flags, false);
-			bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
-					  journal_seq, flags, true);
+				if (ret)
+					return ret;
+			}
+			ret = bucket_set_stripe(c, new, i, fs_usage,
+						journal_seq, flags, true);
+			if (ret)
+				return ret;
 		}
 	}
 
@@ -1233,19 +1265,23 @@ static int bch2_mark_stripe(struct bch_fs *c,
 		m->algorithm	= new_s->algorithm;
 		m->nr_blocks	= new_s->nr_blocks;
 		m->nr_redundant	= new_s->nr_redundant;
+		m->blocks_nonempty = 0;
 
-		bch2_bkey_to_replicas(&m->r.e, new);
+		for (i = 0; i < new_s->nr_blocks; i++) {
+			m->block_sectors[i] =
+				stripe_blockcount_get(new_s, i);
+			m->blocks_nonempty += !!m->block_sectors[i];
+		}
 
-		/* gc recalculates these fields: */
-		if (!(flags & BTREE_TRIGGER_GC)) {
-			m->blocks_nonempty = 0;
+		if (gc && old_s)
+			update_replicas(c, fs_usage, &m->r.e,
+					-((s64) m->sectors * m->nr_redundant));
 
-			for (i = 0; i < new_s->nr_blocks; i++) {
-				m->block_sectors[i] =
-					stripe_blockcount_get(new_s, i);
-				m->blocks_nonempty += !!m->block_sectors[i];
-			}
-		}
+		bch2_bkey_to_replicas(&m->r.e, new);
+
+		if (gc)
+			update_replicas(c, fs_usage, &m->r.e,
+					((s64) m->sectors * m->nr_redundant));
 
 		if (!gc) {
 			spin_lock(&c->ec_stripes_heap_lock);
@@ -1550,23 +1586,21 @@ static int trans_get_key(struct btree_trans *trans,
 	return ret;
 }
 
-static int bch2_trans_mark_pointer(struct btree_trans *trans,
-			struct bkey_s_c k, struct extent_ptr_decoded p,
-			s64 sectors, enum bch_data_type data_type)
+static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter,
+					 const struct bch_extent_ptr *ptr,
+					 struct bkey_alloc_unpacked *u)
 {
 	struct bch_fs *c = trans->c;
-	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-	struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr));
-	struct btree_iter *iter;
-	struct bkey_s_c k_a;
-	struct bkey_alloc_unpacked u;
-	struct bkey_i_alloc *a;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr));
 	struct bucket *g;
+	struct btree_iter *iter;
+	struct bkey_s_c k;
 	int ret;
 
-	iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a);
+	iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k);
 	if (iter) {
-		u = bch2_alloc_unpack(k_a);
+		*u = bch2_alloc_unpack(k);
 	} else {
 		iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos,
 					   BTREE_ITER_CACHED|
@@ -1576,16 +1610,36 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 			return PTR_ERR(iter);
 
 		ret = bch2_btree_iter_traverse(iter);
-		if (ret)
-			goto out;
+		if (ret) {
+			bch2_trans_iter_put(trans, iter);
+			return ret;
+		}
 
 		percpu_down_read(&c->mark_lock);
 		g = bucket(ca, pos.offset);
-		u = alloc_mem_to_key(g, READ_ONCE(g->mark));
+		*u = alloc_mem_to_key(g, READ_ONCE(g->mark));
 		percpu_up_read(&c->mark_lock);
 	}
 
-	ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type,
+	*_iter = iter;
+	return 0;
+}
+
+static int bch2_trans_mark_pointer(struct btree_trans *trans,
+			struct bkey_s_c k, struct extent_ptr_decoded p,
+			s64 sectors, enum bch_data_type data_type)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter *iter;
+	struct bkey_alloc_unpacked u;
+	struct bkey_i_alloc *a;
+	int ret;
+
+	ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u);
+	if (ret)
+		return ret;
+
+	ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type,
 			     &u.dirty_sectors, &u.cached_sectors);
 	if (ret)
 		goto out;
@@ -1596,7 +1650,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
 		goto out;
 
 	bkey_alloc_init(&a->k_i);
-	a->k.p = pos;
+	a->k.p = iter->pos;
 	bch2_alloc_pack(a, u);
 	bch2_trans_update(trans, iter, &a->k_i, 0);
 out:
@@ -1606,15 +1660,13 @@ out:
 
 static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 			struct bch_extent_stripe_ptr p,
-			s64 sectors, enum bch_data_type data_type,
-			struct bch_replicas_padded *r,
-			unsigned *nr_data,
-			unsigned *nr_parity)
+			s64 sectors, enum bch_data_type data_type)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
 	struct bkey_i_stripe *s;
+	struct bch_replicas_padded r;
 	int ret = 0;
 
 	ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
@@ -1635,15 +1687,14 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
 		goto out;
 
 	bkey_reassemble(&s->k_i, k);
-
 	stripe_blockcount_set(&s->v, p.block,
 		stripe_blockcount_get(&s->v, p.block) + sectors);
-
-	*nr_data	= s->v.nr_blocks - s->v.nr_redundant;
-	*nr_parity	= s->v.nr_redundant;
-	bch2_bkey_to_replicas(&r->e, bkey_i_to_s_c(&s->k_i));
 	bch2_trans_update(trans, iter, &s->k_i, 0);
+
+	bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
+	r.e.data_type = data_type;
+	update_replicas_list(trans, &r.e, sectors);
 out:
 	bch2_trans_iter_put(trans, iter);
 	return ret;
@@ -1688,25 +1739,11 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 			dirty_sectors	       += disk_sectors;
 			r.e.devs[r.e.nr_devs++]	= p.ptr.dev;
 		} else {
-			struct bch_replicas_padded ec_r;
-			unsigned nr_data, nr_parity;
-			s64 parity_sectors;
-
 			ret = bch2_trans_mark_stripe_ptr(trans, p.ec,
-					disk_sectors, data_type,
-					&ec_r, &nr_data, &nr_parity);
+					disk_sectors, data_type);
 			if (ret)
 				return ret;
 
-			parity_sectors =
-				__ptr_disk_sectors_delta(p.crc.live_size,
-					offset, sectors, flags,
-					p.crc.compressed_size * nr_parity,
-					p.crc.uncompressed_size * nr_data);
-
-			update_replicas_list(trans, &ec_r.e,
-					     disk_sectors + parity_sectors);
-
 			r.e.nr_required = 0;
 		}
 	}
@@ -1717,6 +1754,76 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
 	return 0;
 }
 
+static int bch2_trans_mark_stripe(struct btree_trans *trans,
+				  struct bkey_s_c k,
+				  unsigned flags)
+{
+	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+	unsigned nr_data = s->nr_blocks - s->nr_redundant;
+	struct bch_replicas_padded r;
+	struct bkey_alloc_unpacked u;
+	struct bkey_i_alloc *a;
+	struct btree_iter *iter;
+	bool deleting = flags & BTREE_TRIGGER_OVERWRITE;
+	s64 sectors = le16_to_cpu(s->sectors);
+	unsigned i;
+	int ret = 0;
+
+	if (deleting)
+		sectors = -sectors;
+
+	bch2_bkey_to_replicas(&r.e, k);
+	update_replicas_list(trans, &r.e, sectors * s->nr_redundant);
+
+	/*
+	 * The allocator code doesn't necessarily update bucket gens in the
+	 * btree when incrementing them, right before handing out new buckets -
+	 * we just need to persist those updates here along with the new stripe:
+	 */
+
+	for (i = 0; i < s->nr_blocks && !ret; i++) {
+		bool parity = i >= nr_data;
+
+		ret = bch2_trans_start_alloc_update(trans, &iter,
+						    &s->ptrs[i], &u);
+		if (ret)
+			break;
+
+		if (parity) {
+			u.dirty_sectors += sectors;
+			u.data_type = u.dirty_sectors
+				? BCH_DATA_parity
+				: 0;
+		}
+
+		a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+		ret = PTR_ERR_OR_ZERO(a);
+		if (ret)
+			goto put_iter;
+
+		bkey_alloc_init(&a->k_i);
+		a->k.p = iter->pos;
+		bch2_alloc_pack(a, u);
+		bch2_trans_update(trans, iter, &a->k_i, 0);
+put_iter:
+		bch2_trans_iter_put(trans, iter);
+	}
+
+	return ret;
+}
+
+static __le64 *bkey_refcount(struct bkey_i *k)
+{
+	switch (k->k.type) {
+	case KEY_TYPE_reflink_v:
+		return &bkey_i_to_reflink_v(k)->v.refcount;
+	case KEY_TYPE_indirect_inline_data:
+		return &bkey_i_to_indirect_inline_data(k)->v.refcount;
+	default:
+		return NULL;
+	}
+}
+
 static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 			struct bkey_s_c_reflink_p p, u64 idx,
 			unsigned sectors, unsigned flags)
@@ -1725,7 +1832,8 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct btree_iter *iter;
 	struct bkey_s_c k;
-	struct bkey_i_reflink_v *r_v;
+	struct bkey_i *n;
+	__le64 *refcount;
 	s64 ret;
 
 	ret = trans_get_key(trans, BTREE_ID_REFLINK,
@@ -1733,14 +1841,6 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 	if (ret < 0)
 		return ret;
 
-	if (k.k->type != KEY_TYPE_reflink_v) {
-		bch2_fs_inconsistent(c,
-			"%llu:%llu len %u points to nonexistent indirect extent %llu",
-			p.k->p.inode, p.k->p.offset, p.k->size, idx);
-		ret = -EIO;
-		goto err;
-	}
-
 	if ((flags & BTREE_TRIGGER_OVERWRITE) &&
 	    (bkey_start_offset(k.k) < idx ||
 	     k.k->p.offset > idx + sectors))
@@ -1748,25 +1848,33 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 
 	sectors = k.k->p.offset - idx;
 
-	r_v = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
-	ret = PTR_ERR_OR_ZERO(r_v);
+	n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+	ret = PTR_ERR_OR_ZERO(n);
 	if (ret)
 		goto err;
 
-	bkey_reassemble(&r_v->k_i, k);
+	bkey_reassemble(n, k);
 
-	le64_add_cpu(&r_v->v.refcount,
-		     !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1);
+	refcount = bkey_refcount(n);
+	if (!refcount) {
+		bch2_fs_inconsistent(c,
+			"%llu:%llu len %u points to nonexistent indirect extent %llu",
+			p.k->p.inode, p.k->p.offset, p.k->size, idx);
+		ret = -EIO;
+		goto err;
+	}
+
+	le64_add_cpu(refcount, !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1);
 
-	if (!r_v->v.refcount) {
-		r_v->k.type = KEY_TYPE_deleted;
-		set_bkey_val_u64s(&r_v->k, 0);
+	if (!*refcount) {
+		n->k.type = KEY_TYPE_deleted;
+		set_bkey_val_u64s(&n->k, 0);
 	}
 
 	bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
 	BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
 
-	bch2_trans_update(trans, iter, &r_v->k_i, 0);
+	bch2_trans_update(trans, iter, n, 0);
 out:
 	ret = sectors;
 err:
@@ -1816,6 +1924,8 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 	case KEY_TYPE_reflink_v:
 		return bch2_trans_mark_extent(trans, k, offset, sectors,
 					      flags, BCH_DATA_user);
+	case KEY_TYPE_stripe:
+		return bch2_trans_mark_stripe(trans, k, flags);
 	case KEY_TYPE_inode:
 		d = replicas_deltas_realloc(trans, 0);
 
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index d5215b1..d6057d2 100644
--- a/libbcachefs/buckets_types.h
+++ b/libbcachefs/buckets_types.h
@@ -41,6 +41,7 @@ struct bucket {
 	u8			oldest_gen;
 	u8			gc_gen;
 	unsigned		gen_valid:1;
+	u8			ec_redundancy;
 };
 
 struct bucket_array {
@@ -125,6 +126,7 @@ struct disk_reservation {
 struct copygc_heap_entry {
 	u8			dev;
 	u8			gen;
+	u8			replicas;
 	u16			fragmentation;
 	u32			sectors;
 	u64			offset;
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index eac750a..e4a4805 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -343,12 +343,17 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
 	unsigned offset = 0, bytes = buf->size << 9;
 	struct bch_extent_ptr *ptr = &v->ptrs[idx];
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	enum bch_data_type data_type = idx < buf->key.v.nr_blocks - buf->key.v.nr_redundant
+		? BCH_DATA_user
+		: BCH_DATA_parity;
 
 	if (!bch2_dev_get_ioref(ca, rw)) {
 		clear_bit(idx, buf->valid);
 		return;
 	}
 
+	this_cpu_add(ca->io_done->sectors[rw][data_type], buf->size);
+
 	while (offset < bytes) {
 		unsigned nr_iovecs = min_t(size_t, BIO_MAX_PAGES,
 					   DIV_ROUND_UP(bytes, PAGE_SIZE));
@@ -670,6 +675,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
 /* stripe creation: */
 
 static int ec_stripe_bkey_insert(struct bch_fs *c,
+				 struct ec_stripe_new *s,
 				 struct bkey_i_stripe *stripe)
 {
 	struct btree_trans trans;
@@ -711,7 +717,7 @@ found_slot:
 
 	bch2_trans_update(&trans, iter, &stripe->k_i, 0);
 
-	ret = bch2_trans_commit(&trans, NULL, NULL,
+	ret = bch2_trans_commit(&trans, &s->res, NULL,
 				BTREE_INSERT_NOFAIL);
 err:
 	bch2_trans_iter_put(&trans, iter);
@@ -858,8 +864,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
 
 	ret = s->existing_stripe
 		? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
-				    NULL, NULL, BTREE_INSERT_NOFAIL)
-		: ec_stripe_bkey_insert(c, &s->stripe.key);
+				    &s->res, NULL, BTREE_INSERT_NOFAIL)
+		: ec_stripe_bkey_insert(c, s, &s->stripe.key);
 	if (ret) {
 		bch_err(c, "error creating stripe: error creating stripe key");
 		goto err_put_writes;
@@ -886,6 +892,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
 err_put_writes:
 	percpu_ref_put(&c->writes);
 err:
+	bch2_disk_reservation_put(c, &s->res);
+
 	open_bucket_for_each(c, &s->blocks, ob, i) {
 		ob->ec = NULL;
 		__bch2_open_bucket_put(c, ob);
@@ -1325,6 +1333,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
 	struct open_bucket *ob;
 	unsigned i, data_idx = 0;
 	s64 idx;
+	int ret;
 
 	closure_init_stack(&cl);
 
@@ -1356,6 +1365,22 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
 		}
 	}
 
+	if (!h->s->existing_stripe &&
+	    !h->s->res.sectors) {
+		ret = bch2_disk_reservation_get(c, &h->s->res,
+						h->blocksize,
+						h->s->nr_parity, 0);
+		if (ret) {
+			/* What should we do here? */
+			bch_err(c, "unable to create new stripe: %i", ret);
+			bch2_ec_stripe_head_put(c, h);
+			h = NULL;
+			goto out;
+
+		}
+
+	}
+
 	if (new_stripe_alloc_buckets(c, h)) {
 		bch2_ec_stripe_head_put(c, h);
 		h = NULL;
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 6db16cf..15f751f 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -3,6 +3,7 @@
 #define _BCACHEFS_EC_H
 
 #include "ec_types.h"
+#include "buckets_types.h"
 #include "keylist_types.h"
 
 const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -105,6 +106,7 @@ struct ec_stripe_new {
 	struct open_buckets	blocks;
 	u8			data_block_idx[EC_STRIPE_MAX];
 	struct open_buckets	parity;
+	struct disk_reservation	res;
 
 	struct keylist		keys;
 	u64			inline_keys[BKEY_U64s * 8];
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 568f039..88297b3 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -1200,14 +1200,14 @@ int bch2_cut_front_s(struct bpos where, struct bkey_s k)
 		le64_add_cpu(&p.v->idx, sub);
 		break;
 	}
-	case KEY_TYPE_inline_data: {
-		struct bkey_s_inline_data d = bkey_s_to_inline_data(k);
+	case KEY_TYPE_inline_data:
+	case KEY_TYPE_indirect_inline_data: {
+		void *p = bkey_inline_data_p(k);
+		unsigned bytes = bkey_inline_data_bytes(k.k);
 
-		sub = min_t(u64, sub << 9, bkey_val_bytes(d.k));
+		sub = min_t(u64, sub << 9, bytes);
 
-		memmove(d.v->data,
-			d.v->data + sub,
-			bkey_val_bytes(d.k) - sub);
+		memmove(p, p + sub, bytes - sub);
 
 		new_val_u64s -= sub >> 3;
 		break;
@@ -1245,7 +1245,9 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
 
 	switch (k.k->type) {
 	case KEY_TYPE_inline_data:
-		new_val_u64s = min(new_val_u64s, k.k->size << 6);
+	case KEY_TYPE_indirect_inline_data:
+		new_val_u64s = (bkey_inline_data_offset(k.k) +
+				min(bkey_inline_data_bytes(k.k), k.k->size << 9)) >> 3;
 		break;
 	}
 
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 29b1536..74c7bb8 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -445,10 +445,35 @@ static inline bool bkey_extent_is_direct_data(const struct bkey *k)
 	}
 }
 
+static inline bool bkey_extent_is_inline_data(const struct bkey *k)
+{
+	return  k->type == KEY_TYPE_inline_data ||
+		k->type == KEY_TYPE_indirect_inline_data;
+}
+
+static inline unsigned bkey_inline_data_offset(const struct bkey *k)
+{
+	switch (k->type) {
+	case KEY_TYPE_inline_data:
+		return sizeof(struct bch_inline_data);
+	case KEY_TYPE_indirect_inline_data:
+		return sizeof(struct bch_indirect_inline_data);
+	default:
+		BUG();
+	}
+}
+
+static inline unsigned bkey_inline_data_bytes(const struct bkey *k)
+{
+	return bkey_val_bytes(k) - bkey_inline_data_offset(k);
+}
+
+#define bkey_inline_data_p(_k)	(((void *) (_k).v) + bkey_inline_data_offset((_k).k))
+
 static inline bool bkey_extent_is_data(const struct bkey *k)
 {
-	return bkey_extent_is_direct_data(k) ||
-	       k->type == KEY_TYPE_inline_data ||
+	return  bkey_extent_is_direct_data(k) ||
+		bkey_extent_is_inline_data(k) ||
 		k->type == KEY_TYPE_reflink_p;
 }
 
@@ -463,6 +488,7 @@ static inline bool bkey_extent_is_allocation(const struct bkey *k)
 	case KEY_TYPE_reflink_p:
 	case KEY_TYPE_reflink_v:
 	case KEY_TYPE_inline_data:
+	case KEY_TYPE_indirect_inline_data:
 		return true;
 	default:
 		return false;
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 4ceeafc..3aed2ca 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -839,18 +839,19 @@ retry:
 		if (ret)
 			break;
 
-		bkey_on_stack_reassemble(&sk, c, k);
-		k = bkey_i_to_s_c(sk.k);
-
 		offset_into_extent = iter->pos.offset -
 			bkey_start_offset(k.k);
 		sectors = k.k->size - offset_into_extent;
 
+		bkey_on_stack_reassemble(&sk, c, k);
+
 		ret = bch2_read_indirect_extent(trans,
 					&offset_into_extent, &sk);
 		if (ret)
 			break;
 
+		k = bkey_i_to_s_c(sk.k);
+
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
 		bch2_trans_unlock(trans);
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 6a9820e..1d66aca 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -887,20 +887,21 @@ retry:
 			continue;
 		}
 
-		bkey_on_stack_realloc(&cur, c, k.k->u64s);
-		bkey_on_stack_realloc(&prev, c, k.k->u64s);
-		bkey_reassemble(cur.k, k);
-		k = bkey_i_to_s_c(cur.k);
-
 		offset_into_extent	= iter->pos.offset -
 			bkey_start_offset(k.k);
 		sectors			= k.k->size - offset_into_extent;
 
+		bkey_on_stack_realloc(&cur, c, k.k->u64s);
+		bkey_on_stack_realloc(&prev, c, k.k->u64s);
+		bkey_reassemble(cur.k, k);
+
 		ret = bch2_read_indirect_extent(&trans,
 					&offset_into_extent, &cur);
 		if (ret)
 			break;
 
+		k = bkey_i_to_s_c(cur.k);
+
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
 		if (offset_into_extent)
@@ -1321,7 +1322,7 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
 
 	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
 
-	ret = bch2_parse_mount_opts(&opts, data);
+	ret = bch2_parse_mount_opts(c, &opts, data);
 	if (ret)
 		return ret;
 
@@ -1462,7 +1463,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 
 	opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
 
-	ret = bch2_parse_mount_opts(&opts, data);
+	ret = bch2_parse_mount_opts(NULL, &opts, data);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -1485,11 +1486,24 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 		goto got_sb;
 
 	c = bch2_fs_open(devs, nr_devs, opts);
-
-	if (!IS_ERR(c))
-		sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
-	else
+	if (IS_ERR(c)) {
 		sb = ERR_CAST(c);
+		goto got_sb;
+	}
+
+	/* Some options can't be parsed until after the fs is started: */
+	ret = bch2_parse_mount_opts(c, &opts, data);
+	if (ret) {
+		bch2_fs_stop(c);
+		sb = ERR_PTR(ret);
+		goto got_sb;
+	}
+
+	bch2_opts_apply(&c->opts, opts);
+
+	sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
+	if (IS_ERR(sb))
+		bch2_fs_stop(c);
 got_sb:
 	kfree(devs_to_fs);
 	kfree(devs[0]);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 0a4b4ee..8add8cc 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -1475,7 +1475,8 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
 			opts,
 			DATA_PROMOTE,
 			(struct data_opts) {
-				.target = opts.promote_target
+				.target		= opts.promote_target,
+				.nr_replicas	= 1,
 			},
 			btree_id, k);
 	BUG_ON(ret);
@@ -1675,7 +1676,6 @@ retry:
 		unsigned bytes, sectors, offset_into_extent;
 
 		bkey_on_stack_reassemble(&sk, c, k);
-		k = bkey_i_to_s_c(sk.k);
 
 		offset_into_extent = iter->pos.offset -
 			bkey_start_offset(k.k);
@@ -1686,6 +1686,8 @@ retry:
 		if (ret)
 			break;
 
+		k = bkey_i_to_s_c(sk.k);
+
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
 		bch2_trans_unlock(&trans);
@@ -2007,7 +2009,8 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	if (k.k->type != KEY_TYPE_reflink_v) {
+	if (k.k->type != KEY_TYPE_reflink_v &&
+	    k.k->type != KEY_TYPE_indirect_inline_data) {
 		__bcache_io_error(trans->c,
 				"pointer to nonexistent indirect extent");
 		ret = -EIO;
@@ -2035,13 +2038,12 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 	struct bpos pos = bkey_start_pos(k.k);
 	int pick_ret;
 
-	if (k.k->type == KEY_TYPE_inline_data) {
-		struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
+	if (bkey_extent_is_inline_data(k.k)) {
 		unsigned bytes = min_t(unsigned, iter.bi_size,
-				       bkey_val_bytes(d.k));
+				       bkey_inline_data_bytes(k.k));
 
 		swap(iter.bi_size, bytes);
-		memcpy_to_bio(&orig->bio, iter, d.v->data);
+		memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
 		swap(iter.bi_size, bytes);
 		bio_advance_iter(&orig->bio, &iter, bytes);
 		zero_fill_bio_iter(&orig->bio, iter);
@@ -2313,13 +2315,14 @@ retry:
 		sectors = k.k->size - offset_into_extent;
 
 		bkey_on_stack_reassemble(&sk, c, k);
-		k = bkey_i_to_s_c(sk.k);
 
 		ret = bch2_read_indirect_extent(&trans,
 					&offset_into_extent, &sk);
 		if (ret)
 			goto err;
 
+		k = bkey_i_to_s_c(sk.k);
+
 		/*
 		 * With indirect extents, the amount of data to read is the min
 		 * of the original extent and the indirect extent:
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 62dcac7..6633d21 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -266,8 +266,8 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
 		BCH_WRITE_DATA_ENCODED|
 		BCH_WRITE_FROM_INTERNAL;
 
-	m->op.nr_replicas	= 1;
-	m->op.nr_replicas_required = 1;
+	m->op.nr_replicas	= data_opts.nr_replicas;
+	m->op.nr_replicas_required = data_opts.nr_replicas;
 	m->op.index_update_fn	= bch2_migrate_index_update;
 
 	switch (data_cmd) {
@@ -756,6 +756,7 @@ static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg,
 		return DATA_SKIP;
 
 	data_opts->target		= 0;
+	data_opts->nr_replicas		= 1;
 	data_opts->btree_insert_flags	= 0;
 	return DATA_ADD_REPLICAS;
 }
@@ -771,6 +772,7 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
 		return DATA_SKIP;
 
 	data_opts->target		= 0;
+	data_opts->nr_replicas		= 1;
 	data_opts->btree_insert_flags	= 0;
 	data_opts->rewrite_dev		= op->migrate.dev;
 	return DATA_REWRITE;
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index 0acd172..b04bc66 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -20,7 +20,8 @@ enum data_cmd {
 
 struct data_opts {
 	u16		target;
-	unsigned	rewrite_dev;
+	u8		rewrite_dev;
+	u8		nr_replicas;
 	int		btree_insert_flags;
 };
 
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index de0a797..ddfda1e 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -53,17 +53,21 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 		cmp_int(l->offset,	r->offset);
 }
 
-static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
+static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
+				 struct bkey_s_c k,
+				 struct bch_io_opts *io_opts,
+				 struct data_opts *data_opts)
 {
 	copygc_heap *h = &c->copygc_heap;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const struct bch_extent_ptr *ptr;
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
 
-	bkey_for_each_ptr(ptrs, ptr) {
-		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
 		struct copygc_heap_entry search = {
-			.dev = ptr->dev,
-			.offset = ptr->offset
+			.dev	= p.ptr.dev,
+			.offset	= p.ptr.offset,
 		};
 
 		ssize_t i = eytzinger0_find_le(h->data, h->used,
@@ -81,27 +85,24 @@ static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
 		BUG_ON(i != j);
 #endif
 		if (i >= 0 &&
-		    ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
-		    ptr->gen == h->data[i].gen)
-			return ptr->dev;
-	}
+		    p.ptr.offset < h->data[i].offset + ca->mi.bucket_size &&
+		    p.ptr.gen == h->data[i].gen) {
+			data_opts->target		= io_opts->background_target;
+			data_opts->nr_replicas		= 1;
+			data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE;
+			data_opts->rewrite_dev		= p.ptr.dev;
 
-	return -1;
-}
+			if (p.has_ec) {
+				struct stripe *m = genradix_ptr(&c->stripes[0], p.ec.idx);
 
-static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
-				 struct bkey_s_c k,
-				 struct bch_io_opts *io_opts,
-				 struct data_opts *data_opts)
-{
-	int dev_idx = __copygc_pred(c, k);
-	if (dev_idx < 0)
-		return DATA_SKIP;
-
-	data_opts->target = io_opts->background_target;
-	data_opts->btree_insert_flags = BTREE_INSERT_USE_RESERVE;
-	data_opts->rewrite_dev = dev_idx;
-	return DATA_REWRITE;
+				data_opts->nr_replicas += m->nr_redundant;
+			}
+
+			return DATA_REWRITE;
+		}
+	}
+
+	return DATA_SKIP;
 }
 
 static bool have_copygc_reserve(struct bch_dev *ca)
@@ -168,7 +169,8 @@ static int bch2_copygc(struct bch_fs *c)
 	buckets = bucket_array(ca);
 
 	for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
-		struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+		struct bucket *g = buckets->b + b;
+		struct bucket_mark m = READ_ONCE(g->mark);
 		struct copygc_heap_entry e;
 
 		if (m.owned_by_allocator ||
@@ -177,9 +179,12 @@ static int bch2_copygc(struct bch_fs *c)
 		    bucket_sectors_used(m) >= ca->mi.bucket_size)
 			continue;
 
+		WARN_ON(m.stripe && !g->ec_redundancy);
+
 		e = (struct copygc_heap_entry) {
 			.dev		= dev_idx,
 			.gen		= m.gen,
+			.replicas	= 1 + g->ec_redundancy,
 			.fragmentation	= bucket_sectors_used(m) * (1U << 15)
 				/ ca->mi.bucket_size,
 			.sectors	= bucket_sectors_used(m),
@@ -196,11 +201,11 @@ static int bch2_copygc(struct bch_fs *c)
 	}
 
 	for (i = h->data; i < h->data + h->used; i++)
-		sectors_to_move += i->sectors;
+		sectors_to_move += i->sectors * i->replicas;
 
 	while (sectors_to_move > sectors_reserved) {
 		BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL));
-		sectors_to_move -= e.sectors;
+		sectors_to_move -= e.sectors * e.replicas;
 	}
 
 	buckets_to_move = h->used;
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index afe25cd..97a36ac 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -247,7 +247,7 @@ int bch2_opt_parse(struct bch_fs *c, const struct bch_option *opt,
 		break;
 	case BCH_OPT_FN:
 		if (!c)
-			return -EINVAL;
+			return 0;
 
 		return opt->parse(c, val, res);
 	}
@@ -325,7 +325,8 @@ int bch2_opts_check_may_set(struct bch_fs *c)
 	return 0;
 }
 
-int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
+int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
+			  char *options)
 {
 	char *opt, *name, *val;
 	int ret, id;
@@ -340,7 +341,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
 			if (id < 0)
 				goto bad_opt;
 
-			ret = bch2_opt_parse(NULL, &bch2_opt_table[id], val, &v);
+			ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v);
 			if (ret < 0)
 				goto bad_val;
 		} else {
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 014c608..710a7ee 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -185,7 +185,7 @@ enum opt_type {
 	x(inline_data,			u8,				\
 	  OPT_MOUNT|OPT_RUNTIME,					\
 	  OPT_BOOL(),							\
-	  NO_SB_OPT,			false,				\
+	  NO_SB_OPT,			true,				\
 	  NULL,		"Enable inline data extents")			\
 	x(acl,				u8,				\
 	  OPT_FORMAT|OPT_MOUNT,						\
@@ -418,7 +418,7 @@ void bch2_opt_to_text(struct printbuf *, struct bch_fs *,
 int bch2_opt_check_may_set(struct bch_fs *, int, u64);
 int bch2_opts_check_may_set(struct bch_fs *);
 
-int bch2_parse_mount_opts(struct bch_opts *, char *);
+int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, char *);
 
 /* inode opts: */
 
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 56a1f76..44d2651 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -73,6 +73,7 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
 {
 	if (__bch2_rebalance_pred(c, k, io_opts) >= 0) {
 		data_opts->target		= io_opts->background_target;
+		data_opts->nr_replicas		= 1;
 		data_opts->btree_insert_flags	= 0;
 		return DATA_ADD_REPLICAS;
 	} else {
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index d70fa96..32fed6b 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -25,6 +25,18 @@
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
+/* for -o reconstruct_alloc: */
+static void drop_alloc_keys(struct journal_keys *keys)
+{
+	size_t src, dst;
+
+	for (src = 0, dst = 0; src < keys->nr; src++)
+		if (keys->d[src].btree_id != BTREE_ID_ALLOC)
+			keys->d[dst++] = keys->d[src];
+
+	keys->nr = dst;
+}
+
 /* iterate over keys read from the journal: */
 
 static struct journal_key *journal_key_search(struct journal_keys *journal_keys,
@@ -930,7 +942,6 @@ static int read_btree_roots(struct bch_fs *c)
 			continue;
 		}
 
-
 		if (r->error) {
 			__fsck_err(c, i == BTREE_ID_ALLOC
 				   ? FSCK_CAN_IGNORE : 0,
@@ -1027,6 +1038,11 @@ int bch2_fs_recovery(struct bch_fs *c)
 		goto err;
 	}
 
+	if (c->opts.reconstruct_alloc) {
+		c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+		drop_alloc_keys(&c->journal_keys);
+	}
+
 	ret = journal_replay_early(c, clean, &c->journal_entries);
 	if (ret)
 		goto err;
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 3c473f1..8abcbfb 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -9,6 +9,18 @@
 
 #include 
 
+static inline unsigned bkey_type_to_indirect(const struct bkey *k)
+{
+	switch (k->type) {
+	case KEY_TYPE_extent:
+		return KEY_TYPE_reflink_v;
+	case KEY_TYPE_inline_data:
+		return KEY_TYPE_indirect_inline_data;
+	default:
+		return 0;
+	}
+}
+
 /* reflink pointers */
 
 const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
@@ -71,17 +83,42 @@ void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
 	bch2_bkey_ptrs_to_text(out, c, k);
 }
 
+/* indirect inline data */
+
+const char *bch2_indirect_inline_data_invalid(const struct bch_fs *c,
+					      struct bkey_s_c k)
+{
+	if (bkey_val_bytes(k.k) < sizeof(struct bch_indirect_inline_data))
+		return "incorrect value size";
+	return NULL;
+}
+
+void bch2_indirect_inline_data_to_text(struct printbuf *out,
+					struct bch_fs *c, struct bkey_s_c k)
+{
+	struct bkey_s_c_indirect_inline_data d = bkey_s_c_to_indirect_inline_data(k);
+	unsigned datalen = bkey_inline_data_bytes(k.k);
+
+	pr_buf(out, "refcount %llu datalen %u: %*phN",
+	       le64_to_cpu(d.v->refcount), datalen,
+	       min(datalen, 32U), d.v->data);
+}
+
 static int bch2_make_extent_indirect(struct btree_trans *trans,
 				     struct btree_iter *extent_iter,
-				     struct bkey_i_extent *e)
+				     struct bkey_i *orig)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter *reflink_iter;
 	struct bkey_s_c k;
-	struct bkey_i_reflink_v *r_v;
+	struct bkey_i *r_v;
 	struct bkey_i_reflink_p *r_p;
+	__le64 *refcount;
 	int ret;
 
+	if (orig->k.type == KEY_TYPE_inline_data)
+		bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data);
+
 	for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK,
 			   POS(0, c->reflink_hint),
 			   BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
@@ -90,7 +127,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 			continue;
 		}
 
-		if (bkey_deleted(k.k) && e->k.size <= k.k->size)
+		if (bkey_deleted(k.k) && orig->k.size <= k.k->size)
 			break;
 	}
 
@@ -100,29 +137,31 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
 	/* rewind iter to start of hole, if necessary: */
 	bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k));
 
-	r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k));
+	r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_val_bytes(&orig->k));
 	ret = PTR_ERR_OR_ZERO(r_v);
 	if (ret)
 		goto err;
 
-	bkey_reflink_v_init(&r_v->k_i);
+	bkey_init(&r_v->k);
+	r_v->k.type	= bkey_type_to_indirect(&orig->k);
 	r_v->k.p	= reflink_iter->pos;
-	bch2_key_resize(&r_v->k, e->k.size);
-	r_v->k.version	= e->k.version;
+	bch2_key_resize(&r_v->k, orig->k.size);
+	r_v->k.version	= orig->k.version;
+
+	set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
 
-	set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) +
-			  bkey_val_u64s(&e->k));
-	r_v->v.refcount	= 0;
-	memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k));
+	refcount	= (void *) &r_v->v;
+	*refcount	= 0;
+	memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
 
-	bch2_trans_update(trans, reflink_iter, &r_v->k_i, 0);
+	bch2_trans_update(trans, reflink_iter, r_v, 0);
 
 	r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
 	if (IS_ERR(r_p))
 		return PTR_ERR(r_p);
 
-	e->k.type = KEY_TYPE_reflink_p;
-	r_p = bkey_i_to_reflink_p(&e->k_i);
+	orig->k.type = KEY_TYPE_reflink_p;
+	r_p = bkey_i_to_reflink_p(orig);
 	set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
 	r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
 
@@ -144,8 +183,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 		if (bkey_cmp(iter->pos, end) >= 0)
 			return bkey_s_c_null;
 
-		if (k.k->type == KEY_TYPE_extent ||
-		    k.k->type == KEY_TYPE_reflink_p)
+		if (bkey_extent_is_data(k.k))
 			break;
 	}
 
@@ -218,7 +256,7 @@ s64 bch2_remap_range(struct bch_fs *c,
 		if (!bkey_cmp(dst_iter->pos, dst_end))
 			break;
 
-		if (src_k.k->type == KEY_TYPE_extent) {
+		if (src_k.k->type != KEY_TYPE_reflink_p) {
 			bkey_on_stack_reassemble(&new_src, c, src_k);
 			src_k = bkey_i_to_s_c(new_src.k);
 
@@ -226,7 +264,7 @@ s64 bch2_remap_range(struct bch_fs *c,
 			bch2_cut_back(src_end,	new_src.k);
 
 			ret = bch2_make_extent_indirect(&trans, src_iter,
-						bkey_i_to_extent(new_src.k));
+						new_src.k);
 			if (ret)
 				goto btree_err;
 
diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h
index 5445c1c..9d5e7dc 100644
--- a/libbcachefs/reflink.h
+++ b/libbcachefs/reflink.h
@@ -18,13 +18,22 @@ const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
 			    struct bkey_s_c);
 
-
 #define bch2_bkey_ops_reflink_v (struct bkey_ops) {		\
 	.key_invalid	= bch2_reflink_v_invalid,		\
 	.val_to_text	= bch2_reflink_v_to_text,		\
 	.swab		= bch2_ptr_swab,			\
 }
 
+const char *bch2_indirect_inline_data_invalid(const struct bch_fs *,
+					      struct bkey_s_c);
+void bch2_indirect_inline_data_to_text(struct printbuf *,
+				       struct bch_fs *, struct bkey_s_c);
+
+#define bch2_bkey_ops_indirect_inline_data (struct bkey_ops) {	\
+	.key_invalid	= bch2_indirect_inline_data_invalid,	\
+	.val_to_text	= bch2_indirect_inline_data_to_text,	\
+}
+
 s64 bch2_remap_range(struct bch_fs *, struct bpos, struct bpos,
 		     u64, u64 *, u64, s64 *);
 
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 6b6506c..91518c0 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -122,7 +122,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
 		extent_to_replicas(k, e);
 		break;
 	case KEY_TYPE_stripe:
-		e->data_type = BCH_DATA_user;
+		e->data_type = BCH_DATA_parity;
 		stripe_to_replicas(k, e);
 		break;
 	}
@@ -446,7 +446,23 @@ static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
 
 	bch2_bkey_to_replicas(&search.e, k);
 
-	return __bch2_mark_replicas(c, &search.e, check);
+	ret = __bch2_mark_replicas(c, &search.e, check);
+	if (ret)
+		return ret;
+
+	if (search.e.data_type == BCH_DATA_parity) {
+		search.e.data_type = BCH_DATA_cached;
+		ret = __bch2_mark_replicas(c, &search.e, check);
+		if (ret)
+			return ret;
+
+		search.e.data_type = BCH_DATA_user;
+		ret = __bch2_mark_replicas(c, &search.e, check);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 bool bch2_bkey_replicas_marked(struct bch_fs *c,
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 7f301fa..015bbd9 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -175,6 +175,9 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 	bch2_copygc_stop(c);
 	bch2_gc_thread_stop(c);
 
+	bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+	bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
 	/*
 	 * Flush journal before stopping allocators, because flushing journal
 	 * blacklist entries involves allocating new btree nodes:
@@ -224,9 +227,6 @@ nowrote_alloc:
 	for_each_member_device(ca, c, i)
 		bch2_dev_allocator_stop(ca);
 
-	bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
-	bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
-
 	clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
 	clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);