From 819f2dde79241915a6edda2c20bb4ca5d4017030 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Apr 2017 20:38:49 -0800 Subject: [PATCH] Update bcachefs sources to f026e4e024 --- .bcachefs_revision | 2 +- Makefile | 2 +- include/linux/bio.h | 4 +- libbcachefs/bcachefs.h | 6 + libbcachefs/bkey_methods.c | 14 +- libbcachefs/bkey_methods.h | 8 +- libbcachefs/btree_cache.c | 1 + libbcachefs/btree_gc.c | 5 +- libbcachefs/btree_io.c | 165 +++++++++++------ libbcachefs/btree_io.h | 5 +- libbcachefs/btree_iter.c | 22 ++- libbcachefs/btree_iter.h | 1 + libbcachefs/btree_types.h | 10 ++ libbcachefs/btree_update.c | 105 +++++------ libbcachefs/btree_update.h | 7 +- libbcachefs/debug.c | 2 +- libbcachefs/dirent.c | 63 ++++--- libbcachefs/dirent.h | 3 + libbcachefs/error.c | 99 ++++++++++ libbcachefs/error.h | 87 ++++----- libbcachefs/fs-io.c | 2 +- libbcachefs/fs.c | 2 +- libbcachefs/{fs-gc.c => fsck.c} | 308 ++++++++++++++++++++++++++------ libbcachefs/{fs-gc.h => fsck.h} | 0 libbcachefs/inode.c | 91 +++++----- libbcachefs/inode.h | 6 + libbcachefs/io.c | 4 +- libbcachefs/journal.c | 145 +++++++++------ libbcachefs/str_hash.h | 46 +++-- libbcachefs/super.c | 24 ++- libbcachefs/sysfs.c | 2 +- libbcachefs/util.c | 14 ++ libbcachefs/util.h | 5 +- libbcachefs/xattr.c | 92 ++++++---- libbcachefs/xattr.h | 3 + linux/bio.c | 46 +++-- 36 files changed, 964 insertions(+), 437 deletions(-) rename libbcachefs/{fs-gc.c => fsck.c} (75%) rename libbcachefs/{fs-gc.h => fsck.h} (100%) diff --git a/.bcachefs_revision b/.bcachefs_revision index 9a3f687..2a5e858 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -3b4024f94489e4d8dc8eb7f1278754a2545f8026 +f026e4e0243cc10e721504a8bfaa131ea8aa4c91 diff --git a/Makefile b/Makefile index e8a80c7..327fec2 100644 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ SRCS=bcachefs.c \ libbcachefs/dirent.c \ libbcachefs/error.c \ libbcachefs/extents.c \ - libbcachefs/fs-gc.c \ + libbcachefs/fsck.c \ libbcachefs/inode.c \ 
libbcachefs/io.c \ libbcachefs/journal.c \ diff --git a/include/linux/bio.h b/include/linux/bio.h index 94e9048..49d26b5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -288,8 +288,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi) { } -extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter, - struct bio *src, struct bvec_iter src_iter); +extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index c170e85..b1f2528 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -458,6 +458,7 @@ enum { BCH_FS_BDEV_MOUNTED, BCH_FS_ERROR, BCH_FS_FSCK_FIXED_ERRORS, + BCH_FS_FSCK_DONE, BCH_FS_FIXED_GENS, }; @@ -724,6 +725,11 @@ struct bch_fs { struct work_struct read_retry_work; spinlock_t read_retry_lock; + /* ERRORS */ + struct list_head fsck_errors; + struct mutex fsck_error_lock; + bool fsck_alloc_err; + /* FILESYSTEM */ wait_queue_head_t writeback_wait; atomic_t writeback_pages; diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index 51a13fc..cd9a60c 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -89,18 +89,20 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) ops->key_debugcheck(c, b, k); } -void bch2_val_to_text(struct bch_fs *c, enum bkey_type type, - char *buf, size_t size, struct bkey_s_c k) +char *bch2_val_to_text(struct bch_fs *c, enum bkey_type type, + char *buf, size_t size, struct bkey_s_c k) { const struct bkey_ops *ops = bch2_bkey_ops[type]; if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text) ops->val_to_text(c, buf, size, k); + + return buf; } -void bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type, - char *buf, size_t size, struct bkey_s_c k) +char 
*bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type, + char *buf, size_t size, struct bkey_s_c k) { const struct bkey_ops *ops = bch2_bkey_ops[type]; char *out = buf, *end = buf + size; @@ -109,9 +111,11 @@ void bch2_bkey_val_to_text(struct bch_fs *c, enum bkey_type type, if (k.k->type >= KEY_TYPE_GENERIC_NR && ops->val_to_text) { - out += scnprintf(out, end - out, " -> "); + out += scnprintf(out, end - out, ": "); ops->val_to_text(c, out, end - out, k); } + + return buf; } void bch2_bkey_swab(enum bkey_type type, diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index f795db6..2d526f5 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -67,10 +67,10 @@ const char *bch2_btree_bkey_invalid(struct bch_fs *, struct btree *, struct bkey_s_c); void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); -void bch2_val_to_text(struct bch_fs *, enum bkey_type, - char *, size_t, struct bkey_s_c); -void bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type, - char *, size_t, struct bkey_s_c); +char *bch2_val_to_text(struct bch_fs *, enum bkey_type, + char *, size_t, struct bkey_s_c); +char *bch2_bkey_val_to_text(struct bch_fs *, enum bkey_type, + char *, size_t, struct bkey_s_c); void bch2_bkey_swab(enum bkey_type, const struct bkey_format *, struct bkey_packed *); diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index bd47aec..c37c895 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -91,6 +91,7 @@ static struct btree *mca_bucket_alloc(struct bch_fs *c, gfp_t gfp) six_lock_init(&b->lock); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); + INIT_LIST_HEAD(&b->reachable); mca_data_alloc(c, b, gfp); return b->data ? 
b : NULL; diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index fc06a63..88ae396 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -605,10 +605,12 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES], bch2_btree_interior_update_will_free_node(c, as, old_nodes[i]); /* Repack everything with @new_format and sort down to one bset */ - for (i = 0; i < nr_old_nodes; i++) + for (i = 0; i < nr_old_nodes; i++) { new_nodes[i] = __bch2_btree_node_alloc_replacement(c, old_nodes[i], new_format, res); + list_add(&new_nodes[i]->reachable, &as->reachable_list); + } /* * Conceptually we concatenate the nodes together and slice them @@ -645,6 +647,7 @@ static void bch2_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES], set_btree_bset_end(n1, n1->set); + list_del_init(&n2->reachable); six_unlock_write(&n2->lock); bch2_btree_node_free_never_inserted(c, n2); six_unlock_intent(&n2->lock); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 8152dc4..82dd196 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -872,32 +872,57 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, struct nonce nonce) vstruct_end(i) - (void *) i->_data); } -#define btree_node_error(b, c, ptr, fmt, ...) \ - bch2_fs_inconsistent(c, \ - "btree node error at btree %u level %u/%u bucket %zu block %u u64s %u: " fmt,\ - (b)->btree_id, (b)->level, btree_node_root(c, b) \ - ? btree_node_root(c, b)->level : -1, \ - PTR_BUCKET_NR(ca, ptr), (b)->written, \ - le16_to_cpu((i)->u64s), ##__VA_ARGS__) - -static const char *validate_bset(struct bch_fs *c, struct btree *b, - struct bch_dev *ca, - const struct bch_extent_ptr *ptr, - struct bset *i, unsigned sectors, - unsigned *whiteout_u64s) +#define btree_node_error(c, b, ptr, msg, ...) 
\ +do { \ + if (write == READ && \ + !test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) { \ + mustfix_fsck_err(c, \ + "btree node read error at btree %u level %u/%u\n"\ + "sector %llu node offset %u bset u64s %u: " msg,\ + (b)->btree_id, (b)->level, \ + (c)->btree_roots[(b)->btree_id].level, \ + (u64) ptr->offset, (b)->written, \ + le16_to_cpu((i)->u64s), ##__VA_ARGS__); \ + } else { \ + bch_err(c, "%s at btree %u level %u/%u\n" \ + "sector %llu node offset %u bset u64s %u: " msg,\ + write == WRITE \ + ? "corrupt metadata in btree node write" \ + : "btree node error", \ + (b)->btree_id, (b)->level, \ + (c)->btree_roots[(b)->btree_id].level, \ + (u64) ptr->offset, (b)->written, \ + le16_to_cpu((i)->u64s), ##__VA_ARGS__); \ + ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + goto fsck_err; \ + } \ +} while (0) + +static int validate_bset(struct bch_fs *c, struct btree *b, + const struct bch_extent_ptr *ptr, + struct bset *i, unsigned sectors, + unsigned *whiteout_u64s, + int write) { struct bkey_packed *k, *prev = NULL; struct bpos prev_pos = POS_MIN; bool seen_non_whiteout = false; + int ret = 0; - if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) - return "unsupported bset version"; + if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION) { + btree_node_error(c, b, ptr, "unsupported bset version"); + i->u64s = 0; + return 0; + } - if (b->written + sectors > c->sb.btree_node_size) - return "bset past end of btree node"; + if (b->written + sectors > c->sb.btree_node_size) { + btree_node_error(c, b, ptr, "bset past end of btree node"); + i->u64s = 0; + return 0; + } - if (i != &b->data->keys && !i->u64s) - btree_node_error(b, c, ptr, "empty set"); + if (b->written && !i->u64s) + btree_node_error(c, b, ptr, "empty set"); if (!BSET_SEPARATE_WHITEOUTS(i)) { seen_non_whiteout = true; @@ -911,7 +936,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b, const char *invalid; if (!k->u64s) { - btree_node_error(b, c, ptr, + btree_node_error(c, b, ptr, "KEY_U64s 0: %zu bytes of 
metadata lost", vstruct_end(i) - (void *) k); @@ -920,7 +945,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b, } if (bkey_next(k) > vstruct_last(i)) { - btree_node_error(b, c, ptr, + btree_node_error(c, b, ptr, "key extends past end of bset"); i->u64s = cpu_to_le16((u64 *) k - i->_data); @@ -928,7 +953,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b, } if (k->format > KEY_FORMAT_CURRENT) { - btree_node_error(b, c, ptr, + btree_node_error(c, b, ptr, "invalid bkey format %u", k->format); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -947,8 +972,8 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b, char buf[160]; bch2_bkey_val_to_text(c, btree_node_type(b), - buf, sizeof(buf), u); - btree_node_error(b, c, ptr, + buf, sizeof(buf), u); + btree_node_error(c, b, ptr, "invalid bkey %s: %s", buf, invalid); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -969,7 +994,7 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b, *whiteout_u64s = k->_data - i->_data; seen_non_whiteout = true; } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) { - btree_node_error(b, c, ptr, + btree_node_error(c, b, ptr, "keys out of order: %llu:%llu > %llu:%llu", prev_pos.inode, prev_pos.offset, @@ -984,7 +1009,8 @@ static const char *validate_bset(struct bch_fs *c, struct btree *b, } SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN); - return NULL; +fsck_err: + return ret; } static bool extent_contains_ptr(struct bkey_s_c_extent e, @@ -1012,7 +1038,7 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, const char *err; struct bch_csum csum; struct nonce nonce; - int ret; + int ret, write = READ; iter = mempool_alloc(&c->fill_iter, GFP_NOIO); __bch2_btree_node_iter_init(iter, btree_node_is_extents(b)); @@ -1115,9 +1141,10 @@ void bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, sectors = vstruct_sectors(bne, c->block_bits); } - err = validate_bset(c, b, ca, ptr, i, sectors, 
&whiteout_u64s); - if (err) - goto err; + ret = validate_bset(c, b, ptr, i, sectors, + &whiteout_u64s, READ); + if (ret) + goto fsck_err; b->written += sectors; @@ -1172,8 +1199,10 @@ out: mempool_free(iter, &c->fill_iter); return; err: + btree_node_error(c, b, ptr, "%s", err); +fsck_err: + bch2_inconsistent_error(c); set_btree_node_read_error(b); - btree_node_error(b, c, ptr, "%s", err); goto out; } @@ -1309,6 +1338,23 @@ static void btree_node_write_endio(struct bio *bio) } } +static int validate_bset_for_write(struct bch_fs *c, struct btree *b, + struct bset *i, unsigned sectors) +{ + const struct bch_extent_ptr *ptr; + unsigned whiteout_u64s = 0; + int ret; + + extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr) + break; + + ret = validate_bset(c, b, ptr, i, sectors, &whiteout_u64s, WRITE); + if (ret) + bch2_fatal_error(c); + + return ret; +} + void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct closure *parent, enum six_lock_type lock_type_held) @@ -1343,18 +1389,24 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, if (!(old & (1 << BTREE_NODE_dirty))) return; + if (b->written && + !btree_node_may_write(b)) + return; + if (old & (1 << BTREE_NODE_write_in_flight)) { btree_node_wait_on_io(b); continue; } new &= ~(1 << BTREE_NODE_dirty); + new &= ~(1 << BTREE_NODE_need_write); new |= (1 << BTREE_NODE_write_in_flight); new |= (1 << BTREE_NODE_just_written); new ^= (1 << BTREE_NODE_write_idx); } while (cmpxchg_acquire(&b->flags, old, new) != old); BUG_ON(!list_empty(&b->write_blocked)); + BUG_ON(!list_empty_careful(&b->reachable) != !b->written); BUG_ON(b->written >= c->sb.btree_node_size); BUG_ON(bset_written(b, btree_bset_last(b))); @@ -1430,13 +1482,17 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, clear_needs_whiteout(i); - if (b->written && !i->u64s) { - /* Nothing to write: */ - btree_bounce_free(c, order, used_mempool, data); - btree_node_write_done(c, b); - return; - } + /* do we have data to 
write? */ + if (b->written && !i->u64s) + goto nowrite; + + bytes_to_write = vstruct_end(i) - data; + sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; + + memset(data + bytes_to_write, 0, + (sectors_to_write << 9) - bytes_to_write); + BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size); BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); @@ -1445,6 +1501,11 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, nonce = btree_nonce(b, i, b->written << 9); + /* if we're going to be encrypting, check metadata validity first: */ + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + validate_bset_for_write(c, b, i, sectors_to_write)) + goto err; + if (bn) { bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, &bn->flags, @@ -1464,15 +1525,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); } - bytes_to_write = vstruct_end(i) - data; - sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9; - - memset(data + bytes_to_write, 0, - (sectors_to_write << 9) - bytes_to_write); - - BUG_ON(b->written + sectors_to_write > c->sb.btree_node_size); - - trace_btree_write(b, bytes_to_write, sectors_to_write); + /* if we're not encrypting, check metadata after checksumming: */ + if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + validate_bset_for_write(c, b, i, sectors_to_write)) + goto err; /* * We handle btree write errors by immediately halting the journal - @@ -1488,14 +1544,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, * break: */ if (bch2_journal_error(&c->journal) || - c->opts.nochanges) { - set_btree_node_noevict(b); - b->written += sectors_to_write; + c->opts.nochanges) + goto err; - btree_bounce_free(c, order, used_mempool, data); - btree_node_write_done(c, b); - return; - } + trace_btree_write(b, bytes_to_write, sectors_to_write); bio = bio_alloc_bioset(GFP_NOIO, 1 << order, &c->bio_write); @@ 
-1543,6 +1595,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, b->written += sectors_to_write; bch2_submit_wbio_replicas(wbio, c, &k.key); + return; +err: + set_btree_node_noevict(b); + b->written += sectors_to_write; +nowrite: + btree_bounce_free(c, order, used_mempool, data); + btree_node_write_done(c, b); } /* diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 3014b5f..d023dfa 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -27,7 +27,8 @@ static inline void btree_node_wait_on_io(struct btree *b) static inline bool btree_node_may_write(struct btree *b) { - return list_empty_careful(&b->write_blocked); + return list_empty_careful(&b->write_blocked) && + list_empty_careful(&b->reachable); } enum compact_mode { @@ -80,6 +81,8 @@ void bch2_btree_node_write(struct bch_fs *, struct btree *, #define bch2_btree_node_write_dirty(_c, _b, _cl, cond) \ do { \ while ((_b)->written && btree_node_dirty(_b) && (cond)) { \ + set_btree_node_need_write(_b); \ + \ if (!btree_node_may_write(_b)) \ break; \ \ diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 55303f0..0b28082 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1109,6 +1109,26 @@ void __bch2_btree_iter_init(struct btree_iter *iter, struct bch_fs *c, prefetch(c->btree_roots[btree_id].b); } +void bch2_btree_iter_unlink(struct btree_iter *iter) +{ + struct btree_iter *linked; + + __bch2_btree_iter_unlock(iter); + + if (!btree_iter_linked(iter)) + return; + + for_each_linked_btree_iter(iter, linked) { + + if (linked->next == iter) { + linked->next = iter->next; + return; + } + } + + BUG(); +} + void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new) { BUG_ON(btree_iter_linked(new)); @@ -1128,7 +1148,7 @@ void bch2_btree_iter_link(struct btree_iter *iter, struct btree_iter *new) void bch2_btree_iter_copy(struct btree_iter *dst, struct btree_iter *src) { - bch2_btree_iter_unlock(dst); + 
__bch2_btree_iter_unlock(dst); memcpy(dst, src, offsetof(struct btree_iter, next)); dst->nodes_locked = dst->nodes_intent_locked = 0; } diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 39731f0..7cf9bd6 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -185,6 +185,7 @@ static inline void bch2_btree_iter_init_intent(struct btree_iter *iter, } void bch2_btree_iter_link(struct btree_iter *, struct btree_iter *); +void bch2_btree_iter_unlink(struct btree_iter *); void bch2_btree_iter_copy(struct btree_iter *, struct btree_iter *); static inline struct bpos btree_type_successor(enum btree_id id, diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 915e42c..a0f5b57 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -110,6 +110,14 @@ struct btree { */ struct list_head write_blocked; + /* + * Also for asynchronous splits/interior node updates: + * If a btree node isn't reachable yet, we don't want to kick off + * another write - because that write also won't yet be reachable and + * marking it as completed before it's reachable would be incorrect: + */ + struct list_head reachable; + struct open_bucket *ob; /* lru list */ @@ -136,6 +144,7 @@ enum btree_flags { BTREE_NODE_read_error, BTREE_NODE_write_error, BTREE_NODE_dirty, + BTREE_NODE_need_write, BTREE_NODE_noevict, BTREE_NODE_write_idx, BTREE_NODE_accessed, @@ -146,6 +155,7 @@ enum btree_flags { BTREE_FLAG(read_error); BTREE_FLAG(write_error); BTREE_FLAG(dirty); +BTREE_FLAG(need_write); BTREE_FLAG(noevict); BTREE_FLAG(write_idx); BTREE_FLAG(accessed); diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c index 196b742..cfd2a45 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree_update.c @@ -162,9 +162,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b, trace_btree_node_free(c, b); BUG_ON(btree_node_dirty(b)); + BUG_ON(btree_node_need_write(b)); BUG_ON(b == btree_node_root(c, b)); 
BUG_ON(b->ob); BUG_ON(!list_empty(&b->write_blocked)); + BUG_ON(!list_empty(&b->reachable)); clear_btree_node_noevict(b); @@ -589,7 +591,6 @@ struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, unsigned nr_nodes = btree_reserve_required_nodes(depth) + extra_nodes; return __bch2_btree_reserve_get(c, nr_nodes, flags, cl); - } int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id, @@ -598,6 +599,7 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id, struct closure cl; struct btree_reserve *reserve; struct btree *b; + LIST_HEAD(reachable_list); closure_init_stack(&cl); @@ -614,11 +616,14 @@ int bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id, } b = __btree_root_alloc(c, 0, id, reserve); + list_add(&b->reachable, &reachable_list); bch2_btree_node_write(c, b, writes, SIX_LOCK_intent); bch2_btree_set_root_initial(c, b, reserve); bch2_btree_open_bucket_put(c, b); + + list_del_init(&b->reachable); six_unlock_intent(&b->lock); bch2_btree_reserve_put(c, reserve); @@ -659,6 +664,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_iter *iter, bch2_btree_bset_insert_key(iter, b, node_iter, insert); set_btree_node_dirty(b); + set_btree_node_need_write(b); } /* Inserting into a given leaf node (last stage of insert): */ @@ -798,12 +804,6 @@ void bch2_btree_journal_key(struct btree_insert *trans, u64 seq = trans->journal_res.seq; bool needs_whiteout = insert->k.needs_whiteout; - /* - * have a bug where we're seeing an extent with an invalid crc - * entry in the journal, trying to track it down: - */ - BUG_ON(bch2_bkey_invalid(c, b->btree_id, bkey_i_to_s_c(insert))); - /* ick */ insert->k.needs_whiteout = false; bch2_journal_add_keys(j, &trans->journal_res, @@ -878,6 +878,8 @@ bch2_btree_interior_update_alloc(struct bch_fs *c) closure_init(&as->cl, &c->cl); as->c = c; as->mode = BTREE_INTERIOR_NO_UPDATE; + INIT_LIST_HEAD(&as->write_blocked_list); + INIT_LIST_HEAD(&as->reachable_list); bch2_keylist_init(&as->parent_keys, as->inline_keys, 
ARRAY_SIZE(as->inline_keys)); @@ -908,6 +910,18 @@ static void btree_interior_update_nodes_reachable(struct closure *cl) mutex_lock(&c->btree_interior_update_lock); + while (!list_empty(&as->reachable_list)) { + struct btree *b = list_first_entry(&as->reachable_list, + struct btree, reachable); + list_del_init(&b->reachable); + mutex_unlock(&c->btree_interior_update_lock); + + six_lock_read(&b->lock); + bch2_btree_node_write_dirty(c, b, NULL, btree_node_need_write(b)); + six_unlock_read(&b->lock); + mutex_lock(&c->btree_interior_update_lock); + } + for (i = 0; i < as->nr_pending; i++) bch2_btree_node_free_ondisk(c, &as->pending[i]); as->nr_pending = 0; @@ -929,6 +943,7 @@ static void btree_interior_update_nodes_written(struct closure *cl) if (bch2_journal_error(&c->journal)) { /* XXX what? */ + /* we don't want to free the nodes on disk, that's what */ } /* XXX: missing error handling, damnit */ @@ -962,7 +977,8 @@ retry: list_del(&as->write_blocked_list); mutex_unlock(&c->btree_interior_update_lock); - bch2_btree_node_write_dirty(c, b, NULL, true); + bch2_btree_node_write_dirty(c, b, NULL, + btree_node_need_write(b)); six_unlock_read(&b->lock); break; @@ -1135,6 +1151,7 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c, } clear_btree_node_dirty(b); + clear_btree_node_need_write(b); w = btree_current_write(b); llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list) @@ -1152,6 +1169,8 @@ void bch2_btree_interior_update_will_free_node(struct bch_fs *c, &as->journal, interior_update_flush); bch2_journal_pin_drop(&c->journal, &w->journal); + if (!list_empty(&b->reachable)) + list_del_init(&b->reachable); mutex_unlock(&c->btree_interior_update_lock); } @@ -1265,7 +1284,8 @@ bch2_btree_insert_keys_interior(struct btree *b, * node) */ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n1, - struct btree_reserve *reserve) + struct btree_reserve *reserve, + struct btree_interior_update *as) { size_t nr_packed = 0, 
nr_unpacked = 0; struct btree *n2; @@ -1273,6 +1293,8 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n struct bkey_packed *k, *prev = NULL; n2 = bch2_btree_node_alloc(iter->c, n1->level, iter->btree_id, reserve); + list_add(&n2->reachable, &as->reachable_list); + n2->data->max_key = n1->data->max_key; n2->data->format = n1->format; n2->key.k.p = n1->key.k.p; @@ -1421,13 +1443,15 @@ static void btree_split(struct btree *b, struct btree_iter *iter, bch2_btree_interior_update_will_free_node(c, as, b); n1 = bch2_btree_node_alloc_replacement(c, b, reserve); + list_add(&n1->reachable, &as->reachable_list); + if (b->level) btree_split_insert_keys(iter, n1, insert_keys, reserve); if (vstruct_blocks(n1->data, c->block_bits) > BTREE_SPLIT_THRESHOLD(c)) { trace_btree_node_split(c, b, b->nr.live_u64s); - n2 = __btree_split_node(iter, n1, reserve); + n2 = __btree_split_node(iter, n1, reserve, as); bch2_btree_build_aux_trees(n2); bch2_btree_build_aux_trees(n1); @@ -1449,6 +1473,8 @@ static void btree_split(struct btree *b, struct btree_iter *iter, n3 = __btree_root_alloc(c, b->level + 1, iter->btree_id, reserve); + list_add(&n3->reachable, &as->reachable_list); + n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; @@ -1748,6 +1774,8 @@ retry: bch2_btree_interior_update_will_free_node(c, as, m); n = bch2_btree_node_alloc(c, b->level, b->btree_id, reserve); + list_add(&n->reachable, &as->reachable_list); + n->data->min_key = prev->data->min_key; n->data->max_key = next->data->max_key; n->data->format = new_f; @@ -1914,8 +1942,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans) int ret; trans_for_each_entry(trans, i) { - EBUG_ON(i->iter->level); - EBUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); + BUG_ON(i->iter->level); + BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); } sort(trans->entries, trans->nr, sizeof(trans->entries[0]), @@ -2076,6 +2104,19 @@ err: goto out; } +int bch2_btree_delete_at(struct btree_iter 
*iter, unsigned flags) +{ + struct bkey_i k; + + bkey_init(&k.k); + k.k.p = iter->pos; + + return bch2_btree_insert_at(iter->c, NULL, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_USE_RESERVE|flags, + BTREE_INSERT_ENTRY(iter, &k)); +} + int bch2_btree_insert_list_at(struct btree_iter *iter, struct keylist *keys, struct disk_reservation *disk_res, @@ -2104,45 +2145,6 @@ int bch2_btree_insert_list_at(struct btree_iter *iter, return 0; } -/** - * bch_btree_insert_check_key - insert dummy key into btree - * - * We insert a random key on a cache miss, then compare exchange on it - * once the cache promotion or backing device read completes. This - * ensures that if this key is written to after the read, the read will - * lose and not overwrite the key with stale data. - * - * Return values: - * -EAGAIN: @iter->cl was put on a waitlist waiting for btree node allocation - * -EINTR: btree node was changed while upgrading to write lock - */ -int bch2_btree_insert_check_key(struct btree_iter *iter, - struct bkey_i *check_key) -{ - struct bpos saved_pos = iter->pos; - struct bkey_i_cookie *cookie; - BKEY_PADDED(key) tmp; - int ret; - - BUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&check_key->k))); - - check_key->k.type = KEY_TYPE_COOKIE; - set_bkey_val_bytes(&check_key->k, sizeof(struct bch_cookie)); - - cookie = bkey_i_to_cookie(check_key); - get_random_bytes(&cookie->v, sizeof(cookie->v)); - - bkey_copy(&tmp.key, check_key); - - ret = bch2_btree_insert_at(iter->c, NULL, NULL, NULL, - BTREE_INSERT_ATOMIC, - BTREE_INSERT_ENTRY(iter, &tmp.key)); - - bch2_btree_iter_rewind(iter, saved_pos); - - return ret; -} - /** * bch_btree_insert - insert keys into the extent btree * @c: pointer to struct bch_fs @@ -2310,6 +2312,7 @@ int bch2_btree_node_rewrite(struct btree_iter *iter, struct btree *b, bch2_btree_interior_update_will_free_node(c, as, b); n = bch2_btree_node_alloc_replacement(c, b, reserve); + list_add(&n->reachable, &as->reachable_list); bch2_btree_build_aux_trees(n); 
six_unlock_write(&n->lock); diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index b18c44c..a933d5a 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -64,7 +64,7 @@ struct pending_btree_node_free { */ struct btree_interior_update { struct closure cl; - struct bch_fs *c; + struct bch_fs *c; struct list_head list; @@ -86,6 +86,7 @@ struct btree_interior_update { */ struct btree *b; struct list_head write_blocked_list; + struct list_head reachable_list; /* * BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now @@ -317,7 +318,6 @@ struct btree_insert { int __bch2_btree_insert_at(struct btree_insert *); - #define _TENTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...) N #define COUNT_ARGS(...) _TENTH_ARG(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1) @@ -380,6 +380,8 @@ int __bch2_btree_insert_at(struct btree_insert *); */ #define BTREE_INSERT_JOURNAL_REPLAY (1 << 3) +int bch2_btree_delete_at(struct btree_iter *, unsigned); + int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *, struct disk_reservation *, struct extent_insert_hook *, u64 *, unsigned); @@ -403,7 +405,6 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans, return u64s <= trans->journal_res.u64s; } -int bch2_btree_insert_check_key(struct btree_iter *, struct bkey_i *); int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct disk_reservation *, struct extent_insert_hook *, u64 *, int flags); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 248bc7a..bf160e0 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -15,7 +15,7 @@ #include "debug.h" #include "error.h" #include "extents.h" -#include "fs-gc.h" +#include "fsck.h" #include "inode.h" #include "io.h" #include "super.h" diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index 503f0dc..e2978ba 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -20,6 +20,11 @@ unsigned bch2_dirent_name_bytes(struct 
bkey_s_c_dirent d) return len; } +static unsigned dirent_val_u64s(unsigned len) +{ + return DIV_ROUND_UP(sizeof(struct bch_dirent) + len, sizeof(u64)); +} + static u64 bch2_dirent_hash(const struct bch_hash_info *info, const struct qstr *name) { @@ -64,7 +69,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); } -static const struct bch_hash_desc dirent_hash_desc = { +const struct bch_hash_desc bch2_dirent_hash_desc = { .btree_id = BTREE_ID_DIRENTS, .key_type = BCH_DIRENT, .whiteout_type = BCH_DIRENT_WHITEOUT, @@ -77,12 +82,30 @@ static const struct bch_hash_desc dirent_hash_desc = { static const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) { + struct bkey_s_c_dirent d; + unsigned len; + switch (k.k->type) { case BCH_DIRENT: - return bkey_val_bytes(k.k) < sizeof(struct bch_dirent) - ? "value too small" - : NULL; + if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) + return "value too small"; + + d = bkey_s_c_to_dirent(k); + len = bch2_dirent_name_bytes(d); + + if (!len) + return "empty name"; + + if (bkey_val_u64s(k.k) > dirent_val_u64s(len)) + return "value too big"; + + if (len > NAME_MAX) + return "dirent name too big"; + if (memchr(d.v->d_name, '/', len)) + return "dirent name has invalid characters"; + + return NULL; case BCH_DIRENT_WHITEOUT: return bkey_val_bytes(k.k) != 0 ? 
"value size should be zero" @@ -97,21 +120,15 @@ static void bch2_dirent_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { struct bkey_s_c_dirent d; + size_t n = 0; switch (k.k->type) { case BCH_DIRENT: d = bkey_s_c_to_dirent(k); - if (size) { - unsigned n = min_t(unsigned, size, - bch2_dirent_name_bytes(d)); - memcpy(buf, d.v->d_name, n); - buf[size - 1] = '\0'; - buf += n; - size -= n; - } - - scnprintf(buf, size, " -> %llu", d.v->d_inum); + n += bch_scnmemcpy(buf + n, size - n, d.v->d_name, + bch2_dirent_name_bytes(d)); + n += scnprintf(buf + n, size - n, " -> %llu", d.v->d_inum); break; case BCH_DIRENT_WHITEOUT: scnprintf(buf, size, "whiteout"); @@ -128,9 +145,7 @@ static struct bkey_i_dirent *dirent_create_key(u8 type, const struct qstr *name, u64 dst) { struct bkey_i_dirent *dirent; - unsigned u64s = BKEY_U64s + - DIV_ROUND_UP(sizeof(struct bch_dirent) + name->len, - sizeof(u64)); + unsigned u64s = BKEY_U64s + dirent_val_u64s(name->len); dirent = kmalloc(u64s * sizeof(u64), GFP_NOFS); if (!dirent) @@ -163,7 +178,7 @@ int bch2_dirent_create(struct bch_fs *c, u64 dir_inum, if (!dirent) return -ENOMEM; - ret = bch2_hash_set(dirent_hash_desc, hash_info, c, dir_inum, + ret = bch2_hash_set(bch2_dirent_hash_desc, hash_info, c, dir_inum, journal_seq, &dirent->k_i, flags); kfree(dirent); @@ -223,13 +238,13 @@ retry: * from the original hashed position (like we do when creating dirents, * in bch_hash_set) - we never move existing dirents to different slot: */ - old_src = bch2_hash_lookup_at(dirent_hash_desc, + old_src = bch2_hash_lookup_at(bch2_dirent_hash_desc, &src_ei->str_hash, &src_iter, src_name); if ((ret = btree_iter_err(old_src))) goto err; - ret = bch2_hash_needs_whiteout(dirent_hash_desc, + ret = bch2_hash_needs_whiteout(bch2_dirent_hash_desc, &src_ei->str_hash, &whiteout_iter, &src_iter); if (ret < 0) @@ -242,8 +257,8 @@ retry: * to do that check for us for correctness: */ old_dst = mode == BCH_RENAME - ? 
bch2_hash_hole_at(dirent_hash_desc, &dst_iter) - : bch2_hash_lookup_at(dirent_hash_desc, + ? bch2_hash_hole_at(bch2_dirent_hash_desc, &dst_iter) + : bch2_hash_lookup_at(bch2_dirent_hash_desc, &dst_ei->str_hash, &dst_iter, dst_name); if ((ret = btree_iter_err(old_dst))) @@ -330,7 +345,7 @@ int bch2_dirent_delete(struct bch_fs *c, u64 dir_inum, const struct qstr *name, u64 *journal_seq) { - return bch2_hash_delete(dirent_hash_desc, hash_info, + return bch2_hash_delete(bch2_dirent_hash_desc, hash_info, c, dir_inum, journal_seq, name); } @@ -342,7 +357,7 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, struct bkey_s_c k; u64 inum; - k = bch2_hash_lookup(dirent_hash_desc, hash_info, c, + k = bch2_hash_lookup(bch2_dirent_hash_desc, hash_info, c, dir_inum, &iter, name); if (IS_ERR(k.k)) { bch2_btree_iter_unlock(&iter); diff --git a/libbcachefs/dirent.h b/libbcachefs/dirent.h index b1a30bd..fb2950a 100644 --- a/libbcachefs/dirent.h +++ b/libbcachefs/dirent.h @@ -1,6 +1,9 @@ #ifndef _BCACHE_DIRENT_H #define _BCACHE_DIRENT_H +#include "str_hash.h" + +extern const struct bch_hash_desc bch2_dirent_hash_desc; extern const struct bkey_ops bch2_bkey_dirent_ops; struct qstr; diff --git a/libbcachefs/error.c b/libbcachefs/error.c index 8babf19..5b7316d 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -49,3 +49,102 @@ void bch2_nonfatal_io_error(struct bch_dev *ca) { queue_work(system_long_wq, &ca->io_error_work); } + +#ifdef __KERNEL__ +#define ask_yn() false +#else +#include "tools-util.h" +#endif + +enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, + const char *fmt, ...) 
+{ + struct fsck_err_state *s; + va_list args; + bool fix = false, print = true, suppressing = false; + char _buf[sizeof(s->buf)], *buf = _buf; + + mutex_lock(&c->fsck_error_lock); + + if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) + goto print; + + list_for_each_entry(s, &c->fsck_errors, list) + if (s->fmt == fmt) + goto found; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) { + if (!c->fsck_alloc_err) + bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); + c->fsck_alloc_err = true; + buf = _buf; + goto print; + } + + INIT_LIST_HEAD(&s->list); + s->fmt = fmt; +found: + list_move(&s->list, &c->fsck_errors); + s->nr++; + suppressing = s->nr == 10; + print = s->nr <= 10; + buf = s->buf; +print: + va_start(args, fmt); + vscnprintf(buf, sizeof(_buf), fmt, args); + va_end(args); + + if (flags & FSCK_CAN_FIX) { + if (c->opts.fix_errors == FSCK_ERR_ASK) { + printk(KERN_ERR "%s: fix?", buf); + fix = ask_yn(); + } else if (c->opts.fix_errors == FSCK_ERR_YES || + (c->opts.nochanges && + !(flags & FSCK_CAN_IGNORE))) { + if (print) + bch_err(c, "%s, fixing", buf); + fix = true; + } else { + if (print) + bch_err(c, "%s, not fixing", buf); + fix = false; + } + } else if (flags & FSCK_NEED_FSCK) { + if (print) + bch_err(c, "%s (run fsck to correct)", buf); + } else { + if (print) + bch_err(c, "%s (repair unimplemented)", buf); + } + + if (suppressing) + bch_err(c, "Ratelimiting new instances of previous error"); + + mutex_unlock(&c->fsck_error_lock); + + if (fix) + set_bit(BCH_FS_FSCK_FIXED_ERRORS, &c->flags); + + return fix ? FSCK_ERR_FIX + : flags & FSCK_CAN_IGNORE ? 
FSCK_ERR_IGNORE + : FSCK_ERR_EXIT; +} + +void bch2_flush_fsck_errs(struct bch_fs *c) +{ + struct fsck_err_state *s, *n; + + mutex_lock(&c->fsck_error_lock); + set_bit(BCH_FS_FSCK_DONE, &c->flags); + + list_for_each_entry_safe(s, n, &c->fsck_errors, list) { + if (s->nr > 10) + bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf); + + list_del(&s->list); + kfree(s); + } + + mutex_unlock(&c->fsck_error_lock); +} diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 5f81c34..750c676 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -95,62 +95,38 @@ enum { BCH_FSCK_UNKNOWN_VERSION = 4, }; -/* These macros return true if error should be fixed: */ - -/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ - enum fsck_err_opts { FSCK_ERR_NO, FSCK_ERR_YES, FSCK_ERR_ASK, }; -#ifdef __KERNEL__ -#define __fsck_err_should_fix(c, msg, ...) \ -({ \ - bool _fix = (c)->opts.fix_errors; \ - bch_err(c, msg ", %sfixing", ##__VA_ARGS__, _fix ? "" : "not ");\ - _fix; \ -}) -#else -#include "tools-util.h" +enum fsck_err_ret { + FSCK_ERR_IGNORE = 0, + FSCK_ERR_FIX = 1, + FSCK_ERR_EXIT = 2, +}; -#define __fsck_err_should_fix(c, msg, ...) \ -({ \ - bool _fix = false; \ - switch ((c)->opts.fix_errors) { \ - case FSCK_ERR_ASK: \ - printf(msg ": fix?", ##__VA_ARGS__); \ - _fix = ask_yn(); \ - break; \ - case FSCK_ERR_YES: \ - bch_err(c, msg ", fixing", ##__VA_ARGS__); \ - _fix = true; \ - break; \ - case FSCK_ERR_NO: \ - bch_err(c, msg, ##__VA_ARGS__); \ - _fix = false; \ - break; \ - } \ - _fix; \ -}) -#endif +struct fsck_err_state { + struct list_head list; + const char *fmt; + u64 nr; + char buf[512]; +}; + +#define FSCK_CAN_FIX (1 << 0) +#define FSCK_CAN_IGNORE (1 << 1) +#define FSCK_NEED_FSCK (1 << 2) -#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \ +enum fsck_err_ret bch2_fsck_err(struct bch_fs *, + unsigned, const char *, ...); +void bch2_flush_fsck_errs(struct bch_fs *); + +#define __fsck_err(c, _flags, msg, ...) 
\ ({ \ - bool _fix; \ - \ - if (_can_fix) { \ - _fix = __fsck_err_should_fix(c, msg, ##__VA_ARGS__); \ - } else { \ - bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \ - _fix = false; \ - } \ + int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\ \ - if (_fix) \ - set_bit(BCH_FS_FSCK_FIXED_ERRORS, &(c)->flags); \ - \ - if (!_fix && !_can_ignore) { \ + if (_fix == FSCK_ERR_EXIT) { \ bch_err(c, "Unable to continue, halting"); \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ goto fsck_err; \ @@ -159,24 +135,27 @@ enum fsck_err_opts { _fix; \ }) -#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \ - ((cond) ? __fsck_err(c, _can_fix, _can_ignore, \ - _nofix_msg, ##__VA_ARGS__) : false) +/* These macros return true if error should be fixed: */ + +/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ + +#define __fsck_err_on(cond, c, _flags, ...) \ + ((cond) ? __fsck_err(c, _flags, ##__VA_ARGS__) : false) #define unfixable_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__) + __fsck_err_on(cond, c, FSCK_CAN_IGNORE, ##__VA_ARGS__) #define need_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__) + __fsck_err_on(cond, c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, ##__VA_ARGS__) #define mustfix_fsck_err(c, ...) \ - __fsck_err(c, true, false, "not fixing", ##__VA_ARGS__) + __fsck_err(c, FSCK_CAN_FIX, ##__VA_ARGS__) #define mustfix_fsck_err_on(cond, c, ...) \ - __fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__) + __fsck_err_on(cond, c, FSCK_CAN_FIX, ##__VA_ARGS__) #define fsck_err_on(cond, c, ...) 
\ - __fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__) + __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, ##__VA_ARGS__) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 8ad192c..dc5c7f4 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -5,8 +5,8 @@ #include "clock.h" #include "error.h" #include "fs.h" -#include "fs-gc.h" #include "fs-io.h" +#include "fsck.h" #include "inode.h" #include "journal.h" #include "io.h" diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 94c5a9e..3c02b0c 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -7,8 +7,8 @@ #include "dirent.h" #include "extents.h" #include "fs.h" -#include "fs-gc.h" #include "fs-io.h" +#include "fsck.h" #include "inode.h" #include "journal.h" #include "keylist.h" diff --git a/libbcachefs/fs-gc.c b/libbcachefs/fsck.c similarity index 75% rename from libbcachefs/fs-gc.c rename to libbcachefs/fsck.c index 03370c0..3fe0387 100644 --- a/libbcachefs/fs-gc.c +++ b/libbcachefs/fsck.c @@ -4,10 +4,11 @@ #include "dirent.h" #include "error.h" #include "fs.h" -#include "fs-gc.h" +#include "fsck.h" #include "inode.h" #include "keylist.h" #include "super.h" +#include "xattr.h" #include /* struct qstr */ #include @@ -37,12 +38,16 @@ static int remove_dirent(struct bch_fs *c, struct btree_iter *iter, bch2_btree_iter_unlock(iter); ret = bch2_inode_find_by_inum(c, dir_inum, &dir_inode); - if (ret) + if (ret) { + bch_err(c, "remove_dirent: err %i looking up directory inode", ret); goto err; + } dir_hash_info = bch2_hash_info_init(c, &dir_inode); ret = bch2_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); + if (ret) + bch_err(c, "remove_dirent: err %i deleting dirent", ret); err: kfree(buf); return ret; @@ -108,6 +113,118 @@ static int walk_inode(struct bch_fs *c, struct inode_walker *w, u64 inum) return 0; } +struct hash_check { + struct bch_hash_info info; + struct btree_iter chain; + struct 
btree_iter iter; + u64 next; +}; + +static void hash_check_init(const struct bch_hash_desc desc, + struct hash_check *h, struct bch_fs *c) +{ + bch2_btree_iter_init(&h->chain, c, desc.btree_id, POS_MIN); + bch2_btree_iter_init(&h->iter, c, desc.btree_id, POS_MIN); +} + +static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c, + const struct bch_inode_unpacked *bi) +{ + h->info = bch2_hash_info_init(c, bi); + h->next = -1; +} + +static int hash_redo_key(const struct bch_hash_desc desc, + struct hash_check *h, struct bch_fs *c, + struct btree_iter *k_iter, struct bkey_s_c k, + u64 hashed) +{ + struct bkey_i *tmp; + int ret = 0; + + tmp = kmalloc(bkey_bytes(k.k), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + bkey_reassemble(tmp, k); + + ret = bch2_btree_delete_at(k_iter, 0); + if (ret) + goto err; + + bch2_btree_iter_unlock(k_iter); + + bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, + tmp, BCH_HASH_SET_MUST_CREATE); +err: + kfree(tmp); + return ret; +} + +static int hash_check_key(const struct bch_hash_desc desc, + struct hash_check *h, struct bch_fs *c, + struct btree_iter *k_iter, struct bkey_s_c k) +{ + char buf[200]; + u64 hashed; + int ret = 0; + + if (k.k->type != desc.whiteout_type && + k.k->type != desc.key_type) + return 0; + + if (k.k->p.offset != h->next) { + if (!btree_iter_linked(&h->chain)) { + bch2_btree_iter_link(k_iter, &h->chain); + bch2_btree_iter_link(k_iter, &h->iter); + } + bch2_btree_iter_copy(&h->chain, k_iter); + } + h->next = k.k->p.offset + 1; + + if (k.k->type != desc.key_type) + return 0; + + hashed = desc.hash_bkey(&h->info, k); + + if (fsck_err_on(hashed < h->chain.pos.offset || + hashed > k.k->p.offset, c, + "hash table key at wrong offset: %llu, " + "hashed to %llu chain starts at %llu\n%s", + k.k->p.offset, hashed, h->chain.pos.offset, + bch2_bkey_val_to_text(c, desc.btree_id, + buf, sizeof(buf), k))) { + ret = hash_redo_key(desc, h, c, k_iter, k, hashed); + if (ret) { + bch_err(c, "hash_redo_key err %i", 
ret); + return ret; + } + return 1; + } + + if (!bkey_cmp(h->chain.pos, k_iter->pos)) + return 0; + + bch2_btree_iter_copy(&h->iter, &h->chain); + while (bkey_cmp(h->iter.pos, k_iter->pos) < 0) { + struct bkey_s_c k2 = bch2_btree_iter_peek(&h->iter); + + if (fsck_err_on(k2.k->type == desc.key_type && + !desc.cmp_bkey(k, k2), c, + "duplicate hash table keys:\n%s", + bch2_bkey_val_to_text(c, desc.btree_id, + buf, sizeof(buf), k))) { + ret = bch2_hash_delete_at(desc, &h->info, &h->iter, NULL); + if (ret) + return ret; + return 1; + } + bch2_btree_iter_advance_pos(&h->iter); + } +fsck_err: + return ret; +} + /* * Walk extents: verify that extents have a corresponding S_ISREG inode, and * that i_size an i_sectors are consistent @@ -130,14 +247,18 @@ static int check_extents(struct bch_fs *c) if (ret) break; - unfixable_fsck_err_on(!w.have_inode, c, + if (fsck_err_on(!w.have_inode, c, "extent type %u for missing inode %llu", - k.k->type, k.k->p.inode); - - unfixable_fsck_err_on(w.have_inode && + k.k->type, k.k->p.inode) || + fsck_err_on(w.have_inode && !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c, "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, w.inode.i_mode); + k.k->type, k.k->p.inode, w.inode.i_mode)) { + ret = bch2_btree_delete_at(&iter, 0); + if (ret) + goto err; + continue; + } unfixable_fsck_err_on(w.first_this_inode && w.have_inode && @@ -154,6 +275,7 @@ static int check_extents(struct bch_fs *c) "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size); } +err: fsck_err: return bch2_btree_iter_unlock(&iter) ?: ret; } @@ -166,10 +288,15 @@ noinline_for_stack static int check_dirents(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); + struct hash_check h; struct btree_iter iter; struct bkey_s_c k; + unsigned name_len; + char buf[200]; int ret = 0; + hash_check_init(bch2_dirent_hash_desc, &h, c); + for_each_btree_key(&iter, c, 
BTREE_ID_DIRENTS, POS(BCACHE_ROOT_INO, 0), k) { struct bkey_s_c_dirent d; @@ -181,13 +308,32 @@ static int check_dirents(struct bch_fs *c) if (ret) break; - unfixable_fsck_err_on(!w.have_inode, c, - "dirent in nonexisting directory %llu", - k.k->p.inode); + if (fsck_err_on(!w.have_inode, c, + "dirent in nonexisting directory:\n%s", + bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, + buf, sizeof(buf), k)) || + fsck_err_on(!S_ISDIR(w.inode.i_mode), c, + "dirent in non directory inode type %u:\n%s", + mode_to_type(w.inode.i_mode), + bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, + buf, sizeof(buf), k))) { + ret = bch2_btree_delete_at(&iter, 0); + if (ret) + goto err; + continue; + } + + if (w.first_this_inode && w.have_inode) + hash_check_set_inode(&h, c, &w.inode); + + ret = hash_check_key(bch2_dirent_hash_desc, &h, c, &iter, k); + if (ret > 0) { + ret = 0; + continue; + } - unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c, - "dirent in non directory inode %llu, type %u", - k.k->p.inode, mode_to_type(w.inode.i_mode)); + if (ret) + goto fsck_err; if (k.k->type != BCH_DIRENT) continue; @@ -195,8 +341,25 @@ static int check_dirents(struct bch_fs *c) d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); + name_len = bch2_dirent_name_bytes(d); + + if (fsck_err_on(!name_len, c, "empty dirent") || + fsck_err_on(name_len == 1 && + !memcmp(d.v->d_name, ".", 1), c, + ". dirent") || + fsck_err_on(name_len == 2 && + !memcmp(d.v->d_name, "..", 2), c, + ".. 
dirent")) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } + if (fsck_err_on(d_inum == d.k->p.inode, c, - "dirent points to own directory")) { + "dirent points to own directory:\n%s", + bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, + buf, sizeof(buf), k))) { ret = remove_dirent(c, &iter, d); if (ret) goto err; @@ -211,8 +374,9 @@ static int check_dirents(struct bch_fs *c) ret = 0; if (fsck_err_on(!have_target, c, - "dirent points to missing inode %llu, type %u filename %s", - d_inum, d.v->d_type, d.v->d_name)) { + "dirent points to missing inode:\n%s", + bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, + buf, sizeof(buf), k))) { ret = remove_dirent(c, &iter, d); if (ret) goto err; @@ -222,10 +386,10 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(have_target && d.v->d_type != mode_to_type(le16_to_cpu(target.i_mode)), c, - "incorrect d_type: got %u should be %u, filename %s", - d.v->d_type, + "incorrect d_type: should be %u:\n%s", mode_to_type(le16_to_cpu(target.i_mode)), - d.v->d_name)) { + bch2_bkey_val_to_text(c, BTREE_ID_DIRENTS, + buf, sizeof(buf), k))) { struct bkey_i_dirent *n; n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); @@ -248,6 +412,8 @@ static int check_dirents(struct bch_fs *c) } err: fsck_err: + bch2_btree_iter_unlock(&h.chain); + bch2_btree_iter_unlock(&h.iter); return bch2_btree_iter_unlock(&iter) ?: ret; } @@ -258,21 +424,39 @@ noinline_for_stack static int check_xattrs(struct bch_fs *c) { struct inode_walker w = inode_walker_init(); + struct hash_check h; struct btree_iter iter; struct bkey_s_c k; int ret = 0; + hash_check_init(bch2_xattr_hash_desc, &h, c); + for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(BCACHE_ROOT_INO, 0), k) { ret = walk_inode(c, &w, k.k->p.inode); if (ret) break; - unfixable_fsck_err_on(!w.have_inode, c, - "xattr for missing inode %llu", - k.k->p.inode); + if (fsck_err_on(!w.have_inode, c, + "xattr for missing inode %llu", + k.k->p.inode)) { + ret = bch2_btree_delete_at(&iter, 0); + if (ret) + 
goto err; + continue; + } + + if (w.first_this_inode && w.have_inode) + hash_check_set_inode(&h, c, &w.inode); + + ret = hash_check_key(bch2_xattr_hash_desc, &h, c, &iter, k); + if (ret) + goto fsck_err; } +err: fsck_err: + bch2_btree_iter_unlock(&h.chain); + bch2_btree_iter_unlock(&h.iter); return bch2_btree_iter_unlock(&iter) ?: ret; } @@ -445,6 +629,8 @@ static int check_directory_structure(struct bch_fs *c, /* DFS: */ restart_dfs: + had_unreachable = false; + ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO); if (ret) goto err; @@ -478,7 +664,8 @@ next: d_inum = le64_to_cpu(dirent.v->d_inum); if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, - "directory with multiple hardlinks")) { + "directory %llu has multiple hardlinks", + d_inum)) { ret = remove_dirent(c, &iter, dirent); if (ret) goto err; @@ -503,8 +690,6 @@ up: path.nr--; } - had_unreachable = false; - for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) { if (k.k->type != BCH_INODE_FS || !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode))) @@ -640,7 +825,7 @@ static int bch2_gc_do_inode(struct bch_fs *c, ret = bch2_inode_unpack(inode, &u); if (bch2_fs_inconsistent_on(ret, c, - "error unpacking inode %llu in fs-gc", + "error unpacking inode %llu in fsck", inode.k->p.inode)) return ret; @@ -894,36 +1079,59 @@ int bch2_fsck(struct bch_fs *c, bool full_fsck) struct bch_inode_unpacked root_inode, lostfound_inode; int ret; - ret = check_root(c, &root_inode); - if (ret) - return ret; + if (full_fsck) { + bch_verbose(c, "checking extents"); + ret = check_extents(c); + if (ret) + return ret; - ret = check_lostfound(c, &root_inode, &lostfound_inode); - if (ret) - return ret; + bch_verbose(c, "checking dirents"); + ret = check_dirents(c); + if (ret) + return ret; - if (!full_fsck) - goto check_nlinks; + bch_verbose(c, "checking xattrs"); + ret = check_xattrs(c); + if (ret) + return ret; - ret = check_extents(c); - if (ret) - return ret; + bch_verbose(c, "checking root directory"); + ret = 
check_root(c, &root_inode); + if (ret) + return ret; - ret = check_dirents(c); - if (ret) - return ret; + bch_verbose(c, "checking lost+found"); + ret = check_lostfound(c, &root_inode, &lostfound_inode); + if (ret) + return ret; - ret = check_xattrs(c); - if (ret) - return ret; + bch_verbose(c, "checking directory structure"); + ret = check_directory_structure(c, &lostfound_inode); + if (ret) + return ret; - ret = check_directory_structure(c, &lostfound_inode); - if (ret) - return ret; -check_nlinks: - ret = check_inode_nlinks(c, &lostfound_inode); - if (ret) - return ret; + bch_verbose(c, "checking inode nlinks"); + ret = check_inode_nlinks(c, &lostfound_inode); + if (ret) + return ret; + } else { + bch_verbose(c, "checking root directory"); + ret = check_root(c, &root_inode); + if (ret) + return ret; + + bch_verbose(c, "checking lost+found"); + ret = check_lostfound(c, &root_inode, &lostfound_inode); + if (ret) + return ret; + + bch_verbose(c, "checking inode nlinks"); + ret = check_inode_nlinks(c, &lostfound_inode); + if (ret) + return ret; + } + + bch2_flush_fsck_errs(c); return 0; } diff --git a/libbcachefs/fs-gc.h b/libbcachefs/fsck.h similarity index 100% rename from libbcachefs/fs-gc.h rename to libbcachefs/fsck.h diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 7a8467c..5b56a62 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -25,14 +25,12 @@ static const u8 bits_table[8] = { 13 * 8 - 8, }; -static int inode_encode_field(u8 *out, u8 *end, const u64 in[2]) +static int inode_encode_field(u8 *out, u8 *end, u64 hi, u64 lo) { - unsigned bytes, bits, shift; - - if (likely(!in[1])) - bits = fls64(in[0]); - else - bits = fls64(in[1]) + 64; + __be64 in[2] = { cpu_to_be64(hi), cpu_to_be64(lo), }; + unsigned shift, bytes, bits = likely(!hi) + ? 
fls64(lo) + : fls64(hi) + 64; for (shift = 1; shift <= 8; shift++) if (bits < bits_table[shift - 1]) @@ -44,17 +42,7 @@ got_shift: BUG_ON(out + bytes > end); - if (likely(bytes <= 8)) { - u64 b = cpu_to_be64(in[0]); - - memcpy(out, (void *) &b + 8 - bytes, bytes); - } else { - u64 b = cpu_to_be64(in[1]); - - memcpy(out, (void *) &b + 16 - bytes, bytes); - put_unaligned_be64(in[0], out + bytes - 8); - } - + memcpy(out, (u8 *) in + 16 - bytes, bytes); *out |= (1 << 8) >> shift; return bytes; @@ -63,7 +51,9 @@ got_shift: static int inode_decode_field(const u8 *in, const u8 *end, u64 out[2], unsigned *out_bits) { - unsigned bytes, bits, shift; + __be64 be[2] = { 0, 0 }; + unsigned bytes, shift; + u8 *p; if (in >= end) return -1; @@ -77,29 +67,18 @@ static int inode_decode_field(const u8 *in, const u8 *end, */ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */ bytes = byte_table[shift - 1]; - bits = bytes * 8 - shift; if (in + bytes > end) return -1; - /* - * we're assuming it's safe to deref up to 7 bytes < in; this will work - * because keys always start quite a bit more than 7 bytes after the - * start of the btree node header: - */ - if (likely(bytes <= 8)) { - out[0] = get_unaligned_be64(in + bytes - 8); - out[0] <<= 64 - bits; - out[0] >>= 64 - bits; - out[1] = 0; - } else { - out[0] = get_unaligned_be64(in + bytes - 8); - out[1] = get_unaligned_be64(in + bytes - 16); - out[1] <<= 128 - bits; - out[1] >>= 128 - bits; - } + p = (u8 *) be + 16 - bytes; + memcpy(p, in, bytes); + *p ^= (1 << 8) >> shift; + + out[0] = be64_to_cpu(be[0]); + out[1] = be64_to_cpu(be[1]); + *out_bits = out[0] ? 64 + fls64(out[0]) : fls64(out[1]); - *out_bits = out[1] ? 
64 + fls64(out[1]) : fls64(out[0]); return bytes; } @@ -109,7 +88,6 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, u8 *out = packed->inode.v.fields; u8 *end = (void *) &packed[1]; u8 *last_nonzero_field = out; - u64 field[2]; unsigned nr_fields = 0, last_nonzero_fieldnr = 0; bkey_inode_init(&packed->inode.k_i); @@ -119,12 +97,10 @@ void bch2_inode_pack(struct bkey_inode_buf *packed, packed->inode.v.i_mode = cpu_to_le16(inode->i_mode); #define BCH_INODE_FIELD(_name, _bits) \ - field[0] = inode->_name; \ - field[1] = 0; \ - out += inode_encode_field(out, end, field); \ + out += inode_encode_field(out, end, 0, inode->_name); \ nr_fields++; \ \ - if (field[0] | field[1]) { \ + if (inode->_name) { \ last_nonzero_field = out; \ last_nonzero_fieldnr = nr_fields; \ } @@ -187,7 +163,7 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, if (field_bits > sizeof(unpacked->_name) * 8) \ return -1; \ \ - unpacked->_name = field[0]; \ + unpacked->_name = field[1]; \ in += ret; BCH_INODE_FIELDS() @@ -449,3 +425,32 @@ int bch2_cached_dev_inode_find_by_uuid(struct bch_fs *c, uuid_le *uuid, bch2_btree_iter_unlock(&iter); return -ENOENT; } + +#ifdef CONFIG_BCACHEFS_DEBUG +void bch2_inode_pack_test(void) +{ + struct bch_inode_unpacked *u, test_inodes[] = { + { + .i_atime = U64_MAX, + .i_ctime = U64_MAX, + .i_mtime = U64_MAX, + .i_otime = U64_MAX, + .i_size = U64_MAX, + .i_sectors = U64_MAX, + .i_uid = U32_MAX, + .i_gid = U32_MAX, + .i_nlink = U32_MAX, + .i_generation = U32_MAX, + .i_dev = U32_MAX, + }, + }; + + for (u = test_inodes; + u < test_inodes + ARRAY_SIZE(test_inodes); + u++) { + struct bkey_inode_buf p; + + bch2_inode_pack(&p, u); + } +} +#endif diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index d1d64a7..06e2ffd 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -54,4 +54,10 @@ static inline u64 timespec_to_bch2_time(struct bch_fs *c, struct timespec ts) return div_s64(ns, c->sb.time_precision); } +#ifdef CONFIG_BCACHEFS_DEBUG +void 
bch2_inode_pack_test(void); +#else +static inline void bch2_inode_pack_test(void) {} +#endif + #endif diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 039dd04..0f27eaf 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -910,8 +910,8 @@ static int bio_checksum_uncompress(struct bch_fs *c, bch2_encrypt_bio(c, rbio->crc.csum_type, nonce, src); - bio_copy_data_iter(dst, dst_iter, - src, src->bi_iter); + bio_copy_data_iter(dst, &dst_iter, + src, &src->bi_iter); } else { bch2_encrypt_bio(c, rbio->crc.csum_type, nonce, src); } diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index f6203f1..ca96330 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -527,62 +527,34 @@ fsck_err: #define JOURNAL_ENTRY_NONE 6 #define JOURNAL_ENTRY_BAD 7 -static int journal_entry_validate(struct bch_fs *c, - struct jset *j, u64 sector, - unsigned bucket_sectors_left, - unsigned sectors_read) +#define journal_entry_err(c, msg, ...) \ +({ \ + if (write == READ) { \ + mustfix_fsck_err(c, msg, ##__VA_ARGS__); \ + } else { \ + bch_err(c, "detected corrupt metadata before write:\n" \ + msg, ##__VA_ARGS__); \ + ret = BCH_FSCK_ERRORS_NOT_FIXED; \ + goto fsck_err; \ + } \ + true; \ +}) + +#define journal_entry_err_on(cond, c, msg, ...) \ + ((cond) ? 
journal_entry_err(c, msg, ##__VA_ARGS__) : false) + +static int __journal_entry_validate(struct bch_fs *c, struct jset *j, + int write) { struct jset_entry *entry; - size_t bytes = vstruct_bytes(j); - struct bch_csum csum; int ret = 0; - if (le64_to_cpu(j->magic) != jset_magic(c)) - return JOURNAL_ENTRY_NONE; - - if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) { - bch_err(c, "unknown journal entry version %u", - le32_to_cpu(j->version)); - return BCH_FSCK_UNKNOWN_VERSION; - } - - if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9, c, - "journal entry too big (%zu bytes), sector %lluu", - bytes, sector)) { - /* XXX: note we might have missing journal entries */ - return JOURNAL_ENTRY_BAD; - } - - if (bytes > sectors_read << 9) - return JOURNAL_ENTRY_REREAD; - - if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c, - "journal entry with unknown csum type %llu sector %lluu", - JSET_CSUM_TYPE(j), sector)) - return JOURNAL_ENTRY_BAD; - - csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j); - if (mustfix_fsck_err_on(bch2_crc_cmp(csum, j->csum), c, - "journal checksum bad, sector %llu", sector)) { - /* XXX: retry IO, when we start retrying checksum errors */ - /* XXX: note we might have missing journal entries */ - return JOURNAL_ENTRY_BAD; - } - - bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), - j->encrypted_start, - vstruct_end(j) - (void *) j->encrypted_start); - - if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c, - "invalid journal entry: last_seq > seq")) - j->last_seq = j->seq; - vstruct_for_each(j, entry) { struct bkey_i *k; - if (mustfix_fsck_err_on(vstruct_next(entry) > - vstruct_last(j), c, - "journal entry extents past end of jset")) { + if (journal_entry_err_on(vstruct_next(entry) > + vstruct_last(j), c, + "journal entry extends past end of jset")) { j->u64s = cpu_to_le64((u64 *) entry - j->_data); break; } @@ -602,7 +574,7 @@ static int journal_entry_validate(struct bch_fs *c, case 
JOURNAL_ENTRY_BTREE_ROOT: k = entry->start; - if (mustfix_fsck_err_on(!entry->u64s || + if (journal_entry_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, "invalid btree root journal entry: wrong number of keys")) { journal_entry_null_range(entry, @@ -620,7 +592,7 @@ static int journal_entry_validate(struct bch_fs *c, break; case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED: - if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, + if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, c, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); @@ -628,7 +600,7 @@ static int journal_entry_validate(struct bch_fs *c, break; default: - mustfix_fsck_err(c, "invalid journal entry type %llu", + journal_entry_err(c, "invalid journal entry type %llu", JOURNAL_ENTRY_TYPE(entry)); journal_entry_null_range(entry, vstruct_next(entry)); break; @@ -639,6 +611,61 @@ fsck_err: return ret; } +static int journal_entry_validate(struct bch_fs *c, + struct jset *j, u64 sector, + unsigned bucket_sectors_left, + unsigned sectors_read, + int write) +{ + size_t bytes = vstruct_bytes(j); + struct bch_csum csum; + int ret = 0; + + if (le64_to_cpu(j->magic) != jset_magic(c)) + return JOURNAL_ENTRY_NONE; + + if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) { + bch_err(c, "unknown journal entry version %u", + le32_to_cpu(j->version)); + return BCH_FSCK_UNKNOWN_VERSION; + } + + if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c, + "journal entry too big (%zu bytes), sector %lluu", + bytes, sector)) { + /* XXX: note we might have missing journal entries */ + return JOURNAL_ENTRY_BAD; + } + + if (bytes > sectors_read << 9) + return JOURNAL_ENTRY_REREAD; + + if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c, + "journal entry with unknown csum type %llu sector %lluu", + JSET_CSUM_TYPE(j), sector)) + return JOURNAL_ENTRY_BAD; + + csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j); + if 
(journal_entry_err_on(bch2_crc_cmp(csum, j->csum), c, + "journal checksum bad, sector %llu", sector)) { + /* XXX: retry IO, when we start retrying checksum errors */ + /* XXX: note we might have missing journal entries */ + return JOURNAL_ENTRY_BAD; + } + + bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), + j->encrypted_start, + vstruct_end(j) - (void *) j->encrypted_start); + + if (journal_entry_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c, + "invalid journal entry: last_seq > seq")) + j->last_seq = j->seq; + + return __journal_entry_validate(c, j, write); +fsck_err: + return ret; +} + struct journal_read_buf { void *data; size_t size; @@ -705,7 +732,8 @@ reread: sectors_read = min_t(unsigned, } ret = journal_entry_validate(c, j, offset, - end - offset, sectors_read); + end - offset, sectors_read, + READ); switch (ret) { case BCH_FSCK_OK: break; @@ -2274,6 +2302,10 @@ static void journal_write(struct closure *cl) SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); + if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + __journal_entry_validate(c, jset, WRITE)) + goto err; + bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset->encrypted_start, vstruct_end(jset) - (void *) jset->encrypted_start); @@ -2281,6 +2313,10 @@ static void journal_write(struct closure *cl) jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); + if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + __journal_entry_validate(c, jset, WRITE)) + goto err; + sectors = vstruct_sectors(jset, c->block_bits); BUG_ON(sectors > j->prev_buf_sectors); @@ -2349,6 +2385,9 @@ no_io: ptr->offset += sectors; closure_return_with_destructor(cl, journal_write_done); +err: + bch2_fatal_error(c); + closure_return_with_destructor(cl, journal_write_done); } static void journal_write_work(struct work_struct *work) diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 6eac6fc..8b31c7d 100644 --- 
a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -2,7 +2,9 @@ #define _BCACHE_STR_HASH_H #include "btree_iter.h" +#include "btree_update.h" #include "checksum.h" +#include "error.h" #include "inode.h" #include "siphash.h" #include "super.h" @@ -341,6 +343,36 @@ err: return ret; } +static inline int bch2_hash_delete_at(const struct bch_hash_desc desc, + const struct bch_hash_info *info, + struct btree_iter *iter, + u64 *journal_seq) +{ + struct btree_iter whiteout_iter; + struct bkey_i delete; + int ret = -ENOENT; + + bch2_btree_iter_init(&whiteout_iter, iter->c, desc.btree_id, + iter->pos); + bch2_btree_iter_link(iter, &whiteout_iter); + + ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, iter); + if (ret < 0) + goto err; + + bkey_init(&delete.k); + delete.k.p = iter->pos; + delete.k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED; + + ret = bch2_btree_insert_at(iter->c, NULL, NULL, journal_seq, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_ATOMIC, + BTREE_INSERT_ENTRY(iter, &delete)); +err: + bch2_btree_iter_unlink(&whiteout_iter); + return ret; +} + static inline int bch2_hash_delete(const struct bch_hash_desc desc, const struct bch_hash_info *info, struct bch_fs *c, u64 inode, @@ -348,7 +380,6 @@ static inline int bch2_hash_delete(const struct bch_hash_desc desc, { struct btree_iter iter, whiteout_iter; struct bkey_s_c k; - struct bkey_i delete; int ret = -ENOENT; bch2_btree_iter_init_intent(&iter, c, desc.btree_id, @@ -361,18 +392,7 @@ retry: if ((ret = btree_iter_err(k))) goto err; - ret = bch2_hash_needs_whiteout(desc, info, &whiteout_iter, &iter); - if (ret < 0) - goto err; - - bkey_init(&delete.k); - delete.k.p = k.k->p; - delete.k.type = ret ? 
desc.whiteout_type : KEY_TYPE_DELETED; - - ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, - BTREE_INSERT_NOFAIL| - BTREE_INSERT_ATOMIC, - BTREE_INSERT_ENTRY(&iter, &delete)); + ret = bch2_hash_delete_at(desc, info, &iter, journal_seq); err: if (ret == -EINTR) goto retry; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index f5ee2de..7a98136 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -19,7 +19,7 @@ #include "debug.h" #include "error.h" #include "fs.h" -#include "fs-gc.h" +#include "fsck.h" #include "inode.h" #include "io.h" #include "journal.h" @@ -513,6 +513,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) INIT_WORK(&c->read_retry_work, bch2_read_retry_work); mutex_init(&c->zlib_workspace_lock); + INIT_LIST_HEAD(&c->fsck_errors); + mutex_init(&c->fsck_error_lock); + seqcount_init(&c->gc_pos_lock); c->prio_clock[READ].hand = 1; @@ -875,12 +878,12 @@ err: switch (ret) { case BCH_FSCK_ERRORS_NOT_FIXED: bch_err(c, "filesystem contains errors: please report this to the developers"); - pr_cont("mount with -o fix_errors to repair"); + pr_cont("mount with -o fix_errors to repair\n"); err = "fsck error"; break; case BCH_FSCK_REPAIR_UNIMPLEMENTED: bch_err(c, "filesystem contains errors: please report this to the developers"); - pr_cont("repair unimplemented: inform the developers so that it can be added"); + pr_cont("repair unimplemented: inform the developers so that it can be added\n"); err = "fsck error"; break; case BCH_FSCK_REPAIR_IMPOSSIBLE: @@ -979,8 +982,8 @@ static void bch2_dev_free(struct bch_dev *ca) kvpfree(ca->disk_buckets, bucket_bytes(ca)); kfree(ca->prio_buckets); kfree(ca->bio_prio); - vfree(ca->buckets); - vfree(ca->oldest_gens); + kvpfree(ca->buckets, ca->mi.nbuckets * sizeof(struct bucket)); + kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8)); free_heap(&ca->heap); free_fifo(&ca->free_inc); @@ -1140,10 +1143,12 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) 
!init_fifo(&ca->free[RESERVE_NONE], reserve_none, GFP_KERNEL) || !init_fifo(&ca->free_inc, free_inc_reserve, GFP_KERNEL) || !init_heap(&ca->heap, heap_size, GFP_KERNEL) || - !(ca->oldest_gens = vzalloc(sizeof(u8) * - ca->mi.nbuckets)) || - !(ca->buckets = vzalloc(sizeof(struct bucket) * - ca->mi.nbuckets)) || + !(ca->oldest_gens = kvpmalloc(ca->mi.nbuckets * + sizeof(u8), + GFP_KERNEL|__GFP_ZERO)) || + !(ca->buckets = kvpmalloc(ca->mi.nbuckets * + sizeof(struct bucket), + GFP_KERNEL|__GFP_ZERO)) || !(ca->prio_buckets = kzalloc(sizeof(u64) * prio_buckets(ca) * 2, GFP_KERNEL)) || !(ca->disk_buckets = kvpmalloc(bucket_bytes(ca), GFP_KERNEL)) || @@ -1871,6 +1876,7 @@ static void bcachefs_exit(void) static int __init bcachefs_init(void) { bch2_bkey_pack_test(); + bch2_inode_pack_test(); if (!(bcachefs_kset = kset_create_and_add("bcachefs", NULL, fs_kobj)) || bch2_chardev_init() || diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index ba04bba..c34048a 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -512,7 +512,7 @@ STORE(bch2_fs_opts_dir) { struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt; - enum bch_opt_id id; + int id; u64 v; id = bch2_parse_sysfs_opt(attr->name, buf, &v); diff --git a/libbcachefs/util.c b/libbcachefs/util.c index f57224a..79a2aeb 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -417,3 +417,17 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) dst += bv.bv_len; } } + +size_t bch_scnmemcpy(char *buf, size_t size, const char *src, size_t len) +{ + size_t n; + + if (!size) + return 0; + + n = min(size - 1, len); + memcpy(buf, src, n); + buf[n] = '\0'; + + return n; +} diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 5669cb8..8aa5c34 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -93,7 +93,8 @@ static inline void kvpfree(void *p, size_t size) static inline void *kvpmalloc(size_t size, gfp_t gfp_mask) { return size < PAGE_SIZE ? 
kmalloc(size, gfp_mask) - : (void *) __get_free_pages(gfp_mask, get_order(size)) + : (void *) __get_free_pages(gfp_mask|__GFP_NOWARN, + get_order(size)) ?: __vmalloc(size, gfp_mask, PAGE_KERNEL); } @@ -750,4 +751,6 @@ static inline struct bio_vec next_contig_bvec(struct bio *bio, #define bio_for_each_contig_segment(bv, bio, iter) \ __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter) +size_t bch_scnmemcpy(char *, size_t, const char *, size_t); + #endif /* _BCACHE_UTIL_H */ diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 488d536..4e82e42 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -11,6 +11,16 @@ #include #include +static unsigned xattr_val_u64s(unsigned name_len, unsigned val_len) +{ + return DIV_ROUND_UP(sizeof(struct bch_xattr) + + name_len + val_len, sizeof(u64)); +} + +#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len) + +static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned); + struct xattr_search_key { u8 type; struct qstr name; @@ -31,8 +41,6 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info, return bch2_str_hash_end(&ctx, info); } -#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len) - static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key) { return bch2_xattr_hash(info, key); @@ -66,7 +74,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) memcmp(l.v->x_name, r.v->x_name, r.v->x_name_len); } -static const struct bch_hash_desc xattr_hash_desc = { +const struct bch_hash_desc bch2_xattr_hash_desc = { .btree_id = BTREE_ID_XATTRS, .key_type = BCH_XATTR, .whiteout_type = BCH_XATTR_WHITEOUT, @@ -79,12 +87,33 @@ static const struct bch_hash_desc xattr_hash_desc = { static const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k) { + const struct xattr_handler *handler; + struct bkey_s_c_xattr xattr; + unsigned u64s; + switch (k.k->type) { case BCH_XATTR: - return bkey_val_bytes(k.k) < sizeof(struct bch_xattr) 
- ? "value too small" - : NULL; + if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr)) + return "value too small"; + xattr = bkey_s_c_to_xattr(k); + u64s = xattr_val_u64s(xattr.v->x_name_len, + le16_to_cpu(xattr.v->x_val_len)); + + if (bkey_val_u64s(k.k) < u64s) + return "value too small"; + + if (bkey_val_u64s(k.k) > u64s) + return "value too big"; + + handler = bch2_xattr_type_to_handler(xattr.v->x_type); + if (!handler) + return "invalid type"; + + if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len)) + return "xattr name has invalid characters"; + + return NULL; case BCH_XATTR_WHITEOUT: return bkey_val_bytes(k.k) != 0 ? "value size should be zero" @@ -98,34 +127,29 @@ static const char *bch2_xattr_invalid(const struct bch_fs *c, static void bch2_xattr_to_text(struct bch_fs *c, char *buf, size_t size, struct bkey_s_c k) { + const struct xattr_handler *handler; struct bkey_s_c_xattr xattr; - int n; + size_t n = 0; switch (k.k->type) { case BCH_XATTR: xattr = bkey_s_c_to_xattr(k); - if (size) { - n = min_t(unsigned, size, xattr.v->x_name_len); - memcpy(buf, xattr.v->x_name, n); - buf[size - 1] = '\0'; - buf += n; - size -= n; - } - - n = scnprintf(buf, size, " -> "); - buf += n; - size -= n; - - if (size) { - n = min_t(unsigned, size, - le16_to_cpu(xattr.v->x_val_len)); - memcpy(buf, xattr_val(xattr.v), n); - buf[size - 1] = '\0'; - buf += n; - size -= n; - } - + handler = bch2_xattr_type_to_handler(xattr.v->x_type); + if (handler && handler->prefix) + n += scnprintf(buf + n, size - n, "%s", handler->prefix); + else if (handler) + n += scnprintf(buf + n, size - n, "(type %u)", + xattr.v->x_type); + else + n += scnprintf(buf + n, size - n, "(unknown type %u)", + xattr.v->x_type); + + n += bch_scnmemcpy(buf + n, size - n, xattr.v->x_name, + xattr.v->x_name_len); + n += scnprintf(buf + n, size - n, ":"); + n += bch_scnmemcpy(buf + n, size - n, xattr_val(xattr.v), + le16_to_cpu(xattr.v->x_val_len)); break; case BCH_XATTR_WHITEOUT: scnprintf(buf, size, "whiteout"); @@ 
-147,7 +171,7 @@ int bch2_xattr_get(struct bch_fs *c, struct inode *inode, struct bkey_s_c_xattr xattr; int ret; - k = bch2_hash_lookup(xattr_hash_desc, &ei->str_hash, c, + k = bch2_hash_lookup(bch2_xattr_hash_desc, &ei->str_hash, c, ei->vfs_inode.i_ino, &iter, &X_SEARCH(type, name, strlen(name))); if (IS_ERR(k.k)) @@ -175,15 +199,13 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum, int ret; if (!value) { - ret = bch2_hash_delete(xattr_hash_desc, hash_info, + ret = bch2_hash_delete(bch2_xattr_hash_desc, hash_info, c, inum, journal_seq, &search); } else { struct bkey_i_xattr *xattr; unsigned u64s = BKEY_U64s + - DIV_ROUND_UP(sizeof(struct bch_xattr) + - search.name.len + size, - sizeof(u64)); + xattr_val_u64s(search.name.len, size); if (u64s > U8_MAX) return -ERANGE; @@ -200,7 +222,7 @@ int __bch2_xattr_set(struct bch_fs *c, u64 inum, memcpy(xattr->v.x_name, search.name.name, search.name.len); memcpy(xattr_val(&xattr->v), value, size); - ret = bch2_hash_set(xattr_hash_desc, hash_info, c, + ret = bch2_hash_set(bch2_xattr_hash_desc, hash_info, c, inum, journal_seq, &xattr->k_i, (flags & XATTR_CREATE ? 
BCH_HASH_SET_MUST_CREATE : 0)| @@ -225,8 +247,6 @@ int bch2_xattr_set(struct bch_fs *c, struct inode *inode, &ei->journal_seq); } -static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned); - static size_t bch2_xattr_emit(struct dentry *dentry, const struct bch_xattr *xattr, char *buffer, size_t buffer_size) diff --git a/libbcachefs/xattr.h b/libbcachefs/xattr.h index 14eba24..9bc5376 100644 --- a/libbcachefs/xattr.h +++ b/libbcachefs/xattr.h @@ -1,6 +1,9 @@ #ifndef _BCACHE_XATTR_H #define _BCACHE_XATTR_H +#include "str_hash.h" + +extern const struct bch_hash_desc bch2_xattr_hash_desc; extern const struct bkey_ops bch2_bkey_xattr_ops; struct dentry; diff --git a/linux/bio.c b/linux/bio.c index 966f227..8fb10ce 100644 --- a/linux/bio.c +++ b/linux/bio.c @@ -21,32 +21,16 @@ #include #include -void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter, - struct bio *src, struct bvec_iter src_iter) +void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter) { struct bio_vec src_bv, dst_bv; void *src_p, *dst_p; unsigned bytes; - while (1) { - if (!src_iter.bi_size) { - src = src->bi_next; - if (!src) - break; - - src_iter = src->bi_iter; - } - - if (!dst_iter.bi_size) { - dst = dst->bi_next; - if (!dst) - break; - - dst_iter = dst->bi_iter; - } - - src_bv = bio_iter_iovec(src, src_iter); - dst_bv = bio_iter_iovec(dst, dst_iter); + while (src_iter->bi_size && dst_iter->bi_size) { + src_bv = bio_iter_iovec(src, *src_iter); + dst_bv = bio_iter_iovec(dst, *dst_iter); bytes = min(src_bv.bv_len, dst_bv.bv_len); @@ -60,15 +44,27 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter, kunmap_atomic(dst_p); kunmap_atomic(src_p); - bio_advance_iter(src, &src_iter, bytes); - bio_advance_iter(dst, &dst_iter, bytes); + flush_dcache_page(dst_bv.bv_page); + + bio_advance_iter(src, src_iter, bytes); + bio_advance_iter(dst, dst_iter, bytes); } } +/** + * bio_copy_data - copy contents of data 
buffers from one bio to another + * @src: source bio + * @dst: destination bio + * + * Stops when it reaches the end of either @src or @dst - that is, copies the + * smaller of the two bi_iter.bi_size byte counts; bi_next is not followed. + */ void bio_copy_data(struct bio *dst, struct bio *src) { - bio_copy_data_iter(dst, dst->bi_iter, - src, src->bi_iter); + struct bvec_iter src_iter = src->bi_iter; + struct bvec_iter dst_iter = dst->bi_iter; + + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); } void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start) -- 2.39.5