From 799439a88ab7afe99e5052894c20ea77133a1551 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 5 Jan 2024 12:38:14 -0500 Subject: [PATCH] Update bcachefs sources to d267e10a43b2 bcachefs: __bch2_sb_field_to_text() Signed-off-by: Kent Overstreet --- .bcachefs_revision | 2 +- libbcachefs/alloc_background.c | 225 ++++-- libbcachefs/alloc_background.h | 16 +- libbcachefs/backpointers.c | 3 +- libbcachefs/bcachefs.h | 26 +- libbcachefs/bcachefs_format.h | 84 +-- libbcachefs/bkey_methods.h | 80 +- libbcachefs/btree_cache.c | 6 - libbcachefs/btree_gc.c | 15 +- libbcachefs/btree_io.c | 4 +- libbcachefs/btree_iter.c | 35 +- libbcachefs/btree_locking.c | 19 +- libbcachefs/btree_trans_commit.c | 33 +- libbcachefs/btree_update_interior.c | 19 +- libbcachefs/buckets.c | 1044 +++++++------------------- libbcachefs/buckets.h | 37 +- libbcachefs/chardev.c | 236 +----- libbcachefs/darray.h | 2 +- libbcachefs/debug.c | 6 +- libbcachefs/disk_groups.c | 2 +- libbcachefs/ec.c | 301 ++++++++ libbcachefs/ec.h | 5 +- libbcachefs/error.c | 93 ++- libbcachefs/extents.h | 12 +- libbcachefs/fs-common.c | 36 +- libbcachefs/fs-io.c | 10 + libbcachefs/fsck.c | 16 +- libbcachefs/inode.c | 72 +- libbcachefs/inode.h | 15 +- libbcachefs/opts.h | 12 +- libbcachefs/printbuf.c | 2 +- libbcachefs/recovery.c | 40 +- libbcachefs/reflink.c | 175 ++--- libbcachefs/reflink.h | 26 +- libbcachefs/sb-downgrade.c | 111 ++- libbcachefs/sb-downgrade.h | 3 +- libbcachefs/sb-errors_types.h | 3 +- libbcachefs/sb-members.c | 2 +- libbcachefs/snapshot.c | 31 +- libbcachefs/snapshot.h | 4 +- libbcachefs/super-io.c | 54 +- libbcachefs/super-io.h | 8 +- libbcachefs/super.c | 70 +- libbcachefs/thread_with_file.c | 299 ++++++++ libbcachefs/thread_with_file.h | 41 + libbcachefs/thread_with_file_types.h | 16 + libbcachefs/trace.h | 26 +- libbcachefs/util.c | 8 +- libbcachefs/util.h | 13 +- 49 files changed, 1762 insertions(+), 1636 deletions(-) create mode 100644 libbcachefs/thread_with_file.c create mode 100644 
libbcachefs/thread_with_file.h create mode 100644 libbcachefs/thread_with_file_types.h diff --git a/.bcachefs_revision b/.bcachefs_revision index 595b9ef..bb0353e 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -2a6125decb436ddc5e022c2428f64cf68dc974de +d267e10a43b2e9ab37da6c9c991ca021142f6324 diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 1a127b0..a09b9d0 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -749,95 +749,177 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans, return ret; } -int bch2_trans_mark_alloc(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) +int bch2_trigger_alloc(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { struct bch_fs *c = trans->c; - struct bch_alloc_v4 old_a_convert, *new_a; - const struct bch_alloc_v4 *old_a; - u64 old_lru, new_lru; int ret = 0; - /* - * Deletion only happens in the device removal path, with - * BTREE_TRIGGER_NORUN: - */ - BUG_ON(new->k.type != KEY_TYPE_alloc_v4); + if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans, + "alloc key for invalid device or bucket")) + return -EIO; - old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = &bkey_i_to_alloc_v4(new)->v; + struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode); - new_a->data_type = alloc_data_type(*new_a, new_a->data_type); + struct bch_alloc_v4 old_a_convert; + const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) { - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); - SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); - SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); - } + if (flags & 
BTREE_TRIGGER_TRANSACTIONAL) { + struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; - if (data_type_is_empty(new_a->data_type) && - BCH_ALLOC_V4_NEED_INC_GEN(new_a) && - !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) { - new_a->gen++; - SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); - } + new_a->data_type = alloc_data_type(*new_a, new_a->data_type); - if (old_a->data_type != new_a->data_type || - (new_a->data_type == BCH_DATA_free && - alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { - ret = bch2_bucket_do_index(trans, old, old_a, false) ?: - bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true); - if (ret) - return ret; - } + if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) { + new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); + new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now)); + SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); + SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); + } - if (new_a->data_type == BCH_DATA_cached && - !new_a->io_time[READ]) - new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); + if (data_type_is_empty(new_a->data_type) && + BCH_ALLOC_V4_NEED_INC_GEN(new_a) && + !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { + new_a->gen++; + SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); + } - old_lru = alloc_lru_idx_read(*old_a); - new_lru = alloc_lru_idx_read(*new_a); + if (old_a->data_type != new_a->data_type || + (new_a->data_type == BCH_DATA_free && + alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { + ret = bch2_bucket_do_index(trans, old, old_a, false) ?: + bch2_bucket_do_index(trans, new.s_c, new_a, true); + if (ret) + return ret; + } - if (old_lru != new_lru) { - ret = bch2_lru_change(trans, new->k.p.inode, - bucket_to_u64(new->k.p), - old_lru, new_lru); - if (ret) - return ret; - } + if (new_a->data_type == BCH_DATA_cached && + !new_a->io_time[READ]) + new_a->io_time[READ] = 
max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, - bch_dev_bkey_exists(c, new->k.p.inode)); + u64 old_lru = alloc_lru_idx_read(*old_a); + u64 new_lru = alloc_lru_idx_read(*new_a); + if (old_lru != new_lru) { + ret = bch2_lru_change(trans, new.k->p.inode, + bucket_to_u64(new.k->p), + old_lru, new_lru); + if (ret) + return ret; + } - if (old_a->fragmentation_lru != new_a->fragmentation_lru) { - ret = bch2_lru_change(trans, - BCH_LRU_FRAGMENTATION_START, - bucket_to_u64(new->k.p), - old_a->fragmentation_lru, new_a->fragmentation_lru); - if (ret) - return ret; + new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, + bch_dev_bkey_exists(c, new.k->p.inode)); + if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + ret = bch2_lru_change(trans, + BCH_LRU_FRAGMENTATION_START, + bucket_to_u64(new.k->p), + old_a->fragmentation_lru, new_a->fragmentation_lru); + if (ret) + return ret; + } + + if (old_a->gen != new_a->gen) { + ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); + if (ret) + return ret; + } + + /* + * need to know if we're getting called from the invalidate path or + * not: + */ + + if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && + old_a->cached_sectors) { + ret = bch2_update_cached_sectors_list(trans, new.k->p.inode, + -((s64) old_a->cached_sectors)); + if (ret) + return ret; + } } - if (old_a->gen != new_a->gen) { - ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen); - if (ret) - return ret; + if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { + struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + u64 journal_seq = trans->journal_res.seq; + u64 bucket_journal_seq = new_a->journal_seq; + + if ((flags & BTREE_TRIGGER_INSERT) && + data_type_is_empty(old_a->data_type) != + data_type_is_empty(new_a->data_type) && + new.k->type == KEY_TYPE_alloc_v4) { + struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v; + + /* + * If the btree 
updates referring to a bucket weren't flushed + * before the bucket became empty again, then the we don't have + * to wait on a journal flush before we can reuse the bucket: + */ + v->journal_seq = bucket_journal_seq = + data_type_is_empty(new_a->data_type) && + (journal_seq == v->journal_seq || + bch2_journal_noflush_seq(&c->journal, v->journal_seq)) + ? 0 : journal_seq; + } + + if (!data_type_is_empty(old_a->data_type) && + data_type_is_empty(new_a->data_type) && + bucket_journal_seq) { + ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, + c->journal.flushed_seq_ondisk, + new.k->p.inode, new.k->p.offset, + bucket_journal_seq); + if (ret) { + bch2_fs_fatal_error(c, + "error setting bucket_needs_journal_commit: %i", ret); + return ret; + } + } + + percpu_down_read(&c->mark_lock); + if (new_a->gen != old_a->gen) + *bucket_gen(ca, new.k->p.offset) = new_a->gen; + + bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false); + + if (new_a->data_type == BCH_DATA_free && + (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) + closure_wake_up(&c->freelist_wait); + + if (new_a->data_type == BCH_DATA_need_discard && + (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) + bch2_do_discards(c); + + if (old_a->data_type != BCH_DATA_cached && + new_a->data_type == BCH_DATA_cached && + should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) + bch2_do_invalidates(c); + + if (new_a->data_type == BCH_DATA_need_gc_gens) + bch2_do_gc_gens(c); + percpu_up_read(&c->mark_lock); } - /* - * need to know if we're getting called from the invalidate path or - * not: - */ + if ((flags & BTREE_TRIGGER_GC) && + (flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) { + struct bch_alloc_v4 new_a_convert; + const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert); - if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) && - old_a->cached_sectors) { - ret = bch2_update_cached_sectors_list(trans, new->k.p.inode, - 
-((s64) old_a->cached_sectors)); - if (ret) - return ret; + percpu_down_read(&c->mark_lock); + struct bucket *g = gc_bucket(ca, new.k->p.offset); + + bucket_lock(g); + + g->gen_valid = 1; + g->gen = new_a->gen; + g->data_type = new_a->data_type; + g->stripe = new_a->stripe; + g->stripe_redundancy = new_a->stripe_redundancy; + g->dirty_sectors = new_a->dirty_sectors; + g->cached_sectors = new_a->cached_sectors; + + bucket_unlock(g); + percpu_up_read(&c->mark_lock); } return 0; @@ -1150,9 +1232,6 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, unsigned i, gens_offset, gens_end_offset; int ret; - if (c->sb.version < bcachefs_metadata_version_bucket_gens) - return 0; - bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); k = bch2_btree_iter_peek_slot(bucket_gens_iter); diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 96671f1..e7f7e84 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -182,24 +182,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v1_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v2_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ .key_invalid = bch2_alloc_v3_invalid, \ .val_to_text = bch2_alloc_to_text, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) @@ -207,8 +204,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs 
*, struct bkey_s_c); .key_invalid = bch2_alloc_v4_invalid, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ - .trans_trigger = bch2_trans_mark_alloc, \ - .atomic_trigger = bch2_mark_alloc, \ + .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) @@ -232,8 +228,8 @@ static inline bool bkey_is_alloc(const struct bkey *k) int bch2_alloc_read(struct bch_fs *); -int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_do_discards(struct bch_fs *); diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index a97fc2b..e358a2f 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -467,8 +467,7 @@ missing: prt_printf(&buf, "\nbp pos "); bch2_bpos_to_text(&buf, bp_iter.pos); - if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers || - c->opts.reconstruct_alloc || + if (c->opts.reconstruct_alloc || fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 840f605..dac383e 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -425,6 +425,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(btree_node_merge) \ x(btree_node_sort) \ x(btree_node_read) \ + x(btree_node_read_done) \ x(btree_interior_update_foreground) \ x(btree_interior_update_total) \ x(btree_gc) \ @@ -464,6 +465,7 @@ enum bch_time_stats { #include "replicas_types.h" #include "subvolume_types.h" #include "super_types.h" +#include "thread_with_file_types.h" /* Number of nodes btree coalesce will try to coalesce at once */ #define GC_MERGE_NODES 4U @@ -478,12 +480,6 @@ enum bch_time_stats { struct btree; -struct log_output { - 
spinlock_t lock; - wait_queue_head_t wait; - struct printbuf buf; -}; - enum gc_phase { GC_PHASE_NOT_RUNNING, GC_PHASE_START, @@ -607,9 +603,6 @@ struct bch_dev { }; /* - * fsck_done - kill? - * - * replace with something more general from enumated fsck passes/errors: * initial_gc_unfixed * error * topology error @@ -625,7 +618,7 @@ struct bch_dev { x(going_ro) \ x(write_disable_complete) \ x(clean_shutdown) \ - x(fsck_done) \ + x(fsck_running) \ x(initial_gc_unfixed) \ x(need_another_gc) \ x(need_delete_dead_snapshots) \ @@ -739,8 +732,8 @@ struct bch_fs { struct super_block *vfs_sb; dev_t dev; char name[40]; - struct log_output *output; - struct task_struct *output_filter; + struct stdio_redirect *stdio; + struct task_struct *stdio_filter; /* ro/rw, add/remove/resize devices: */ struct rw_semaphore state_lock; @@ -1252,6 +1245,15 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev) return dev < c->sb.nr_devices && c->devs[dev]; } +static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) +{ + struct stdio_redirect *stdio = c->stdio; + + if (c->stdio_filter && c->stdio_filter != current) + stdio = NULL; + return stdio; +} + #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index e7a2d25..0d5ac41 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1672,73 +1672,41 @@ struct bch_sb_field_downgrade { #define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10))) #define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0) -#define RECOVERY_PASS_ALL_FSCK (1ULL << 63) - /* * field 1: version name * field 2: BCH_VERSION(major, minor) * field 3: recovery passess required on upgrade */ #define BCH_METADATA_VERSIONS() \ - x(bkey_renumber, BCH_VERSION(0, 10), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_btree_change, BCH_VERSION(0, 11), \ - RECOVERY_PASS_ALL_FSCK) \ - x(snapshot, 
BCH_VERSION(0, 12), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_backpointers, BCH_VERSION(0, 13), \ - RECOVERY_PASS_ALL_FSCK) \ - x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \ - RECOVERY_PASS_ALL_FSCK) \ - x(snapshot_2, BCH_VERSION(0, 15), \ - BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \ - BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \ - RECOVERY_PASS_ALL_FSCK) \ - x(reflink_p_fix, BCH_VERSION(0, 16), \ - BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \ - x(subvol_dirent, BCH_VERSION(0, 17), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_v2, BCH_VERSION(0, 18), \ - RECOVERY_PASS_ALL_FSCK) \ - x(freespace, BCH_VERSION(0, 19), \ - RECOVERY_PASS_ALL_FSCK) \ - x(alloc_v4, BCH_VERSION(0, 20), \ - RECOVERY_PASS_ALL_FSCK) \ - x(new_data_types, BCH_VERSION(0, 21), \ - RECOVERY_PASS_ALL_FSCK) \ - x(backpointers, BCH_VERSION(0, 22), \ - RECOVERY_PASS_ALL_FSCK) \ - x(inode_v3, BCH_VERSION(0, 23), \ - RECOVERY_PASS_ALL_FSCK) \ - x(unwritten_extents, BCH_VERSION(0, 24), \ - RECOVERY_PASS_ALL_FSCK) \ - x(bucket_gens, BCH_VERSION(0, 25), \ - BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ - RECOVERY_PASS_ALL_FSCK) \ - x(lru_v2, BCH_VERSION(0, 26), \ - RECOVERY_PASS_ALL_FSCK) \ - x(fragmentation_lru, BCH_VERSION(0, 27), \ - RECOVERY_PASS_ALL_FSCK) \ - x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \ - RECOVERY_PASS_ALL_FSCK) \ - x(snapshot_trees, BCH_VERSION(0, 29), \ - RECOVERY_PASS_ALL_FSCK) \ - x(major_minor, BCH_VERSION(1, 0), \ - 0) \ - x(snapshot_skiplists, BCH_VERSION(1, 1), \ - BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \ - x(deleted_inodes, BCH_VERSION(1, 2), \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \ - x(rebalance_work, BCH_VERSION(1, 3), \ - BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \ - x(member_seq, BCH_VERSION(1, 4), \ - 0) \ - x(disk_accounting_v2, BCH_VERSION(1, 5), \ - BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info)) + x(bkey_renumber, BCH_VERSION(0, 10)) \ + x(inode_btree_change, BCH_VERSION(0, 11)) \ + x(snapshot, BCH_VERSION(0, 12)) \ + 
x(inode_backpointers, BCH_VERSION(0, 13)) \ + x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \ + x(snapshot_2, BCH_VERSION(0, 15)) \ + x(reflink_p_fix, BCH_VERSION(0, 16)) \ + x(subvol_dirent, BCH_VERSION(0, 17)) \ + x(inode_v2, BCH_VERSION(0, 18)) \ + x(freespace, BCH_VERSION(0, 19)) \ + x(alloc_v4, BCH_VERSION(0, 20)) \ + x(new_data_types, BCH_VERSION(0, 21)) \ + x(backpointers, BCH_VERSION(0, 22)) \ + x(inode_v3, BCH_VERSION(0, 23)) \ + x(unwritten_extents, BCH_VERSION(0, 24)) \ + x(bucket_gens, BCH_VERSION(0, 25)) \ + x(lru_v2, BCH_VERSION(0, 26)) \ + x(fragmentation_lru, BCH_VERSION(0, 27)) \ + x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \ + x(snapshot_trees, BCH_VERSION(0, 29)) \ + x(major_minor, BCH_VERSION(1, 0)) \ + x(snapshot_skiplists, BCH_VERSION(1, 1)) \ + x(deleted_inodes, BCH_VERSION(1, 2)) \ + x(rebalance_work, BCH_VERSION(1, 3)) \ + x(member_seq, BCH_VERSION(1, 4)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, -#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n, +#define x(t, n) bcachefs_metadata_version_##t = n, BCH_METADATA_VERSIONS() #undef x bcachefs_metadata_version_max diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index 912adad..ee82283 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -28,10 +28,8 @@ struct bkey_ops { void (*swab)(struct bkey_s); bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); - int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); - int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + int (*trigger)(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); void (*compat)(enum btree_id id, unsigned version, unsigned big_endian, int write, struct bkey_s); @@ -78,82 +76,86 @@ static inline bool 
bch2_bkey_maybe_mergable(const struct bkey *l, const struct b bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -static inline int bch2_mark_key(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); - - return ops->atomic_trigger - ? ops->atomic_trigger(trans, btree, level, old, new, flags) - : 0; -} - enum btree_update_flags { __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END, __BTREE_UPDATE_NOJOURNAL, __BTREE_UPDATE_KEY_CACHE_RECLAIM, - __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ - + __BTREE_TRIGGER_NORUN, + __BTREE_TRIGGER_TRANSACTIONAL, __BTREE_TRIGGER_INSERT, __BTREE_TRIGGER_OVERWRITE, - __BTREE_TRIGGER_GC, __BTREE_TRIGGER_BUCKET_INVALIDATE, - __BTREE_TRIGGER_NOATOMIC, }; #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) #define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL) #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM) +/* Don't run triggers at all */ #define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN) +/* + * If set, we're running transactional triggers as part of a transaction commit: + * triggers may generate new updates + * + * If cleared, and either BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE are set, + * we're running atomic triggers during a transaction commit: we have our + * journal reservation, we're holding btree node write locks, and we know the + * transaction is going to commit (returning an error here is a fatal error, + * causing us to go emergency read-only) + */ +#define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL) + +/* @new is entering the btree */ #define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT) + +/* @old is leaving the btree */ #define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE) +/* We're in gc/fsck: 
running triggers to recalculate e.g. disk usage */ #define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC) + +/* signal from bucket invalidate path to alloc trigger */ #define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE) -#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC) -static inline int bch2_trans_mark_key(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) +static inline int bch2_key_trigger(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { - const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type); + const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type); - return ops->trans_trigger - ? ops->trans_trigger(trans, btree_id, level, old, new, flags) + return ops->trigger + ? ops->trigger(trans, btree, level, old, new, flags) : 0; } -static inline int bch2_trans_mark_old(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, unsigned flags) +static inline int bch2_key_trigger_old(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, unsigned flags) { struct bkey_i deleted; bkey_init(&deleted.k); deleted.k.p = old.k->p; - return bch2_trans_mark_key(trans, btree_id, level, old, &deleted, - BTREE_TRIGGER_OVERWRITE|flags); + return bch2_key_trigger(trans, btree_id, level, old, bkey_i_to_s(&deleted), + BTREE_TRIGGER_OVERWRITE|flags); } -static inline int bch2_trans_mark_new(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_i *new, unsigned flags) +static inline int bch2_key_trigger_new(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s new, unsigned flags) { struct bkey_i deleted; bkey_init(&deleted.k); - deleted.k.p = new->k.p; + deleted.k.p = new.k->p; - return bch2_trans_mark_key(trans, btree_id, 
level, bkey_i_to_s_c(&deleted), new, - BTREE_TRIGGER_INSERT|flags); + return bch2_key_trigger(trans, btree_id, level, bkey_i_to_s_c(&deleted), new, + BTREE_TRIGGER_INSERT|flags); } void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 9574c8c..8e2488a 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -719,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, if (IS_ERR(b)) return b; - /* - * Btree nodes read in from disk should not have the accessed bit set - * initially, so that linear scans don't thrash the cache: - */ - clear_btree_node_accessed(b); - bkey_copy(&b->key, k); if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { /* raced with another fill: */ diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 9f27cb3..49b4ade 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -41,6 +41,14 @@ #define DROP_THIS_NODE 10 #define DROP_PREV_NODE 11 +static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) +{ + return (struct bkey_s) {{{ + (struct bkey *) k.k, + (struct bch_val *) k.v + }}}; +} + static bool should_restart_for_topology_repair(struct bch_fs *c) { return c->opts.fix_errors != FSCK_FIX_no && @@ -805,9 +813,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, struct bch_fs *c = trans->c; struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; - unsigned flags = - BTREE_TRIGGER_GC| - (initial ? 
BTREE_TRIGGER_NOATOMIC : 0); int ret = 0; deleted.p = k->k->p; @@ -829,7 +834,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, } ret = commit_do(trans, NULL, NULL, 0, - bch2_mark_key(trans, btree_id, level, old, *k, flags)); + bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC)); fsck_err: err: bch_err_fn(c, ret); @@ -1589,7 +1594,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, if (!r->refcount) new->k.type = KEY_TYPE_deleted; else - *bkey_refcount(new) = cpu_to_le64(r->refcount); + *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); } fsck_err: printbuf_exit(&buf); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 38d27ca..378579b 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -942,6 +942,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned ptr_written = btree_ptr_sectors_written(&b->key); struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; + u64 start_time = local_clock(); b->version_ondisk = U16_MAX; /* We might get called multiple times on read retry: */ @@ -1209,6 +1210,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, out: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); return retry_read; fsck_err: if (ret == -BCH_ERR_btree_node_read_err_want_retry || @@ -1645,7 +1647,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, if (sync) { submit_bio_wait(bio); - + bch2_latency_acct(ca, rb->start_time, READ); btree_node_read_work(&rb->work); } else { submit_bio(bio); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 7e5c797..6e8e9ba 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -897,7 +897,8 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, 
bch2_bkey_buf_reassemble(out, c, k); - if (flags & BTREE_ITER_PREFETCH) + if ((flags & BTREE_ITER_PREFETCH) && + c->opts.btree_node_prefetch) ret = btree_path_prefetch_j(trans, path, &jiter); bch2_btree_and_journal_iter_exit(&jiter); @@ -929,7 +930,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans, bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - if (flags & BTREE_ITER_PREFETCH) { + if ((flags & BTREE_ITER_PREFETCH) && + c->opts.btree_node_prefetch) { ret = btree_path_prefetch(trans, path); if (ret) goto err; @@ -2816,11 +2818,34 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } +#include "sb-members.h" + static inline void check_srcu_held_too_long(struct btree_trans *trans) { - WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10), - "btree trans held srcu lock (delaying memory reclaim) for %lu seconds", - (jiffies - trans->srcu_lock_time) / HZ); + if (trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10)) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "btree node read time:\n"); + bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read]); + + prt_str(&buf, "btree node read_done time:\n"); + bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read_done]); + + for_each_member_device(trans->c, ca) { + prt_printf(&buf, "device %u read time:\n", ca->dev_idx); + bch2_time_stats_to_text(&buf, &ca->io_latency[READ]); + } + + struct btree_transaction_stats *s = btree_trans_stats(trans); + prt_str(&buf, "transaction duration:\n"); + bch2_time_stats_to_text(&buf, &s->duration); + + WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10), + "btree trans held srcu lock (delaying memory reclaim) for %lu seconds", + (jiffies - trans->srcu_lock_time) / HZ); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); + } } void bch2_trans_srcu_unlock(struct btree_trans *trans) 
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 1ed8327..2d1c95c 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -86,8 +86,14 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g) prt_printf(out, "Found lock cycle (%u entries):", g->nr); prt_newline(out); - for (i = g->g; i < g->g + g->nr; i++) + for (i = g->g; i < g->g + g->nr; i++) { + struct task_struct *task = READ_ONCE(i->trans->locking_wait.task); + if (!task) + continue; + bch2_btree_trans_to_text(out, i->trans); + bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1); + } } static noinline void print_chain(struct printbuf *out, struct lock_graph *g) @@ -144,8 +150,7 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g) return false; } -static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans, - unsigned long ip) +static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -157,7 +162,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans buf.atomic++; print_cycle(&buf, g); - trace_trans_restart_would_deadlock(trans, ip, buf.buf); + trace_trans_restart_would_deadlock(trans, buf.buf); printbuf_exit(&buf); } } @@ -165,7 +170,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) { if (i == g->g) { - trace_would_deadlock(g, i->trans, _RET_IP_); + trace_would_deadlock(g, i->trans); return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock); } else { i->trans->lock_must_abort = true; @@ -222,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle) prt_printf(&buf, "backtrace:"); prt_newline(&buf); printbuf_indent_add(&buf, 2); - bch2_prt_task_backtrace(&buf, trans->locking_wait.task); + bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2); 
printbuf_indent_sub(&buf, 2); prt_newline(&buf); } @@ -291,7 +296,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) if (cycle) return -1; - trace_would_deadlock(&g, trans, _RET_IP_); + trace_would_deadlock(&g, trans); return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c index 3472882..8050555 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree_trans_commit.c @@ -451,20 +451,15 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id))) return 0; - if (old_ops->atomic_trigger == new_ops->atomic_trigger) { - ret = bch2_mark_key(trans, i->btree_id, i->level, - old, bkey_i_to_s_c(new), + if (old_ops->trigger == new_ops->trigger) { + ret = bch2_key_trigger(trans, i->btree_id, i->level, + old, bkey_i_to_s(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); } else { - struct bkey _deleted = POS_KEY((trans->paths + i->path)->pos); - struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL }; - - ret = bch2_mark_key(trans, i->btree_id, i->level, - deleted, bkey_i_to_s_c(new), - BTREE_TRIGGER_INSERT|flags) ?: - bch2_mark_key(trans, i->btree_id, i->level, - old, deleted, - BTREE_TRIGGER_OVERWRITE|flags); + ret = bch2_key_trigger_new(trans, i->btree_id, i->level, + bkey_i_to_s(new), flags) ?: + bch2_key_trigger_old(trans, i->btree_id, i->level, + old, flags); } return ret; @@ -482,6 +477,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ struct bkey_s_c old = { &old_k, i->old_v }; const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type); const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type); + unsigned flags = i->flags|BTREE_TRIGGER_TRANSACTIONAL; verify_update_old_key(trans, i); @@ -491,19 +487,18 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ 
if (!i->insert_trigger_run && !i->overwrite_trigger_run && - old_ops->trans_trigger == new_ops->trans_trigger) { + old_ops->trigger == new_ops->trigger) { i->overwrite_trigger_run = true; i->insert_trigger_run = true; - return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k, - BTREE_TRIGGER_INSERT| - BTREE_TRIGGER_OVERWRITE| - i->flags) ?: 1; + return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k), + BTREE_TRIGGER_INSERT| + BTREE_TRIGGER_OVERWRITE|flags) ?: 1; } else if (overwrite && !i->overwrite_trigger_run) { i->overwrite_trigger_run = true; - return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1; + return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1; } else if (!overwrite && !i->insert_trigger_run) { i->insert_trigger_run = true; - return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1; + return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1; } else { return 0; } diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 2a93eb9..44f9dfa 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -568,7 +568,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, for_each_keylist_key(&as->old_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0); + ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k), + BTREE_TRIGGER_TRANSACTIONAL); if (ret) return ret; } @@ -576,7 +577,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans, for_each_keylist_key(&as->new_keys, k) { unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr; - ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0); + ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k), + BTREE_TRIGGER_TRANSACTIONAL); if (ret) return ret; } @@ 
-2156,13 +2158,12 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, int ret; if (!skip_triggers) { - ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1, - bkey_i_to_s_c(&b->key), 0); - if (ret) - return ret; - - ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1, - new_key, 0); + ret = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s_c(&b->key), + BTREE_TRIGGER_TRANSACTIONAL) ?: + bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1, + bkey_i_to_s(new_key), + BTREE_TRIGGER_TRANSACTIONAL); if (ret) return ret; } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index c0dac04..67b7e79 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -296,10 +296,10 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) } } -static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, - struct bch_alloc_v4 old, - struct bch_alloc_v4 new, - u64 journal_seq, bool gc) +void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, + const struct bch_alloc_v4 *old, + const struct bch_alloc_v4 *new, + u64 journal_seq, bool gc) { struct bch_fs_usage *fs_usage; struct bch_dev_usage *u; @@ -307,24 +307,24 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, preempt_disable(); fs_usage = fs_usage_ptr(c, journal_seq, gc); - if (data_type_is_hidden(old.data_type)) + if (data_type_is_hidden(old->data_type)) fs_usage->hidden -= ca->mi.bucket_size; - if (data_type_is_hidden(new.data_type)) + if (data_type_is_hidden(new->data_type)) fs_usage->hidden += ca->mi.bucket_size; u = dev_usage_ptr(ca, journal_seq, gc); - u->d[old.data_type].buckets--; - u->d[new.data_type].buckets++; + u->d[old->data_type].buckets--; + u->d[new->data_type].buckets++; - u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old); - u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new); + u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old); + 
u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new); - u->d[BCH_DATA_cached].sectors += new.cached_sectors; - u->d[BCH_DATA_cached].sectors -= old.cached_sectors; + u->d[BCH_DATA_cached].sectors += new->cached_sectors; + u->d[BCH_DATA_cached].sectors -= old->cached_sectors; - u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old); - u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new); + u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old); + u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new); preempt_enable(); } @@ -340,13 +340,13 @@ static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b) }; } -static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, - struct bucket old, struct bucket new) +void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca, + struct bucket *old, struct bucket *new) { - bch2_dev_usage_update(c, ca, - bucket_m_to_alloc(old), - bucket_m_to_alloc(new), - 0, true); + struct bch_alloc_v4 old_a = bucket_m_to_alloc(*old); + struct bch_alloc_v4 new_a = bucket_m_to_alloc(*new); + + bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true); } static inline int __update_replicas(struct bch_fs *c, @@ -364,9 +364,9 @@ static inline int __update_replicas(struct bch_fs *c, return 0; } -static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k, - struct bch_replicas_entry_v1 *r, s64 sectors, - unsigned journal_seq, bool gc) +int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k, + struct bch_replicas_entry_v1 *r, s64 sectors, + unsigned journal_seq, bool gc) { struct bch_fs_usage *fs_usage; int idx, ret = 0; @@ -413,7 +413,7 @@ static inline int update_cached_sectors(struct bch_fs *c, bch2_replicas_entry_cached(&r.e, dev); - return update_replicas(c, k, &r.e, sectors, journal_seq, gc); + return bch2_update_replicas(c, k, &r.e, sectors, journal_seq, gc); } static int __replicas_deltas_realloc(struct 
btree_trans *trans, unsigned more, @@ -496,114 +496,6 @@ int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64 return bch2_update_replicas_list(trans, &r.e, sectors); } -int bch2_mark_alloc(struct btree_trans *trans, - enum btree_id btree, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - bool gc = flags & BTREE_TRIGGER_GC; - u64 journal_seq = trans->journal_res.seq; - u64 bucket_journal_seq; - struct bch_fs *c = trans->c; - struct bch_alloc_v4 old_a_convert, new_a_convert; - const struct bch_alloc_v4 *old_a, *new_a; - struct bch_dev *ca; - int ret = 0; - - /* - * alloc btree is read in by bch2_alloc_read, not gc: - */ - if ((flags & BTREE_TRIGGER_GC) && - !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) - return 0; - - if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans, - "alloc key for invalid device or bucket")) - return -EIO; - - ca = bch_dev_bkey_exists(c, new.k->p.inode); - - old_a = bch2_alloc_to_v4(old, &old_a_convert); - new_a = bch2_alloc_to_v4(new, &new_a_convert); - - bucket_journal_seq = new_a->journal_seq; - - if ((flags & BTREE_TRIGGER_INSERT) && - data_type_is_empty(old_a->data_type) != - data_type_is_empty(new_a->data_type) && - new.k->type == KEY_TYPE_alloc_v4) { - struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v; - - EBUG_ON(!journal_seq); - - /* - * If the btree updates referring to a bucket weren't flushed - * before the bucket became empty again, then the we don't have - * to wait on a journal flush before we can reuse the bucket: - */ - v->journal_seq = bucket_journal_seq = - data_type_is_empty(new_a->data_type) && - (journal_seq == v->journal_seq || - bch2_journal_noflush_seq(&c->journal, v->journal_seq)) - ? 
0 : journal_seq; - } - - if (!data_type_is_empty(old_a->data_type) && - data_type_is_empty(new_a->data_type) && - bucket_journal_seq) { - ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - new.k->p.inode, new.k->p.offset, - bucket_journal_seq); - if (ret) { - bch2_fs_fatal_error(c, - "error setting bucket_needs_journal_commit: %i", ret); - return ret; - } - } - - percpu_down_read(&c->mark_lock); - if (!gc && new_a->gen != old_a->gen) - *bucket_gen(ca, new.k->p.offset) = new_a->gen; - - bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc); - - if (gc) { - struct bucket *g = gc_bucket(ca, new.k->p.offset); - - bucket_lock(g); - - g->gen_valid = 1; - g->gen = new_a->gen; - g->data_type = new_a->data_type; - g->stripe = new_a->stripe; - g->stripe_redundancy = new_a->stripe_redundancy; - g->dirty_sectors = new_a->dirty_sectors; - g->cached_sectors = new_a->cached_sectors; - - bucket_unlock(g); - } - percpu_up_read(&c->mark_lock); - - if (new_a->data_type == BCH_DATA_free && - (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk)) - closure_wake_up(&c->freelist_wait); - - if (new_a->data_type == BCH_DATA_need_discard && - (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk)) - bch2_do_discards(c); - - if (old_a->data_type != BCH_DATA_cached && - new_a->data_type == BCH_DATA_cached && - should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) - bch2_do_invalidates(c); - - if (new_a->data_type == BCH_DATA_need_gc_gens) - bch2_do_gc_gens(c); - - return 0; -} - int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, size_t b, enum bch_data_type data_type, unsigned sectors, struct gc_pos pos, @@ -652,17 +544,17 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, err: bucket_unlock(g); if (!ret) - bch2_dev_usage_update_m(c, ca, old, new); + bch2_dev_usage_update_m(c, ca, &old, &new); percpu_up_read(&c->mark_lock); return ret; } -static 
int check_bucket_ref(struct btree_trans *trans, - struct bkey_s_c k, - const struct bch_extent_ptr *ptr, - s64 sectors, enum bch_data_type ptr_data_type, - u8 b_gen, u8 bucket_data_type, - u32 bucket_sectors) +int bch2_check_bucket_ref(struct btree_trans *trans, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 b_gen, u8 bucket_data_type, + u32 bucket_sectors) { struct bch_fs *c = trans->c; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); @@ -761,404 +653,6 @@ err: goto out; } -static int mark_stripe_bucket(struct btree_trans *trans, - struct bkey_s_c k, - unsigned ptr_idx, - unsigned flags) -{ - struct bch_fs *c = trans->c; - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - unsigned nr_data = s->nr_blocks - s->nr_redundant; - bool parity = ptr_idx >= nr_data; - enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; - s64 sectors = parity ? le16_to_cpu(s->sectors) : 0; - const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket old, new, *g; - struct printbuf buf = PRINTBUF; - int ret = 0; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - /* * XXX doesn't handle deletion */ - - percpu_down_read(&c->mark_lock); - g = PTR_GC_BUCKET(ca, ptr); - - if (g->dirty_sectors || - (g->stripe && g->stripe != k.k->p.offset)) { - bch2_fs_inconsistent(c, - "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", - ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - ret = -EINVAL; - goto err; - } - - bucket_lock(g); - old = *g; - - ret = check_bucket_ref(trans, k, ptr, sectors, data_type, - g->gen, g->data_type, - g->dirty_sectors); - if (ret) - goto err; - - g->data_type = data_type; - g->dirty_sectors += sectors; - - g->stripe = k.k->p.offset; - g->stripe_redundancy = s->nr_redundant; - new = *g; -err: - bucket_unlock(g); - if (!ret) - bch2_dev_usage_update_m(c, ca, old, new); 
- percpu_up_read(&c->mark_lock); - printbuf_exit(&buf); - return ret; -} - -static int __mark_pointer(struct btree_trans *trans, - struct bkey_s_c k, - const struct bch_extent_ptr *ptr, - s64 sectors, enum bch_data_type ptr_data_type, - u8 bucket_gen, u8 *bucket_data_type, - u32 *dirty_sectors, u32 *cached_sectors) -{ - u32 *dst_sectors = !ptr->cached - ? dirty_sectors - : cached_sectors; - int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, - bucket_gen, *bucket_data_type, *dst_sectors); - - if (ret) - return ret; - - *dst_sectors += sectors; - - if (!*dirty_sectors && !*cached_sectors) - *bucket_data_type = 0; - else if (*bucket_data_type != BCH_DATA_stripe) - *bucket_data_type = ptr_data_type; - - return 0; -} - -static int bch2_mark_pointer(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, - struct extent_ptr_decoded p, - s64 sectors, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - struct bucket old, new, *g; - enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); - u8 bucket_data_type; - int ret = 0; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - percpu_down_read(&c->mark_lock); - g = PTR_GC_BUCKET(ca, &p.ptr); - bucket_lock(g); - old = *g; - - bucket_data_type = g->data_type; - ret = __mark_pointer(trans, k, &p.ptr, sectors, - data_type, g->gen, - &bucket_data_type, - &g->dirty_sectors, - &g->cached_sectors); - if (!ret) - g->data_type = bucket_data_type; - - new = *g; - bucket_unlock(g); - if (!ret) - bch2_dev_usage_update_m(c, ca, old, new); - percpu_up_read(&c->mark_lock); - - return ret; -} - -static int bch2_mark_stripe_ptr(struct btree_trans *trans, - struct bkey_s_c k, - struct bch_extent_stripe_ptr p, - enum bch_data_type data_type, - s64 sectors, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_replicas_padded r; - struct gc_stripe *m; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - m = 
genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL); - if (!m) { - bch_err(c, "error allocating memory for gc_stripes, idx %llu", - (u64) p.idx); - return -BCH_ERR_ENOMEM_mark_stripe_ptr; - } - - mutex_lock(&c->ec_stripes_heap_lock); - - if (!m || !m->alive) { - mutex_unlock(&c->ec_stripes_heap_lock); - bch_err_ratelimited(c, "pointer to nonexistent stripe %llu", - (u64) p.idx); - bch2_inconsistent_error(c); - return -EIO; - } - - m->block_sectors[p.block] += sectors; - - r = m->r; - mutex_unlock(&c->ec_stripes_heap_lock); - - r.e.data_type = data_type; - update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); - - return 0; -} - -static int __mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - u64 journal_seq = trans->journal_res.seq; - struct bch_fs *c = trans->c; - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bch_replicas_padded r; - enum bch_data_type data_type = bkey_is_btree_ptr(k.k) - ? BCH_DATA_btree - : BCH_DATA_user; - s64 sectors = bkey_is_btree_ptr(k.k) - ? 
btree_sectors(c) - : k.k->size; - s64 dirty_sectors = 0; - bool stale; - int ret; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - r.e.data_type = data_type; - r.e.nr_devs = 0; - r.e.nr_required = 1; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors = ptr_disk_sectors(sectors, p); - - if (flags & BTREE_TRIGGER_OVERWRITE) - disk_sectors = -disk_sectors; - - ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags); - if (ret < 0) - return ret; - - stale = ret > 0; - - if (p.ptr.cached) { - if (!stale) { - ret = update_cached_sectors(c, k, p.ptr.dev, - disk_sectors, journal_seq, true); - if (ret) { - bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors", - __func__); - return ret; - } - } - } else if (!p.has_ec) { - dirty_sectors += disk_sectors; - r.e.devs[r.e.nr_devs++] = p.ptr.dev; - } else { - ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type, - disk_sectors, flags); - if (ret) - return ret; - - /* - * There may be other dirty pointers in this extent, but - * if so they're not required for mounting if we have an - * erasure coded pointer in this extent: - */ - r.e.nr_required = 0; - } - } - - if (r.e.nr_devs) { - ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true); - if (ret) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, k); - bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); - printbuf_exit(&buf); - return ret; - } - } - - return 0; -} - -int bch2_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags); -} - -int bch2_mark_stripe(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - bool gc = flags & BTREE_TRIGGER_GC; - u64 journal_seq = trans->journal_res.seq; - 
struct bch_fs *c = trans->c; - u64 idx = new.k->p.offset; - const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe - ? bkey_s_c_to_stripe(old).v : NULL; - const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe - ? bkey_s_c_to_stripe(new).v : NULL; - unsigned i; - int ret; - - BUG_ON(gc && old_s); - - if (!gc) { - struct stripe *m = genradix_ptr(&c->stripes, idx); - - if (!m) { - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; - - bch2_bkey_val_to_text(&buf1, c, old); - bch2_bkey_val_to_text(&buf2, c, new); - bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" - "old %s\n" - "new %s", idx, buf1.buf, buf2.buf); - printbuf_exit(&buf2); - printbuf_exit(&buf1); - bch2_inconsistent_error(c); - return -1; - } - - if (!new_s) { - bch2_stripes_heap_del(c, m, idx); - - memset(m, 0, sizeof(*m)); - } else { - m->sectors = le16_to_cpu(new_s->sectors); - m->algorithm = new_s->algorithm; - m->nr_blocks = new_s->nr_blocks; - m->nr_redundant = new_s->nr_redundant; - m->blocks_nonempty = 0; - - for (i = 0; i < new_s->nr_blocks; i++) - m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); - - if (!old_s) - bch2_stripes_heap_insert(c, m, idx); - else - bch2_stripes_heap_update(c, m, idx); - } - } else { - struct gc_stripe *m = - genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); - - if (!m) { - bch_err(c, "error allocating memory for gc_stripes, idx %llu", - idx); - return -BCH_ERR_ENOMEM_mark_stripe; - } - /* - * This will be wrong when we bring back runtime gc: we should - * be unmarking the old key and then marking the new key - */ - m->alive = true; - m->sectors = le16_to_cpu(new_s->sectors); - m->nr_blocks = new_s->nr_blocks; - m->nr_redundant = new_s->nr_redundant; - - for (i = 0; i < new_s->nr_blocks; i++) - m->ptrs[i] = new_s->ptrs[i]; - - bch2_bkey_to_replicas(&m->r.e, new); - - /* - * gc recalculates this field from stripe ptr - * references: - */ - memset(m->block_sectors, 0, sizeof(m->block_sectors)); - 
- for (i = 0; i < new_s->nr_blocks; i++) { - ret = mark_stripe_bucket(trans, new, i, flags); - if (ret) - return ret; - } - - ret = update_replicas(c, new, &m->r.e, - ((s64) m->sectors * m->nr_redundant), - journal_seq, gc); - if (ret) { - struct printbuf buf = PRINTBUF; - - bch2_bkey_val_to_text(&buf, c, new); - bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); - printbuf_exit(&buf); - return ret; - } - } - - return 0; -} - -static int __mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_fs_usage *fs_usage; - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - s64 sectors = (s64) k.k->size; - - BUG_ON(!(flags & BTREE_TRIGGER_GC)); - - if (flags & BTREE_TRIGGER_OVERWRITE) - sectors = -sectors; - sectors *= replicas; - - percpu_down_read(&c->mark_lock); - preempt_disable(); - - fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC); - replicas = clamp_t(unsigned, replicas, 1, - ARRAY_SIZE(fs_usage->persistent_reserved)); - - fs_usage->reserved += sectors; - fs_usage->persistent_reserved[replicas - 1] += sectors; - - preempt_enable(); - percpu_up_read(&c->mark_lock); - - return 0; -} - -int bch2_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags); -} - void bch2_trans_fs_usage_revert(struct btree_trans *trans, struct replicas_delta_list *deltas) { @@ -1278,92 +772,184 @@ need_mark: return -1; } -/* trans_mark: */ +/* KEY_TYPE_extent: */ + +static int __mark_pointer(struct btree_trans *trans, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 bucket_gen, u8 *bucket_data_type, + u32 *dirty_sectors, u32 *cached_sectors) +{ + u32 *dst_sectors = 
!ptr->cached + ? dirty_sectors + : cached_sectors; + int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type, + bucket_gen, *bucket_data_type, *dst_sectors); + + if (ret) + return ret; + + *dst_sectors += sectors; + + if (!*dirty_sectors && !*cached_sectors) + *bucket_data_type = 0; + else if (*bucket_data_type != BCH_DATA_stripe) + *bucket_data_type = ptr_data_type; + + return 0; +} -static inline int bch2_trans_mark_pointer(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, struct extent_ptr_decoded p, - unsigned flags) +static int bch2_trigger_pointer(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, struct extent_ptr_decoded p, + s64 *sectors, + unsigned flags) { bool insert = !(flags & BTREE_TRIGGER_OVERWRITE); - struct btree_iter iter; - struct bkey_i_alloc_v4 *a; struct bpos bucket; struct bch_backpointer bp; - s64 sectors; - int ret; bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp); - sectors = bp.bucket_len; - if (!insert) - sectors = -sectors; - - a = bch2_trans_start_alloc_update(trans, &iter, bucket); - if (IS_ERR(a)) - return PTR_ERR(a); + *sectors = insert ? 
bp.bucket_len : -((s64) bp.bucket_len); - ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type, - a->v.gen, &a->v.data_type, - &a->v.dirty_sectors, &a->v.cached_sectors) ?: - bch2_trans_update(trans, &iter, &a->k_i, 0); - bch2_trans_iter_exit(trans, &iter); + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct btree_iter iter; + struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &iter, bucket); + int ret = PTR_ERR_OR_ZERO(a); + if (ret) + return ret; - if (ret) - return ret; + ret = __mark_pointer(trans, k, &p.ptr, *sectors, bp.data_type, + a->v.gen, &a->v.data_type, + &a->v.dirty_sectors, &a->v.cached_sectors) ?: + bch2_trans_update(trans, &iter, &a->k_i, 0); + bch2_trans_iter_exit(trans, &iter); - if (!p.ptr.cached) { - ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert); if (ret) return ret; + + if (!p.ptr.cached) { + ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert); + if (ret) + return ret; + } + } + + if (flags & BTREE_TRIGGER_GC) { + struct bch_fs *c = trans->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); + enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p); + + percpu_down_read(&c->mark_lock); + struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + bucket_lock(g); + struct bucket old = *g; + + u8 bucket_data_type = g->data_type; + int ret = __mark_pointer(trans, k, &p.ptr, *sectors, + data_type, g->gen, + &bucket_data_type, + &g->dirty_sectors, + &g->cached_sectors); + if (ret) { + bucket_unlock(g); + percpu_up_read(&c->mark_lock); + return ret; + } + + g->data_type = bucket_data_type; + struct bucket new = *g; + bucket_unlock(g); + bch2_dev_usage_update_m(c, ca, &old, &new); + percpu_up_read(&c->mark_lock); } return 0; } -static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, - struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type data_type) +static int bch2_trigger_stripe_ptr(struct btree_trans *trans, + struct bkey_s_c k, + struct extent_ptr_decoded p, + enum 
bch_data_type data_type, + s64 sectors, unsigned flags) { - struct btree_iter iter; - struct bkey_i_stripe *s; - struct bch_replicas_padded r; - int ret = 0; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct btree_iter iter; + struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, + BTREE_ID_stripes, POS(0, p.ec.idx), + BTREE_ITER_WITH_UPDATES, stripe); + int ret = PTR_ERR_OR_ZERO(s); + if (unlikely(ret)) { + bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, + "pointer to nonexistent stripe %llu", + (u64) p.ec.idx); + goto err; + } - s = bch2_bkey_get_mut_typed(trans, &iter, - BTREE_ID_stripes, POS(0, p.ec.idx), - BTREE_ITER_WITH_UPDATES, stripe); - ret = PTR_ERR_OR_ZERO(s); - if (unlikely(ret)) { - bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, - "pointer to nonexistent stripe %llu", - (u64) p.ec.idx); - goto err; - } + if (!bch2_ptr_matches_stripe(&s->v, p)) { + bch2_trans_inconsistent(trans, + "stripe pointer doesn't match stripe %llu", + (u64) p.ec.idx); + ret = -EIO; + goto err; + } - if (!bch2_ptr_matches_stripe(&s->v, p)) { - bch2_trans_inconsistent(trans, - "stripe pointer doesn't match stripe %llu", - (u64) p.ec.idx); - ret = -EIO; - goto err; + stripe_blockcount_set(&s->v, p.ec.block, + stripe_blockcount_get(&s->v, p.ec.block) + + sectors); + + struct bch_replicas_padded r; + bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); + r.e.data_type = data_type; + ret = bch2_update_replicas_list(trans, &r.e, sectors); +err: + bch2_trans_iter_exit(trans, &iter); + return ret; } - stripe_blockcount_set(&s->v, p.ec.block, - stripe_blockcount_get(&s->v, p.ec.block) + - sectors); + if (flags & BTREE_TRIGGER_GC) { + struct bch_fs *c = trans->c; - bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i)); - r.e.data_type = data_type; - ret = bch2_update_replicas_list(trans, &r.e, sectors); -err: - bch2_trans_iter_exit(trans, &iter); - return ret; + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + struct gc_stripe *m = 
genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL); + if (!m) { + bch_err(c, "error allocating memory for gc_stripes, idx %llu", + (u64) p.ec.idx); + return -BCH_ERR_ENOMEM_mark_stripe_ptr; + } + + mutex_lock(&c->ec_stripes_heap_lock); + + if (!m || !m->alive) { + mutex_unlock(&c->ec_stripes_heap_lock); + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, k); + bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s", + (u64) p.ec.idx, buf.buf); + printbuf_exit(&buf); + bch2_inconsistent_error(c); + return -EIO; + } + + m->block_sectors[p.ec.block] += sectors; + + struct bch_replicas_padded r = m->r; + mutex_unlock(&c->ec_stripes_heap_lock); + + r.e.data_type = data_type; + bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true); + } + + return 0; } -static int __trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) +static int __trigger_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) { + bool gc = flags & BTREE_TRIGGER_GC; struct bch_fs *c = trans->c; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1372,11 +958,7 @@ static int __trans_mark_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ? BCH_DATA_btree : BCH_DATA_user; - s64 sectors = bkey_is_btree_ptr(k.k) - ? 
btree_sectors(c) - : k.k->size; s64 dirty_sectors = 0; - bool stale; int ret = 0; r.e.data_type = data_type; @@ -1384,21 +966,20 @@ static int __trans_mark_extent(struct btree_trans *trans, r.e.nr_required = 1; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - s64 disk_sectors = ptr_disk_sectors(sectors, p); - - if (flags & BTREE_TRIGGER_OVERWRITE) - disk_sectors = -disk_sectors; - - ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags); + s64 disk_sectors; + ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags); if (ret < 0) return ret; - stale = ret > 0; + bool stale = ret > 0; if (p.ptr.cached) { if (!stale) { - ret = bch2_update_cached_sectors_list(trans, p.ptr.dev, - disk_sectors); + ret = !gc + ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors) + : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true); + bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors", + __func__); if (ret) return ret; } @@ -1406,226 +987,111 @@ static int __trans_mark_extent(struct btree_trans *trans, dirty_sectors += disk_sectors; r.e.devs[r.e.nr_devs++] = p.ptr.dev; } else { - ret = bch2_trans_mark_stripe_ptr(trans, p, - disk_sectors, data_type); + ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); if (ret) return ret; + /* + * There may be other dirty pointers in this extent, but + * if so they're not required for mounting if we have an + * erasure coded pointer in this extent: + */ r.e.nr_required = 0; } } - if (r.e.nr_devs) - ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors); - - return ret; -} - -int bch2_trans_mark_extent(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) -{ - struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) - - (int) bch2_bkey_needs_rebalance(c, old); + if (r.e.nr_devs) { + ret = !gc + ? 
bch2_update_replicas_list(trans, &r.e, dirty_sectors) + : bch2_update_replicas(c, k, &r.e, dirty_sectors, 0, true); + if (unlikely(ret && gc)) { + struct printbuf buf = PRINTBUF; - if (mod) { - int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0); + bch2_bkey_val_to_text(&buf, c, k); + bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf); + printbuf_exit(&buf); + } if (ret) return ret; } - return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags); + return 0; } -static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, - struct bkey_s_c_stripe s, - unsigned idx, bool deleting) +int bch2_trigger_extent(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { - struct bch_fs *c = trans->c; - const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; - struct btree_iter iter; - struct bkey_i_alloc_v4 *a; - enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant - ? BCH_DATA_parity : 0; - s64 sectors = data_type ? 
le16_to_cpu(s.v->sectors) : 0; - int ret = 0; - - if (deleting) - sectors = -sectors; - - a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr)); - if (IS_ERR(a)) - return PTR_ERR(a); - - ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type, - a->v.gen, a->v.data_type, - a->v.dirty_sectors); - if (ret) - goto err; - - if (!deleting) { - if (bch2_trans_inconsistent_on(a->v.stripe || - a->v.stripe_redundancy, trans, - "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", - iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], - a->v.dirty_sectors, - a->v.stripe, s.k->p.offset)) { - ret = -EIO; - goto err; - } - - if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, - "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", - iter.pos.inode, iter.pos.offset, a->v.gen, - bch2_data_types[a->v.data_type], - a->v.dirty_sectors, - s.k->p.offset)) { - ret = -EIO; - goto err; - } + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + struct bch_fs *c = trans->c; + int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - + (int) bch2_bkey_needs_rebalance(c, old); - a->v.stripe = s.k->p.offset; - a->v.stripe_redundancy = s.v->nr_redundant; - a->v.data_type = BCH_DATA_stripe; - } else { - if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || - a->v.stripe_redundancy != s.v->nr_redundant, trans, - "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", - iter.pos.inode, iter.pos.offset, a->v.gen, - s.k->p.offset, a->v.stripe)) { - ret = -EIO; - goto err; + if (mod) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0); + if (ret) + return ret; } - - a->v.stripe = 0; - a->v.stripe_redundancy = 0; - a->v.data_type = alloc_data_type(a->v, BCH_DATA_user); } - a->v.dirty_sectors += sectors; - if (data_type) - a->v.data_type = !deleting ? 
data_type : 0; + if (flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC)) + return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree_id, level, old, new, flags); - ret = bch2_trans_update(trans, &iter, &a->k_i, 0); - if (ret) - goto err; -err: - bch2_trans_iter_exit(trans, &iter); - return ret; + return 0; } -int bch2_trans_mark_stripe(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) -{ - const struct bch_stripe *old_s = NULL; - struct bch_stripe *new_s = NULL; - struct bch_replicas_padded r; - unsigned i, nr_blocks; - int ret = 0; - - if (old.k->type == KEY_TYPE_stripe) - old_s = bkey_s_c_to_stripe(old).v; - if (new->k.type == KEY_TYPE_stripe) - new_s = &bkey_i_to_stripe(new)->v; - - /* - * If the pointers aren't changing, we don't need to do anything: - */ - if (new_s && old_s && - new_s->nr_blocks == old_s->nr_blocks && - new_s->nr_redundant == old_s->nr_redundant && - !memcmp(old_s->ptrs, new_s->ptrs, - new_s->nr_blocks * sizeof(struct bch_extent_ptr))) - return 0; +/* KEY_TYPE_reservation */ - BUG_ON(new_s && old_s && - (new_s->nr_blocks != old_s->nr_blocks || - new_s->nr_redundant != old_s->nr_redundant)); - - nr_blocks = new_s ? 
new_s->nr_blocks : old_s->nr_blocks; - - if (new_s) { - s64 sectors = le16_to_cpu(new_s->sectors); - - bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new)); - ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); - if (ret) - return ret; - } +static int __trigger_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c k, unsigned flags) +{ + struct bch_fs *c = trans->c; + unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; + s64 sectors = (s64) k.k->size * replicas; - if (old_s) { - s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); + if (flags & BTREE_TRIGGER_OVERWRITE) + sectors = -sectors; - bch2_bkey_to_replicas(&r.e, old); - ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + int ret = bch2_replicas_deltas_realloc(trans, 0); if (ret) return ret; - } - - for (i = 0; i < nr_blocks; i++) { - if (new_s && old_s && - !memcmp(&new_s->ptrs[i], - &old_s->ptrs[i], - sizeof(new_s->ptrs[i]))) - continue; - if (new_s) { - ret = bch2_trans_mark_stripe_bucket(trans, - bkey_i_to_s_c_stripe(new), i, false); - if (ret) - break; - } + struct replicas_delta_list *d = trans->fs_usage_deltas; + replicas = min(replicas, ARRAY_SIZE(d->persistent_reserved)); - if (old_s) { - ret = bch2_trans_mark_stripe_bucket(trans, - bkey_s_c_to_stripe(old), i, true); - if (ret) - break; - } + d->persistent_reserved[replicas - 1] += sectors; } - return ret; -} - -static int __trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - s64 sectors = (s64) k.k->size; - struct replicas_delta_list *d; - int ret; + if (flags & BTREE_TRIGGER_GC) { + percpu_down_read(&c->mark_lock); + preempt_disable(); - if (flags & BTREE_TRIGGER_OVERWRITE) - sectors = -sectors; - sectors *= replicas; + struct bch_fs_usage *fs_usage = 
this_cpu_ptr(c->usage_gc); - ret = bch2_replicas_deltas_realloc(trans, 0); - if (ret) - return ret; + replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved)); + fs_usage->reserved += sectors; + fs_usage->persistent_reserved[replicas - 1] += sectors; - d = trans->fs_usage_deltas; - replicas = clamp_t(unsigned, replicas, 1, - ARRAY_SIZE(d->persistent_reserved)); + preempt_enable(); + percpu_up_read(&c->mark_lock); + } - d->persistent_reserved[replicas - 1] += sectors; return 0; } -int bch2_trans_mark_reservation(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +int bch2_trigger_reservation(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) { - return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags); + return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags); } +/* Mark superblocks: */ + static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index 379101d..2c95cc5 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -302,6 +302,12 @@ u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *); struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); +void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *, + const struct bch_alloc_v4 *, + const struct bch_alloc_v4 *, u64, bool); +void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *, + struct bucket *, struct bucket *); + /* key/bucket marking: */ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, @@ -316,6 +322,9 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c, : c->usage[journal_seq & JOURNAL_BUF_MASK]); } +int 
bch2_update_replicas(struct bch_fs *, struct bkey_s_c, + struct bch_replicas_entry_v1 *, s64, + unsigned, bool); int bch2_update_replicas_list(struct btree_trans *, struct bch_replicas_entry_v1 *, s64); int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64); @@ -323,36 +332,30 @@ int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned); void bch2_fs_usage_initialize(struct bch_fs *); +int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c, + const struct bch_extent_ptr *, + s64, enum bch_data_type, u8, u8, u32); + int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, size_t, enum bch_data_type, unsigned, struct gc_pos, unsigned); -int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); -int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); - -int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned); -#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ +int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); +int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); + +#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\ ({ \ int ret = 0; \ \ if (_old.k->type) \ 
ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \ if (!ret && _new.k->type) \ - ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \ + ret = _fn(_trans, _btree_id, _level, _new.s_c, _flags & ~BTREE_TRIGGER_OVERWRITE);\ ret; \ }) -#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \ - mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags) - void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *); int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *); diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c index 22a52bc..226b39c 100644 --- a/libbcachefs/chardev.c +++ b/libbcachefs/chardev.c @@ -11,16 +11,13 @@ #include "replicas.h" #include "super.h" #include "super-io.h" +#include "thread_with_file.h" -#include #include #include -#include #include #include -#include #include -#include #include #include #include @@ -31,65 +28,6 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long return copy_to_user(to, from, n) ? -EFAULT : 0; } -struct thread_with_file { - struct task_struct *task; - int ret; - bool done; -}; - -static void thread_with_file_exit(struct thread_with_file *thr) -{ - if (thr->task) { - kthread_stop(thr->task); - put_task_struct(thr->task); - } -} - -__printf(4, 0) -static int run_thread_with_file(struct thread_with_file *thr, - const struct file_operations *fops, - int (*fn)(void *), const char *fmt, ...) 
-{ - va_list args; - struct file *file = NULL; - int ret, fd = -1; - struct printbuf name = PRINTBUF; - unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK; - - va_start(args, fmt); - prt_vprintf(&name, fmt, args); - va_end(args); - - thr->ret = 0; - thr->task = kthread_create(fn, thr, name.buf); - ret = PTR_ERR_OR_ZERO(thr->task); - if (ret) - goto err; - - ret = get_unused_fd_flags(fd_flags); - if (ret < 0) - goto err_stop_task; - fd = ret; - - file = anon_inode_getfile(name.buf, fops, thr, fd_flags); - ret = PTR_ERR_OR_ZERO(file); - if (ret) - goto err_put_fd; - - fd_install(fd, file); - get_task_struct(thr->task); - wake_up_process(thr->task); - printbuf_exit(&name); - return fd; -err_put_fd: - put_unused_fd(fd); -err_stop_task: - kthread_stop(thr->task); -err: - printbuf_exit(&name); - return ret; -} - /* returns with ref on ca->ref */ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, unsigned flags) @@ -200,132 +138,33 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg #endif struct fsck_thread { - struct thread_with_file thr; - struct printbuf buf; + struct thread_with_stdio thr; struct bch_fs *c; char **devs; size_t nr_devs; struct bch_opts opts; - - struct log_output output; - DARRAY(char) output2; }; -static void bch2_fsck_thread_free(struct fsck_thread *thr) +static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr) { - thread_with_file_exit(&thr->thr); + struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr); if (thr->devs) for (size_t i = 0; i < thr->nr_devs; i++) kfree(thr->devs[i]); - darray_exit(&thr->output2); - printbuf_exit(&thr->output.buf); kfree(thr->devs); kfree(thr); } -static int bch2_fsck_thread_release(struct inode *inode, struct file *file) -{ - struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); - - bch2_fsck_thread_free(thr); - return 0; -} - -static bool fsck_thread_ready(struct fsck_thread *thr) -{ - return thr->output.buf.pos || 
- thr->output2.nr || - thr->thr.done; -} - -static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); - size_t copied = 0, b; - int ret = 0; - - if ((file->f_flags & O_NONBLOCK) && - !fsck_thread_ready(thr)) - return -EAGAIN; - - ret = wait_event_interruptible(thr->output.wait, - fsck_thread_ready(thr)); - if (ret) - return ret; - - if (thr->thr.done) - return 0; - - while (len) { - ret = darray_make_room(&thr->output2, thr->output.buf.pos); - if (ret) - break; - - spin_lock_irq(&thr->output.lock); - b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos); - - memcpy(&darray_top(thr->output2), thr->output.buf.buf, b); - memmove(thr->output.buf.buf, - thr->output.buf.buf + b, - thr->output.buf.pos - b); - - thr->output2.nr += b; - thr->output.buf.pos -= b; - spin_unlock_irq(&thr->output.lock); - - b = min(len, thr->output2.nr); - if (!b) - break; - - b -= copy_to_user(buf, thr->output2.data, b); - if (!b) { - ret = -EFAULT; - break; - } - - copied += b; - buf += b; - len -= b; - - memmove(thr->output2.data, - thr->output2.data + b, - thr->output2.nr - b); - thr->output2.nr -= b; - } - - return copied ?: ret; -} - -static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait) -{ - struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr); - - poll_wait(file, &thr->output.wait, wait); - - return fsck_thread_ready(thr) - ? 
EPOLLIN|EPOLLHUP - : 0; -} - -static const struct file_operations fsck_thread_ops = { - .release = bch2_fsck_thread_release, - .read = bch2_fsck_thread_read, - .poll = bch2_fsck_thread_poll, - .llseek = no_llseek, -}; - static int bch2_fsck_offline_thread_fn(void *arg) { struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr); struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts); - thr->thr.ret = PTR_ERR_OR_ZERO(c); - if (!thr->thr.ret) + thr->thr.thr.ret = PTR_ERR_OR_ZERO(c); + if (!thr->thr.thr.ret) bch2_fs_stop(c); - thr->thr.done = true; - wake_up(&thr->output.wait); + thread_with_stdio_done(&thr->thr); return 0; } @@ -354,11 +193,6 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a thr->opts = bch2_opts_empty(); thr->nr_devs = arg.nr_devs; - thr->output.buf = PRINTBUF; - thr->output.buf.atomic++; - spin_lock_init(&thr->output.lock); - init_waitqueue_head(&thr->output.wait); - darray_init(&thr->output2); if (copy_from_user(devs, &user_arg->devs[0], array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) { @@ -384,16 +218,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a goto err; } - opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output); + opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio); - ret = run_thread_with_file(&thr->thr, - &fsck_thread_ops, - bch2_fsck_offline_thread_fn, - "bch-fsck"); + ret = bch2_run_thread_with_stdio(&thr->thr, + bch2_fsck_thread_exit, + bch2_fsck_offline_thread_fn); err: if (ret < 0) { if (thr) - bch2_fsck_thread_free(thr); + bch2_fsck_thread_exit(&thr->thr); pr_err("ret %s", bch2_err_str(ret)); } kfree(devs); @@ -592,7 +425,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file) { struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr); - thread_with_file_exit(&ctx->thr); + bch2_thread_with_file_exit(&ctx->thr); kfree(ctx); return 0; } @@ -642,10 +475,9 @@ static 
long bch2_ioctl_data(struct bch_fs *c, ctx->c = c; ctx->arg = arg; - ret = run_thread_with_file(&ctx->thr, - &bcachefs_data_ops, - bch2_data_thread, - "bch-data/%s", c->name); + ret = bch2_run_thread_with_file(&ctx->thr, + &bcachefs_data_ops, + bch2_data_thread); if (ret < 0) kfree(ctx); return ret; @@ -936,24 +768,32 @@ static int bch2_fsck_online_thread_fn(void *arg) struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr); struct bch_fs *c = thr->c; - c->output_filter = current; - c->output = &thr->output; + c->stdio_filter = current; + c->stdio = &thr->thr.stdio; /* * XXX: can we figure out a way to do this without mucking with c->opts? */ + unsigned old_fix_errors = c->opts.fix_errors; if (opt_defined(thr->opts, fix_errors)) c->opts.fix_errors = thr->opts.fix_errors; + else + c->opts.fix_errors = FSCK_FIX_ask; + c->opts.fsck = true; + set_bit(BCH_FS_fsck_running, &c->flags); c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info; - bch2_run_online_recovery_passes(c); + int ret = bch2_run_online_recovery_passes(c); + + clear_bit(BCH_FS_fsck_running, &c->flags); + bch_err_fn(c, ret); - c->output = NULL; - c->output_filter = NULL; + c->stdio = NULL; + c->stdio_filter = NULL; + c->opts.fix_errors = old_fix_errors; - thr->thr.done = true; - wake_up(&thr->output.wait); + thread_with_stdio_done(&thr->thr); up(&c->online_fsck_mutex); bch2_ro_ref_put(c); @@ -988,11 +828,6 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c, thr->c = c; thr->opts = bch2_opts_empty(); - thr->output.buf = PRINTBUF; - thr->output.buf.atomic++; - spin_lock_init(&thr->output.lock); - init_waitqueue_head(&thr->output.wait); - darray_init(&thr->output2); if (arg.opts) { char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16); @@ -1005,15 +840,14 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c, goto err; } - ret = run_thread_with_file(&thr->thr, - &fsck_thread_ops, - bch2_fsck_online_thread_fn, - "bch-fsck"); + ret = 
bch2_run_thread_with_stdio(&thr->thr, + bch2_fsck_thread_exit, + bch2_fsck_online_thread_fn); err: if (ret < 0) { bch_err_fn(c, ret); if (thr) - bch2_fsck_thread_free(thr); + bch2_fsck_thread_exit(&thr->thr); up(&c->online_fsck_mutex); bch2_ro_ref_put(c); } diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h index d867ee6..4b340d1 100644 --- a/libbcachefs/darray.h +++ b/libbcachefs/darray.h @@ -20,7 +20,7 @@ struct { \ #define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0) typedef DARRAY(char) darray_char; -typedef DARRAY(char *) darray_str; +typedef DARRAY(char *) darray_str; int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index de5bfc0..d641894 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -627,7 +627,7 @@ restart: prt_printf(&i->buf, "backtrace:"); prt_newline(&i->buf); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task); + bch2_prt_task_backtrace(&i->buf, task, 0); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); @@ -930,8 +930,6 @@ void bch2_debug_exit(void) int __init bch2_debug_init(void) { - int ret = 0; - bch_debug = debugfs_create_dir("bcachefs", NULL); - return ret; + return 0; } diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c index 1cd6ba8..06a7df5 100644 --- a/libbcachefs/disk_groups.c +++ b/libbcachefs/disk_groups.c @@ -557,7 +557,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) : NULL; if (ca && percpu_ref_tryget(&ca->io_ref)) { - prt_printf(out, "/dev/%pg", ca->disk_sb.bdev); + prt_printf(out, "/dev/%s", ca->name); percpu_ref_put(&ca->io_ref); } else if (ca) { prt_printf(out, "offline device %u", t.dev); diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index e89185a..d802bc6 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -3,6 +3,7 @@ /* erasure coding */ #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" #include 
"bkey_buf.h" @@ -156,6 +157,306 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, } } +/* Triggers: */ + +static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, + struct bkey_s_c_stripe s, + unsigned idx, bool deleting) +{ + struct bch_fs *c = trans->c; + const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; + struct btree_iter iter; + struct bkey_i_alloc_v4 *a; + enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant + ? BCH_DATA_parity : 0; + s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0; + int ret = 0; + + if (deleting) + sectors = -sectors; + + a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr)); + if (IS_ERR(a)) + return PTR_ERR(a); + + ret = bch2_check_bucket_ref(trans, s.s_c, ptr, sectors, data_type, + a->v.gen, a->v.data_type, + a->v.dirty_sectors); + if (ret) + goto err; + + if (!deleting) { + if (bch2_trans_inconsistent_on(a->v.stripe || + a->v.stripe_redundancy, trans, + "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], + a->v.dirty_sectors, + a->v.stripe, s.k->p.offset)) { + ret = -EIO; + goto err; + } + + if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans, + "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", + iter.pos.inode, iter.pos.offset, a->v.gen, + bch2_data_types[a->v.data_type], + a->v.dirty_sectors, + s.k->p.offset)) { + ret = -EIO; + goto err; + } + + a->v.stripe = s.k->p.offset; + a->v.stripe_redundancy = s.v->nr_redundant; + a->v.data_type = BCH_DATA_stripe; + } else { + if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset || + a->v.stripe_redundancy != s.v->nr_redundant, trans, + "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", + iter.pos.inode, iter.pos.offset, a->v.gen, + s.k->p.offset, a->v.stripe)) { + ret = -EIO; + goto err; + 
} + + a->v.stripe = 0; + a->v.stripe_redundancy = 0; + a->v.data_type = alloc_data_type(a->v, BCH_DATA_user); + } + + a->v.dirty_sectors += sectors; + if (data_type) + a->v.data_type = !deleting ? data_type : 0; + + ret = bch2_trans_update(trans, &iter, &a->k_i, 0); + if (ret) + goto err; +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static int mark_stripe_bucket(struct btree_trans *trans, + struct bkey_s_c k, + unsigned ptr_idx, + unsigned flags) +{ + struct bch_fs *c = trans->c; + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + unsigned nr_data = s->nr_blocks - s->nr_redundant; + bool parity = ptr_idx >= nr_data; + enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; + s64 sectors = parity ? le16_to_cpu(s->sectors) : 0; + const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bucket old, new, *g; + struct printbuf buf = PRINTBUF; + int ret = 0; + + BUG_ON(!(flags & BTREE_TRIGGER_GC)); + + /* * XXX doesn't handle deletion */ + + percpu_down_read(&c->mark_lock); + g = PTR_GC_BUCKET(ca, ptr); + + if (g->dirty_sectors || + (g->stripe && g->stripe != k.k->p.offset)) { + bch2_fs_inconsistent(c, + "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EINVAL; + goto err; + } + + bucket_lock(g); + old = *g; + + ret = bch2_check_bucket_ref(trans, k, ptr, sectors, data_type, + g->gen, g->data_type, + g->dirty_sectors); + if (ret) + goto err; + + g->data_type = data_type; + g->dirty_sectors += sectors; + + g->stripe = k.k->p.offset; + g->stripe_redundancy = s->nr_redundant; + new = *g; +err: + bucket_unlock(g); + if (!ret) + bch2_dev_usage_update_m(c, ca, &old, &new); + percpu_up_read(&c->mark_lock); + printbuf_exit(&buf); + return ret; +} + +int bch2_trigger_stripe(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c 
old, struct bkey_s _new, + unsigned flags) +{ + struct bkey_s_c new = _new.s_c; + struct bch_fs *c = trans->c; + u64 idx = new.k->p.offset; + const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe + ? bkey_s_c_to_stripe(old).v : NULL; + const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe + ? bkey_s_c_to_stripe(new).v : NULL; + + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + /* + * If the pointers aren't changing, we don't need to do anything: + */ + if (new_s && old_s && + new_s->nr_blocks == old_s->nr_blocks && + new_s->nr_redundant == old_s->nr_redundant && + !memcmp(old_s->ptrs, new_s->ptrs, + new_s->nr_blocks * sizeof(struct bch_extent_ptr))) + return 0; + + BUG_ON(new_s && old_s && + (new_s->nr_blocks != old_s->nr_blocks || + new_s->nr_redundant != old_s->nr_redundant)); + + if (new_s) { + s64 sectors = le16_to_cpu(new_s->sectors); + + struct bch_replicas_padded r; + bch2_bkey_to_replicas(&r.e, new); + int ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant); + if (ret) + return ret; + } + + if (old_s) { + s64 sectors = -((s64) le16_to_cpu(old_s->sectors)); + + struct bch_replicas_padded r; + bch2_bkey_to_replicas(&r.e, old); + int ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant); + if (ret) + return ret; + } + + unsigned nr_blocks = new_s ? 
new_s->nr_blocks : old_s->nr_blocks; + for (unsigned i = 0; i < nr_blocks; i++) { + if (new_s && old_s && + !memcmp(&new_s->ptrs[i], + &old_s->ptrs[i], + sizeof(new_s->ptrs[i]))) + continue; + + if (new_s) { + int ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_c_to_stripe(new), i, false); + if (ret) + return ret; + } + + if (old_s) { + int ret = bch2_trans_mark_stripe_bucket(trans, + bkey_s_c_to_stripe(old), i, true); + if (ret) + return ret; + } + } + } + + if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) { + struct stripe *m = genradix_ptr(&c->stripes, idx); + + if (!m) { + struct printbuf buf1 = PRINTBUF; + struct printbuf buf2 = PRINTBUF; + + bch2_bkey_val_to_text(&buf1, c, old); + bch2_bkey_val_to_text(&buf2, c, new); + bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n" + "old %s\n" + "new %s", idx, buf1.buf, buf2.buf); + printbuf_exit(&buf2); + printbuf_exit(&buf1); + bch2_inconsistent_error(c); + return -1; + } + + if (!new_s) { + bch2_stripes_heap_del(c, m, idx); + + memset(m, 0, sizeof(*m)); + } else { + m->sectors = le16_to_cpu(new_s->sectors); + m->algorithm = new_s->algorithm; + m->nr_blocks = new_s->nr_blocks; + m->nr_redundant = new_s->nr_redundant; + m->blocks_nonempty = 0; + + for (unsigned i = 0; i < new_s->nr_blocks; i++) + m->blocks_nonempty += !!stripe_blockcount_get(new_s, i); + + if (!old_s) + bch2_stripes_heap_insert(c, m, idx); + else + bch2_stripes_heap_update(c, m, idx); + } + } + + if (flags & BTREE_TRIGGER_GC) { + struct gc_stripe *m = + genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL); + + if (!m) { + bch_err(c, "error allocating memory for gc_stripes, idx %llu", + idx); + return -BCH_ERR_ENOMEM_mark_stripe; + } + /* + * This will be wrong when we bring back runtime gc: we should + * be unmarking the old key and then marking the new key + */ + m->alive = true; + m->sectors = le16_to_cpu(new_s->sectors); + m->nr_blocks = new_s->nr_blocks; + m->nr_redundant = new_s->nr_redundant; + + for 
(unsigned i = 0; i < new_s->nr_blocks; i++) + m->ptrs[i] = new_s->ptrs[i]; + + bch2_bkey_to_replicas(&m->r.e, new); + + /* + * gc recalculates this field from stripe ptr + * references: + */ + memset(m->block_sectors, 0, sizeof(m->block_sectors)); + + for (unsigned i = 0; i < new_s->nr_blocks; i++) { + int ret = mark_stripe_bucket(trans, new, i, flags); + if (ret) + return ret; + } + + int ret = bch2_update_replicas(c, new, &m->r.e, + ((s64) m->sectors * m->nr_redundant), + 0, true); + if (ret) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, c, new); + bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf); + printbuf_exit(&buf); + return ret; + } + } + + return 0; +} + /* returns blocknr in stripe that we matched: */ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, struct bkey_s_c k, unsigned *block) diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 7d0237c..f4369b0 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -12,13 +12,14 @@ int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ .key_invalid = bch2_stripe_invalid, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trans_mark_stripe, \ - .atomic_trigger = bch2_mark_stripe, \ + .trigger = bch2_trigger_stripe, \ .min_val_size = 8, \ }) diff --git a/libbcachefs/error.c b/libbcachefs/error.c index aa4f7f4..d32c8be 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "error.h" #include "super.h" +#include "thread_with_file.h" #define FSCK_ERR_RATELIMIT_NR 10 @@ -27,7 +28,7 @@ bool bch2_inconsistent_error(struct bch_fs *c) void bch2_topology_error(struct bch_fs *c) { 
set_bit(BCH_FS_topology_error, &c->flags); - if (test_bit(BCH_FS_fsck_done, &c->flags)) + if (!test_bit(BCH_FS_fsck_running, &c->flags)) bch2_inconsistent_error(c); } @@ -69,40 +70,66 @@ enum ask_yn { YN_ALLYES, }; +static enum ask_yn parse_yn_response(char *buf) +{ + buf = strim(buf); + + if (strlen(buf) == 1) + switch (buf[0]) { + case 'n': + return YN_NO; + case 'y': + return YN_YES; + case 'N': + return YN_ALLNO; + case 'Y': + return YN_ALLYES; + } + return -1; +} + #ifdef __KERNEL__ -#define bch2_fsck_ask_yn() YN_NO +static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) +{ + struct stdio_redirect *stdio = c->stdio; + + if (c->stdio_filter && c->stdio_filter != current) + stdio = NULL; + + if (!stdio) + return YN_NO; + + char buf[100]; + int ret; + + do { + bch2_print(c, " (y,n, or Y,N for all errors of this type) "); + + int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); + if (r < 0) + return YN_NO; + buf[r] = '\0'; + } while ((ret = parse_yn_response(buf)) < 0); + + return ret; +} #else #include "tools-util.h" -enum ask_yn bch2_fsck_ask_yn(void) +static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) { char *buf = NULL; size_t buflen = 0; - bool ret; + int ret; - while (true) { + do { fputs(" (y,n, or Y,N for all errors of this type) ", stdout); fflush(stdout); if (getline(&buf, &buflen, stdin) < 0) die("error reading from standard input"); - - strim(buf); - if (strlen(buf) != 1) - continue; - - switch (buf[0]) { - case 'n': - return YN_NO; - case 'y': - return YN_YES; - case 'N': - return YN_ALLNO; - case 'Y': - return YN_ALLYES; - } - } + } while ((ret = parse_yn_response(buf)) < 0); free(buf); return ret; @@ -114,7 +141,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) { struct fsck_err_state *s; - if (test_bit(BCH_FS_fsck_done, &c->flags)) + if (!test_bit(BCH_FS_fsck_running, &c->flags)) return NULL; list_for_each_entry(s, &c->fsck_error_msgs, list) @@ -152,7 +179,8 @@ int bch2_fsck_err(struct bch_fs *c, 
struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; - if (test_bit(err, c->sb.errors_silent)) + if ((flags & FSCK_CAN_FIX) && + test_bit(err, c->sb.errors_silent)) return -BCH_ERR_fsck_fix; bch2_sb_error_count(c, err); @@ -196,7 +224,7 @@ int bch2_fsck_err(struct bch_fs *c, prt_printf(out, bch2_log_msg(c, "")); #endif - if (test_bit(BCH_FS_fsck_done, &c->flags)) { + if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { prt_str(out, ", shutting down"); @@ -221,10 +249,13 @@ int bch2_fsck_err(struct bch_fs *c, int ask; prt_str(out, ": fix?"); - bch2_print_string_as_lines(KERN_ERR, out->buf); + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s", out->buf); + else + bch2_print_string_as_lines(KERN_ERR, out->buf); print = false; - ask = bch2_fsck_ask_yn(); + ask = bch2_fsck_ask_yn(c); if (ask >= YN_ALLNO && s) s->fix = ask == YN_ALLNO @@ -253,10 +284,14 @@ int bch2_fsck_err(struct bch_fs *c, !(flags & FSCK_CAN_IGNORE))) ret = -BCH_ERR_fsck_errors_not_fixed; - if (print) - bch2_print_string_as_lines(KERN_ERR, out->buf); + if (print) { + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s\n", out->buf); + else + bch2_print_string_as_lines(KERN_ERR, out->buf); + } - if (!test_bit(BCH_FS_fsck_done, &c->flags) && + if (test_bit(BCH_FS_fsck_running, &c->flags) && (ret != -BCH_ERR_fsck_fix && ret != -BCH_ERR_fsck_ignore)) bch_err(c, "Unable to continue, halting"); diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 77ae447..a855c94 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -415,8 +415,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .key_invalid = bch2_btree_ptr_invalid, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trans_mark_extent, \ - .atomic_trigger = bch2_mark_extent, \ + .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct 
bkey_ops) { \ @@ -424,8 +423,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ - .trans_trigger = bch2_trans_mark_extent, \ - .atomic_trigger = bch2_mark_extent, \ + .trigger = bch2_trigger_extent, \ .min_val_size = 40, \ }) @@ -439,8 +437,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ .key_merge = bch2_extent_merge, \ - .trans_trigger = bch2_trans_mark_extent, \ - .atomic_trigger = bch2_mark_extent, \ + .trigger = bch2_trigger_extent, \ }) /* KEY_TYPE_reservation: */ @@ -454,8 +451,7 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .key_invalid = bch2_reservation_invalid, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ - .trans_trigger = bch2_trans_mark_reservation, \ - .atomic_trigger = bch2_mark_reservation, \ + .trigger = bch2_trigger_reservation, \ .min_val_size = 8, \ }) diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 4496cf9..1c1ea0f 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -166,10 +166,8 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - new_inode->bi_dir = dir_u->bi_inum; - new_inode->bi_dir_offset = dir_offset; - } + new_inode->bi_dir = dir_u->bi_inum; + new_inode->bi_dir_offset = dir_offset; } inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS; @@ -228,10 +226,8 @@ int bch2_link_trans(struct btree_trans *trans, if (ret) goto err; - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - inode_u->bi_dir = dir.inum; - inode_u->bi_dir_offset = dir_offset; - } + inode_u->bi_dir = dir.inum; + inode_u->bi_dir_offset = dir_offset; ret = bch2_inode_write(trans, &dir_iter, dir_u) ?: bch2_inode_write(trans, &inode_iter, 
inode_u); @@ -414,21 +410,19 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } - if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) { - src_inode_u->bi_dir = dst_dir_u->bi_inum; - src_inode_u->bi_dir_offset = dst_offset; + src_inode_u->bi_dir = dst_dir_u->bi_inum; + src_inode_u->bi_dir_offset = dst_offset; - if (mode == BCH_RENAME_EXCHANGE) { - dst_inode_u->bi_dir = src_dir_u->bi_inum; - dst_inode_u->bi_dir_offset = src_offset; - } + if (mode == BCH_RENAME_EXCHANGE) { + dst_inode_u->bi_dir = src_dir_u->bi_inum; + dst_inode_u->bi_dir_offset = src_offset; + } - if (mode == BCH_RENAME_OVERWRITE && - dst_inode_u->bi_dir == dst_dir_u->bi_inum && - dst_inode_u->bi_dir_offset == src_offset) { - dst_inode_u->bi_dir = 0; - dst_inode_u->bi_dir_offset = 0; - } + if (mode == BCH_RENAME_OVERWRITE && + dst_inode_u->bi_dir == dst_dir_u->bi_inum && + dst_inode_u->bi_dir_offset == src_offset) { + dst_inode_u->bi_dir = 0; + dst_inode_u->bi_dir_offset = 0; } if (mode == BCH_RENAME_OVERWRITE) { diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 98bd5ba..9fea897 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -194,6 +194,16 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct bch_fs *c = inode->v.i_sb->s_fs_info; int ret; + /* + * check if unlinked, disable/defer until relink + */ + + /* + * also: add a mode where a file is a tmpfile until fully, + * asynchronously written + */ + + ret = file_write_and_wait_range(file, start, end); if (ret) goto out; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index de1617e..4f0ecd6 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -870,8 +870,7 @@ static int check_inode(struct btree_trans *trans, return 0; } - if (u.bi_flags & BCH_INODE_unlinked && - c->sb.version >= bcachefs_metadata_version_deleted_inodes) { + if (u.bi_flags & BCH_INODE_unlinked) { ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) return ret; @@ -1594,13 +1593,12 @@ 
static int check_dirent_target(struct btree_trans *trans, d = dirent_i_to_s_c(n); } - if (d.v->d_type == DT_SUBVOL && - target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) && - (c->sb.version < bcachefs_metadata_version_subvol_dirent || - fsck_err(c, dirent_d_parent_subvol_wrong, - "dirent has wrong d_parent_subvol field: got %u, should be %u", - le32_to_cpu(d.v->d_parent_subvol), - target->bi_parent_subvol))) { + if (fsck_err_on(d.v->d_type == DT_SUBVOL && + target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol), + c, dirent_d_parent_subvol_wrong, + "dirent has wrong d_parent_subvol field: got %u, should be %u", + le32_to_cpu(d.v->d_parent_subvol), + target->bi_parent_subvol)) { n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); ret = PTR_ERR_OR_ZERO(n); if (ret) diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index c39844b..37dce96 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -561,64 +561,46 @@ static inline bool bkey_is_deleted_inode(struct bkey_s_c k) return bkey_inode_flags(k) & BCH_INODE_unlinked; } -int bch2_trans_mark_inode(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +int bch2_trigger_inode(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_s new, + unsigned flags) { - int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); - bool old_deleted = bkey_is_deleted_inode(old); - bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new)); + s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); - if (nr) { - int ret = bch2_replicas_deltas_realloc(trans, 0); - struct replicas_delta_list *d = trans->fs_usage_deltas; + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + if (nr) { + int ret = bch2_replicas_deltas_realloc(trans, 0); + if (ret) + return ret; - if (ret) - return ret; - - d->nr_inodes += nr; - } + trans->fs_usage_deltas->nr_inodes += nr; + } - if (old_deleted != new_deleted) { - int 
ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted); - if (ret) - return ret; + bool old_deleted = bkey_is_deleted_inode(old); + bool new_deleted = bkey_is_deleted_inode(new.s_c); + if (old_deleted != new_deleted) { + int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted); + if (ret) + return ret; + } } - return 0; -} + if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) { + BUG_ON(!trans->journal_res.seq); -int bch2_mark_inode(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) -{ - struct bch_fs *c = trans->c; - struct bch_fs_usage *fs_usage; - u64 journal_seq = trans->journal_res.seq; - - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v; - - BUG_ON(!journal_seq); - BUG_ON(new.k->type != KEY_TYPE_inode_v3); - - v->bi_journal_seq = cpu_to_le64(journal_seq); + bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq); } if (flags & BTREE_TRIGGER_GC) { - percpu_down_read(&c->mark_lock); - preempt_disable(); - - fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC); - fs_usage->nr_inodes += bkey_is_inode(new.k); - fs_usage->nr_inodes -= bkey_is_inode(old.k); + struct bch_fs *c = trans->c; - preempt_enable(); + percpu_down_read(&c->mark_lock); + this_cpu_add(c->usage_gc->nr_inodes, nr); percpu_up_read(&c->mark_lock); } + return 0; } diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 88818a3..b63f312 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -17,32 +17,27 @@ int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_mark_inode(struct btree_trans *, enum 
btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trans_mark_inode, \ - .atomic_trigger = bch2_mark_inode, \ + .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ .key_invalid = bch2_inode_v2_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trans_mark_inode, \ - .atomic_trigger = bch2_mark_inode, \ + .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ .key_invalid = bch2_inode_v3_invalid, \ .val_to_text = bch2_inode_to_text, \ - .trans_trigger = bch2_trans_mark_inode, \ - .atomic_trigger = bch2_mark_inode, \ + .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ }) diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 42cad83..93a24fe 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -414,11 +414,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Allocate the buckets_nouse bitmap") \ - x(log_output, u64, \ + x(stdio, u64, \ 0, \ OPT_UINT(0, S64_MAX), \ BCH2_NO_SB_OPT, false, \ - NULL, "Pointer to a struct log_output") \ + NULL, "Pointer to a struct stdio_redirect") \ x(project, u8, \ OPT_INODE, \ OPT_BOOL(), \ @@ -458,7 +458,13 @@ enum fsck_err_opts { OPT_UINT(0, BCH_REPLICAS_MAX), \ BCH2_NO_SB_OPT, 1, \ "n", "Data written to this device will be considered\n"\ - "to have already been replicated n times") + "to have already been replicated n times") \ + x(btree_node_prefetch, u8, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, true, \ + NULL, "BTREE_ITER_PREFETCH casuse btree nodes to be\n"\ + " prefetched sequentially") struct bch_opts { #define x(_name, _bits, ...) 
unsigned _name##_defined:1; diff --git a/libbcachefs/printbuf.c b/libbcachefs/printbuf.c index 187b037..accf246 100644 --- a/libbcachefs/printbuf.c +++ b/libbcachefs/printbuf.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: LGPL-2.1+ /* Copyright (C) 2022 Kent Overstreet */ -#include #include +#include #include #include #include diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 3e49209..e1f0da6 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -575,7 +575,7 @@ u64 bch2_recovery_passes_from_stable(u64 v) return ret; } -static u64 check_version_upgrade(struct bch_fs *c) +static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; @@ -624,10 +624,15 @@ static u64 check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, new_version); prt_newline(&buf); - u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); - if (recovery_passes) { + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_upgrade(c, old_version, new_version); + passes = ext->recovery_passes_required[0] & ~passes; + + if (passes) { prt_str(&buf, " running recovery passes: "); - prt_bitflags(&buf, bch2_recovery_passes, recovery_passes); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } bch_info(c, "%s", buf.buf); @@ -635,10 +640,6 @@ static u64 check_version_upgrade(struct bch_fs *c) bch2_sb_upgrade(c, new_version); printbuf_exit(&buf); - - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - ext->recovery_passes_required[0] |= - cpu_to_le64(bch2_recovery_passes_to_stable(recovery_passes)); return true; } @@ -795,23 +796,17 @@ int bch2_fs_recovery(struct bch_fs *c) prt_bitflags(&buf, bch2_recovery_passes, sb_passes); bch_info(c, "%s", buf.buf); 
printbuf_exit(&buf); - c->recovery_passes_explicit |= sb_passes; } - if (bcachefs_metadata_version_current < c->sb.version) { + if (bch2_check_version_downgrade(c)) { struct printbuf buf = PRINTBUF; prt_str(&buf, "Version downgrade required:\n"); - u64 passes = ext->recovery_passes_required[0]; - ret = bch2_sb_set_downgrade(c, + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_downgrade(c, BCH_VERSION_MINOR(bcachefs_metadata_version_current), BCH_VERSION_MINOR(c->sb.version)); - if (ret) { - mutex_unlock(&c->sb_lock); - goto err; - } - passes = ext->recovery_passes_required[0] & ~passes; if (passes) { prt_str(&buf, " running recovery passes: "); @@ -821,8 +816,6 @@ int bch2_fs_recovery(struct bch_fs *c) bch_info(c, "%s", buf.buf); printbuf_exit(&buf); - - bch2_sb_maybe_downgrade(c); write_sb = true; } @@ -839,6 +832,9 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + if (c->opts.fsck) + set_bit(BCH_FS_fsck_running, &c->flags); + ret = bch2_blacklist_table_initialize(c); if (ret) { bch_err(c, "error initializing blacklist table"); @@ -979,6 +975,8 @@ use_clean: if (ret) goto err; + clear_bit(BCH_FS_fsck_running, &c->flags); + /* If we fixed errors, verify that fs is actually clean now: */ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && test_bit(BCH_FS_errors_fixed, &c->flags) && @@ -1073,7 +1071,6 @@ use_clean: ret = 0; out: - set_bit(BCH_FS_fsck_done, &c->flags); bch2_flush_fsck_errs(c); if (!c->opts.keep_journal && @@ -1109,7 +1106,7 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - bch2_sb_maybe_downgrade(c); + bch2_check_version_downgrade(c); if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { bch2_sb_upgrade(c, bcachefs_metadata_version_current); @@ 
-1120,7 +1117,6 @@ int bch2_fs_initialize(struct bch_fs *c) c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns); set_bit(BCH_FS_may_go_rw, &c->flags); - set_bit(BCH_FS_fsck_done, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc(c, i); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index 9f9c8a2..b24b71b 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -34,15 +34,14 @@ int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); + int ret = 0; - if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix && - le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) { - prt_printf(err, "idx < front_pad (%llu < %u)", - le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); - return -EINVAL; - } - - return 0; + bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), + c, err, reflink_p_front_pad_bad, + "idx < front_pad (%llu < %u)", + le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); +fsck_err: + return ret; } void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c, @@ -74,7 +73,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r return true; } -static int trans_mark_reflink_p_segment(struct btree_trans *trans, +static int trans_trigger_reflink_p_segment(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 *idx, unsigned flags) { @@ -93,7 +92,7 @@ static int trans_mark_reflink_p_segment(struct btree_trans *trans, if (ret) goto err; - refcount = bkey_refcount(k); + refcount = bkey_refcount(bkey_i_to_s(k)); if (!refcount) { bch2_bkey_val_to_text(&buf, c, p.s_c); bch2_trans_inconsistent(trans, @@ -141,47 +140,16 @@ err: return ret; } -static int __trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c k, unsigned flags) -{ - struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - u64 idx, end_idx; - int ret = 0; - - 
idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); - end_idx = le64_to_cpu(p.v->idx) + p.k->size + - le32_to_cpu(p.v->back_pad); - - while (idx < end_idx && !ret) - ret = trans_mark_reflink_p_segment(trans, p, &idx, flags); - return ret; -} - -int bch2_trans_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) -{ - if (flags & BTREE_TRIGGER_INSERT) { - struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v; - - v->front_pad = v->back_pad = 0; - } - - return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags); -} - -static s64 __bch2_mark_reflink_p(struct btree_trans *trans, - struct bkey_s_c_reflink_p p, - u64 start, u64 end, - u64 *idx, unsigned flags, size_t r_idx) +static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, + struct bkey_s_c_reflink_p p, + u64 *idx, unsigned flags, size_t r_idx) { struct bch_fs *c = trans->c; struct reflink_gc *r; int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 
1 : -1; - u64 next_idx = end; + u64 start = le64_to_cpu(p.v->idx); + u64 end = le64_to_cpu(p.v->idx) + p.k->size; + u64 next_idx = end + le32_to_cpu(p.v->back_pad); s64 ret = 0; struct printbuf buf = PRINTBUF; @@ -205,20 +173,24 @@ not_found: " missing range %llu-%llu", (bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf), *idx, next_idx)) { - struct bkey_i_error *new; - - new = bch2_trans_kmalloc(trans, sizeof(*new)); - ret = PTR_ERR_OR_ZERO(new); + struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c); + ret = PTR_ERR_OR_ZERO(update); if (ret) goto err; - bkey_init(&new->k); - new->k.type = KEY_TYPE_error; - new->k.p = bkey_start_pos(p.k); - new->k.p.offset += *idx - start; - bch2_key_resize(&new->k, next_idx - *idx); - ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, - BTREE_TRIGGER_NORUN); + if (next_idx <= start) { + bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx); + } else if (*idx >= end) { + bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end); + } else { + bkey_error_init(update); + update->k.p = p.k->p; + update->k.p.offset = next_idx; + update->k.size = next_idx - *idx; + set_bkey_val_u64s(&update->k, 0); + } + + ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_NORUN); } *idx = next_idx; @@ -228,50 +200,55 @@ fsck_err: return ret; } -static int __mark_reflink_p(struct btree_trans *trans, +static int __trigger_reflink_p(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, unsigned flags) { struct bch_fs *c = trans->c; struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); - struct reflink_gc *ref; - size_t l, r, m; - u64 idx = le64_to_cpu(p.v->idx), start = idx; - u64 end = le64_to_cpu(p.v->idx) + p.k->size; int ret = 0; - BUG_ON(!(flags & BTREE_TRIGGER_GC)); + u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad); + u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad); - if 
(c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) { - idx -= le32_to_cpu(p.v->front_pad); - end += le32_to_cpu(p.v->back_pad); + if (flags & BTREE_TRIGGER_TRANSACTIONAL) { + while (idx < end && !ret) + ret = trans_trigger_reflink_p_segment(trans, p, &idx, flags); } - l = 0; - r = c->reflink_gc_nr; - while (l < r) { - m = l + (r - l) / 2; + if (flags & BTREE_TRIGGER_GC) { + size_t l = 0, r = c->reflink_gc_nr; - ref = genradix_ptr(&c->reflink_gc_table, m); - if (ref->offset <= idx) - l = m + 1; - else - r = m; - } + while (l < r) { + size_t m = l + (r - l) / 2; + struct reflink_gc *ref = genradix_ptr(&c->reflink_gc_table, m); + if (ref->offset <= idx) + l = m + 1; + else + r = m; + } - while (idx < end && !ret) - ret = __bch2_mark_reflink_p(trans, p, start, end, - &idx, flags, l++); + while (idx < end && !ret) + ret = gc_trigger_reflink_p_segment(trans, p, &idx, flags, l++); + } return ret; } -int bch2_mark_reflink_p(struct btree_trans *trans, - enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_trigger_reflink_p(struct btree_trans *trans, + enum btree_id btree_id, unsigned level, + struct bkey_s_c old, + struct bkey_s new, + unsigned flags) { - return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags); + if ((flags & BTREE_TRIGGER_TRANSACTIONAL) && + (flags & BTREE_TRIGGER_INSERT)) { + struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v; + + v->front_pad = v->back_pad = 0; + } + + return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags); } /* indirect extents */ @@ -305,32 +282,34 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r } #endif -static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags) +static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *flags) { if ((*flags & BTREE_TRIGGER_INSERT) && 
!*bkey_refcount(new)) { - new->k.type = KEY_TYPE_deleted; - new->k.size = 0; - set_bkey_val_u64s(&new->k, 0); + new.k->type = KEY_TYPE_deleted; + new.k->size = 0; + set_bkey_val_u64s(new.k, 0); *flags &= ~BTREE_TRIGGER_INSERT; } } int bch2_trans_mark_reflink_v(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { - check_indirect_extent_deleting(new, &flags); + if ((flags & BTREE_TRIGGER_TRANSACTIONAL) && + (flags & BTREE_TRIGGER_INSERT)) + check_indirect_extent_deleting(new, &flags); if (old.k->type == KEY_TYPE_reflink_v && - new->k.type == KEY_TYPE_reflink_v && - old.k->u64s == new->k.u64s && + new.k->type == KEY_TYPE_reflink_v && + old.k->u64s == new.k->u64s && !memcmp(bkey_s_c_to_reflink_v(old).v->start, - bkey_i_to_reflink_v(new)->v.start, - bkey_val_bytes(&new->k) - 8)) + bkey_s_to_reflink_v(new).v->start, + bkey_val_bytes(new.k) - 8)) return 0; - return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags); + return bch2_trigger_extent(trans, btree_id, level, old, new, flags); } /* indirect inline data */ @@ -355,7 +334,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out, int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans, enum btree_id btree_id, unsigned level, - struct bkey_s_c old, struct bkey_i *new, + struct bkey_s_c old, struct bkey_s new, unsigned flags) { check_indirect_extent_deleting(new, &flags); @@ -398,7 +377,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); - refcount = bkey_refcount(r_v); + refcount = bkey_refcount(bkey_i_to_s(r_v)); *refcount = 0; memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h index 6cc9c4a..8ee778e 100644 --- a/libbcachefs/reflink.h +++ b/libbcachefs/reflink.h @@ -9,17 +9,14 @@ int bch2_reflink_p_invalid(struct 
bch_fs *, struct bkey_s_c, void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); -int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); -int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ .key_invalid = bch2_reflink_p_invalid, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ - .trans_trigger = bch2_trans_mark_reflink_p, \ - .atomic_trigger = bch2_mark_reflink_p, \ + .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) @@ -28,14 +25,13 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ .key_invalid = bch2_reflink_v_invalid, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ - .trans_trigger = bch2_trans_mark_reflink_v, \ - .atomic_trigger = bch2_mark_extent, \ + .trigger = bch2_trans_mark_reflink_v, \ .min_val_size = 8, \ }) @@ -45,13 +41,13 @@ void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trans_mark_indirect_inline_data(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_i *, + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ .key_invalid = bch2_indirect_inline_data_invalid, \ .val_to_text = bch2_indirect_inline_data_to_text, \ - .trans_trigger = 
bch2_trans_mark_indirect_inline_data, \ + .trigger = bch2_trans_mark_indirect_inline_data, \ .min_val_size = 8, \ }) @@ -67,13 +63,13 @@ static inline const __le64 *bkey_refcount_c(struct bkey_s_c k) } } -static inline __le64 *bkey_refcount(struct bkey_i *k) +static inline __le64 *bkey_refcount(struct bkey_s k) { - switch (k->k.type) { + switch (k.k->type) { case KEY_TYPE_reflink_v: - return &bkey_i_to_reflink_v(k)->v.refcount; + return &bkey_s_to_reflink_v(k).v->refcount; case KEY_TYPE_indirect_inline_data: - return &bkey_i_to_indirect_inline_data(k)->v.refcount; + return &bkey_s_to_indirect_inline_data(k).v->refcount; default: return NULL; } diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c index d2a92fb..441dcb1 100644 --- a/libbcachefs/sb-downgrade.c +++ b/libbcachefs/sb-downgrade.c @@ -12,36 +12,105 @@ #include "sb-errors.h" #include "super-io.h" +#define RECOVERY_PASS_ALL_FSCK BIT_ULL(63) + /* - * Downgrade table: - * When dowgrading past certain versions, we need to run certain recovery passes - * and fix certain errors: + * Upgrade, downgrade tables - run certain recovery passes, fix certain errors * * x(version, recovery_passes, errors...) 
*/ - -#define DOWNGRADE_TABLE() \ - x(disk_accounting_v2, \ - BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info), \ - BCH_FSCK_ERR_dev_usage_buckets_wrong) - -struct downgrade_entry { +#define UPGRADE_TABLE() \ + x(backpointers, \ + RECOVERY_PASS_ALL_FSCK) \ + x(inode_v3, \ + RECOVERY_PASS_ALL_FSCK) \ + x(unwritten_extents, \ + RECOVERY_PASS_ALL_FSCK) \ + x(bucket_gens, \ + BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ + RECOVERY_PASS_ALL_FSCK) \ + x(lru_v2, \ + RECOVERY_PASS_ALL_FSCK) \ + x(fragmentation_lru, \ + RECOVERY_PASS_ALL_FSCK) \ + x(no_bps_in_alloc_keys, \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot_trees, \ + RECOVERY_PASS_ALL_FSCK) \ + x(snapshot_skiplists, \ + BIT_ULL(BCH_RECOVERY_PASS_check_snapshots), \ + BCH_FSCK_ERR_snapshot_bad_depth, \ + BCH_FSCK_ERR_snapshot_bad_skiplist) \ + x(deleted_inodes, \ + BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ + BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \ + x(rebalance_work, \ + BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) + +#define DOWNGRADE_TABLE() + +struct upgrade_downgrade_entry { u64 recovery_passes; u16 version; u16 nr_errors; const u16 *errors; }; -#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ }; +#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ }; +UPGRADE_TABLE() +#undef x + +static const struct upgrade_downgrade_entry upgrade_table[] = { +#define x(ver, passes, ...) 
{ \ + .recovery_passes = passes, \ + .version = bcachefs_metadata_version_##ver,\ + .nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \ + .errors = upgrade_##ver##_errors, \ +}, +UPGRADE_TABLE() +#undef x +}; + +void bch2_sb_set_upgrade(struct bch_fs *c, + unsigned old_version, + unsigned new_version) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + for (const struct upgrade_downgrade_entry *i = upgrade_table; + i < upgrade_table + ARRAY_SIZE(upgrade_table); + i++) + if (i->version > old_version && i->version <= new_version) { + u64 passes = i->recovery_passes; + + if (passes & RECOVERY_PASS_ALL_FSCK) + passes |= bch2_fsck_recovery_passes(); + passes &= ~RECOVERY_PASS_ALL_FSCK; + + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(passes)); + + for (const u16 *e = i->errors; + e < i->errors + i->nr_errors; + e++) { + __set_bit(*e, c->sb.errors_silent); + ext->errors_silent[*e / 64] |= cpu_to_le64(BIT_ULL(*e % 64)); + } + } +} + +#define x(ver, passes, ...) static const u16 downgrade_ver_##errors[] = { __VA_ARGS__ }; DOWNGRADE_TABLE() #undef x -static const struct downgrade_entry downgrade_table[] = { +static const struct upgrade_downgrade_entry downgrade_table[] = { #define x(ver, passes, ...) 
{ \ .recovery_passes = passes, \ .version = bcachefs_metadata_version_##ver,\ - .nr_errors = ARRAY_SIZE(ver_##errors), \ - .errors = ver_##errors, \ + .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \ + .errors = downgrade_##ver##_errors, \ }, DOWNGRADE_TABLE() #undef x @@ -59,12 +128,6 @@ downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) (void *) &_i->errors[0] < vstruct_end(&(_d)->field); \ _i = downgrade_entry_next_c(_i)) -static inline unsigned bch2_sb_field_downgrade_u64s(unsigned nr) -{ - return (sizeof(struct bch_sb_field_downgrade) + - sizeof(struct bch_sb_field_downgrade_entry) * nr) / sizeof(u64); -} - static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, struct printbuf *err) { @@ -127,7 +190,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) darray_char table = {}; int ret = 0; - for (const struct downgrade_entry *src = downgrade_table; + for (const struct upgrade_downgrade_entry *src = downgrade_table; src < downgrade_table + ARRAY_SIZE(downgrade_table); src++) { if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) @@ -171,11 +234,11 @@ out: return ret; } -int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) +void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) { struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); if (!d) - return 0; + return; struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); @@ -194,6 +257,4 @@ int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_min } } } - - return 0; } diff --git a/libbcachefs/sb-downgrade.h b/libbcachefs/sb-downgrade.h index 0703ad7..57e6c91 100644 --- a/libbcachefs/sb-downgrade.h +++ b/libbcachefs/sb-downgrade.h @@ -5,6 +5,7 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade; int bch2_sb_downgrade_update(struct bch_fs *); -int bch2_sb_set_downgrade(struct bch_fs *, unsigned, 
unsigned); +void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned); +void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned); #endif /* _BCACHEFS_SB_DOWNGRADE_H */ diff --git a/libbcachefs/sb-errors_types.h b/libbcachefs/sb-errors_types.h index e7be1f9..c08aacd 100644 --- a/libbcachefs/sb-errors_types.h +++ b/libbcachefs/sb-errors_types.h @@ -249,7 +249,8 @@ x(dir_loop, 241) \ x(hash_table_key_duplicate, 242) \ x(hash_table_key_wrong_offset, 243) \ - x(unlinked_inode_not_on_deleted_list, 244) + x(unlinked_inode_not_on_deleted_list, 244) \ + x(reflink_p_front_pad_bad, 245) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index 4c19a80..a44a238 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -266,7 +266,7 @@ static void member_to_text(struct printbuf *out, prt_str(out, "Durability:"); prt_tab(out); - prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m)); + prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? 
BCH_MEMBER_DURABILITY(&m) - 1 : 1); prt_newline(out); prt_printf(out, "Discard:"); diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index 96df405..56af937 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -276,7 +276,7 @@ static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id) mutex_unlock(&c->snapshot_table_lock); } -int bch2_mark_snapshot(struct btree_trans *trans, +static int __bch2_mark_snapshot(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_s_c new, unsigned flags) @@ -330,6 +330,14 @@ err: return ret; } +int bch2_mark_snapshot(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_s_c old, struct bkey_s new, + unsigned flags) +{ + return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags); +} + int bch2_snapshot_lookup(struct btree_trans *trans, u32 id, struct bch_snapshot *s) { @@ -806,11 +814,10 @@ static int check_snapshot(struct btree_trans *trans, real_depth = bch2_snapshot_depth(c, parent_id); - if (le32_to_cpu(s.depth) != real_depth && - (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, snapshot_bad_depth, - "snapshot with incorrect depth field, should be %u:\n %s", - real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + if (fsck_err_on(le32_to_cpu(s.depth) != real_depth, + c, snapshot_bad_depth, + "snapshot with incorrect depth field, should be %u:\n %s", + real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) @@ -824,11 +831,9 @@ static int check_snapshot(struct btree_trans *trans, if (ret < 0) goto err; - if (!ret && - (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists || - fsck_err(c, snapshot_bad_skiplist, - "snapshot with bad skiplist field:\n %s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) { + if (fsck_err_on(!ret, c, 
snapshot_bad_skiplist, + "snapshot with bad skiplist field:\n %s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot); ret = PTR_ERR_OR_ZERO(u); if (ret) @@ -1055,7 +1060,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree, bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32); SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, + ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); if (ret) goto err; @@ -1664,7 +1669,7 @@ int bch2_snapshots_read(struct bch_fs *c) int ret = bch2_trans_run(c, for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, - bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: + __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: bch2_snapshot_set_equiv(trans, k) ?: bch2_check_snapshot_needs_deletion(trans, k)) ?: for_each_btree_key(trans, iter, BTREE_ID_snapshots, diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshot.h index 94f35b2..7c66ffc 100644 --- a/libbcachefs/snapshot.h +++ b/libbcachefs/snapshot.h @@ -22,12 +22,12 @@ void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, enum bkey_invalid_flags, struct printbuf *); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, - struct bkey_s_c, struct bkey_s_c, unsigned); + struct bkey_s_c, struct bkey_s, unsigned); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ .key_invalid = bch2_snapshot_invalid, \ .val_to_text = bch2_snapshot_to_text, \ - .atomic_trigger = bch2_mark_snapshot, \ + .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ }) diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 7cbf496..ea86921 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -30,14 +30,12 @@ static const struct blk_holder_ops 
bch2_sb_handle_bdev_ops = { struct bch2_metadata_version { u16 version; const char *name; - u64 recovery_passes; }; static const struct bch2_metadata_version bch2_metadata_versions[] = { -#define x(n, v, _recovery_passes) { \ +#define x(n, v) { \ .version = v, \ .name = #n, \ - .recovery_passes = _recovery_passes, \ }, BCH_METADATA_VERSIONS() #undef x @@ -70,24 +68,6 @@ unsigned bch2_latest_compatible_version(unsigned v) return v; } -u64 bch2_upgrade_recovery_passes(struct bch_fs *c, - unsigned old_version, - unsigned new_version) -{ - u64 ret = 0; - - for (const struct bch2_metadata_version *i = bch2_metadata_versions; - i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions); - i++) - if (i->version > old_version && i->version <= new_version) { - if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK) - ret |= bch2_fsck_recovery_passes(); - ret |= i->recovery_passes; - } - - return ret &= ~RECOVERY_PASS_ALL_FSCK; -} - const char * const bch2_sb_fields[] = { #define x(name, nr) #name, BCH_SB_FIELDS() @@ -190,8 +170,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; if (new_bytes > max_bytes) { - pr_err("%pg: superblock too big: want %zu but have %llu", - sb->bdev, new_bytes, max_bytes); + struct printbuf buf = PRINTBUF; + + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes); + pr_err("%s", buf.buf); + printbuf_exit(&buf); return -BCH_ERR_ENOSPC_sb; } } @@ -1095,8 +1079,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) } /* Downgrade if superblock is at a higher version than currently supported: */ -void bch2_sb_maybe_downgrade(struct bch_fs *c) +bool bch2_check_version_downgrade(struct bch_fs *c) { + bool ret = bcachefs_metadata_version_current < c->sb.version; + lockdep_assert_held(&c->sb_lock); /* @@ -1110,6 +1096,7 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c) if (c->sb.version_min > 
bcachefs_metadata_version_current) c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); + return ret; } void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) @@ -1200,8 +1187,8 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, return ret; } -void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, - struct bch_sb_field *f) +void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) { unsigned type = le32_to_cpu(f->type); const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); @@ -1209,6 +1196,15 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, if (!out->nr_tabstops) printbuf_tabstop_push(out, 32); + if (ops->to_text) + ops->to_text(out, sb, f); +} + +void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + unsigned type = le32_to_cpu(f->type); + if (type < BCH_SB_FIELD_NR) prt_printf(out, "%s", bch2_sb_fields[type]); else @@ -1217,11 +1213,7 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, prt_printf(out, " (size %zu):", vstruct_bytes(f)); prt_newline(out); - if (ops->to_text) { - printbuf_indent_add(out, 2); - ops->to_text(out, sb, f); - printbuf_indent_sub(out, 2); - } + __bch2_sb_field_to_text(out, sb, f); } void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l) diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h index 1a8c208..95e80e0 100644 --- a/libbcachefs/super-io.h +++ b/libbcachefs/super-io.h @@ -19,10 +19,6 @@ static inline bool bch2_version_compatible(u16 version) void bch2_version_to_text(struct printbuf *, unsigned); unsigned bch2_latest_compatible_version(unsigned); -u64 bch2_upgrade_recovery_passes(struct bch_fs *c, - unsigned, - unsigned); - static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f) { return le32_to_cpu(f->u64s) * sizeof(u64); 
@@ -94,9 +90,11 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -void bch2_sb_maybe_downgrade(struct bch_fs *); +bool bch2_check_version_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); +void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, + struct bch_sb_field *); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *); void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *); diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 0f3a924..9dbc359 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -88,14 +88,11 @@ const char * const bch2_fs_flag_strs[] = { void __bch2_print(struct bch_fs *c, const char *fmt, ...) { - struct log_output *output = c->output; - va_list args; - - if (c->output_filter && c->output_filter != current) - output = NULL; + struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); + va_list args; va_start(args, fmt); - if (likely(!output)) { + if (likely(!stdio)) { vprintk(fmt, args); } else { unsigned long flags; @@ -103,11 +100,11 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...) 
if (fmt[0] == KERN_SOH[0]) fmt += 2; - spin_lock_irqsave(&output->lock, flags); - prt_vprintf(&output->buf, fmt, args); - spin_unlock_irqrestore(&output->lock, flags); + spin_lock_irqsave(&stdio->output_lock, flags); + prt_vprintf(&stdio->output_buf, fmt, args); + spin_unlock_irqrestore(&stdio->output_lock, flags); - wake_up(&output->wait); + wake_up(&stdio->output_wait); } va_end(args); } @@ -724,7 +721,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto out; } - c->output = (void *)(unsigned long) opts.log_output; + c->stdio = (void *)(unsigned long) opts.stdio; __module_get(THIS_MODULE); @@ -871,7 +868,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || !(c->io_complete_wq = alloc_workqueue("bcachefs_io", - WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) || + WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 512)) || !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref", WQ_FREEZABLE, 0)) || #ifndef BCH_WRITE_REF_DEBUG @@ -1086,17 +1083,22 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, fs->sb->write_time != sb->sb->write_time) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Split brain detected between %pg and %pg:", - sb->bdev, fs->bdev); + prt_str(&buf, "Split brain detected between "); + prt_bdevname(&buf, sb->bdev); + prt_str(&buf, " and "); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ':'); prt_newline(&buf); prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq)); prt_newline(&buf); - prt_printf(&buf, "%pg ", fs->bdev); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ' '); bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));; prt_newline(&buf); - prt_printf(&buf, "%pg ", sb->bdev); + prt_bdevname(&buf, sb->bdev); + prt_char(&buf, ' '); bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));; prt_newline(&buf); @@ -1112,13 +1114,26 @@ static int 
bch2_dev_in_fs(struct bch_sb_handle *fs, u64 seq_from_member = le64_to_cpu(sb->sb->seq); if (seq_from_fs && seq_from_fs < seq_from_member) { - pr_err("Split brain detected between %pg and %pg:\n" - "%pg believes seq of %pg to be %llu, but %pg has %llu\n" - "Not using %pg", - sb->bdev, fs->bdev, - fs->bdev, sb->bdev, seq_from_fs, - sb->bdev, seq_from_member, - sb->bdev); + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Split brain detected between "); + prt_bdevname(&buf, sb->bdev); + prt_str(&buf, " and "); + prt_bdevname(&buf, fs->bdev); + prt_char(&buf, ':'); + prt_newline(&buf); + + prt_bdevname(&buf, fs->bdev); + prt_str(&buf, " believes seq of "); + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, " to be %llu, but ", seq_from_fs); + prt_bdevname(&buf, sb->bdev); + prt_printf(&buf, " has %llu\n", seq_from_member); + prt_str(&buf, "Not using "); + prt_bdevname(&buf, sb->bdev); + + pr_err("%s", buf.buf); + printbuf_exit(&buf); return -BCH_ERR_device_splitbrain; } @@ -1367,9 +1382,14 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) bch2_dev_sysfs_online(c, ca); + struct printbuf name = PRINTBUF; + prt_bdevname(&name, ca->disk_sb.bdev); + if (c->sb.nr_devices == 1) - snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev); - snprintf(ca->name, sizeof(ca->name), "%pg", ca->disk_sb.bdev); + strlcpy(c->name, name.buf, sizeof(c->name)); + strlcpy(ca->name, name.buf, sizeof(ca->name)); + + printbuf_exit(&name); rebalance_wakeup(c); return 0; diff --git a/libbcachefs/thread_with_file.c b/libbcachefs/thread_with_file.c new file mode 100644 index 0000000..b1c867a --- /dev/null +++ b/libbcachefs/thread_with_file.c @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef NO_BCACHEFS_FS + +#include "bcachefs.h" +#include "printbuf.h" +#include "thread_with_file.h" + +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <linux/kthread.h> +#include <linux/pagemap.h> +#include <linux/poll.h> + +void bch2_thread_with_file_exit(struct thread_with_file *thr) +{ + if (thr->task) { + kthread_stop(thr->task); +
put_task_struct(thr->task); + } +} + +int bch2_run_thread_with_file(struct thread_with_file *thr, + const struct file_operations *fops, + int (*fn)(void *)) +{ + struct file *file = NULL; + int ret, fd = -1; + unsigned fd_flags = O_CLOEXEC; + + if (fops->read && fops->write) + fd_flags |= O_RDWR; + else if (fops->read) + fd_flags |= O_RDONLY; + else if (fops->write) + fd_flags |= O_WRONLY; + + char name[TASK_COMM_LEN]; + get_task_comm(name, current); + + thr->ret = 0; + thr->task = kthread_create(fn, thr, "%s", name); + ret = PTR_ERR_OR_ZERO(thr->task); + if (ret) + return ret; + + ret = get_unused_fd_flags(fd_flags); + if (ret < 0) + goto err; + fd = ret; + + file = anon_inode_getfile(name, fops, thr, fd_flags); + ret = PTR_ERR_OR_ZERO(file); + if (ret) + goto err; + + fd_install(fd, file); + get_task_struct(thr->task); + wake_up_process(thr->task); + return fd; +err: + if (fd >= 0) + put_unused_fd(fd); + if (thr->task) + kthread_stop(thr->task); + return ret; +} + +static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr) +{ + return thr->stdio.output_buf.pos || + thr->output2.nr || + thr->thr.done; +} + +static ssize_t thread_with_stdio_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + size_t copied = 0, b; + int ret = 0; + + if ((file->f_flags & O_NONBLOCK) && + !thread_with_stdio_has_output(thr)) + return -EAGAIN; + + ret = wait_event_interruptible(thr->stdio.output_wait, + thread_with_stdio_has_output(thr)); + if (ret) + return ret; + + if (thr->thr.done && !thr->stdio.output_buf.pos && !thr->output2.nr) + return 0; /* EOF only after buffered output is drained */ + + while (len) { + ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos); + if (ret) + break; + + spin_lock_irq(&thr->stdio.output_lock); + b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos); + + memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b); + memmove(thr->stdio.output_buf.buf, +
thr->stdio.output_buf.buf + b, + thr->stdio.output_buf.pos - b); + + thr->output2.nr += b; + thr->stdio.output_buf.pos -= b; + spin_unlock_irq(&thr->stdio.output_lock); + + b = min(len, thr->output2.nr); + if (!b) + break; + + b -= copy_to_user(buf, thr->output2.data, b); + if (!b) { + ret = -EFAULT; + break; + } + + copied += b; + buf += b; + len -= b; + + memmove(thr->output2.data, + thr->output2.data + b, + thr->output2.nr - b); + thr->output2.nr -= b; + } + + return copied ?: ret; +} + +static int thread_with_stdio_release(struct inode *inode, struct file *file) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + + bch2_thread_with_file_exit(&thr->thr); + printbuf_exit(&thr->stdio.input_buf); + printbuf_exit(&thr->stdio.output_buf); + darray_exit(&thr->output2); + thr->exit(thr); + return 0; +} + +#define WRITE_BUFFER 4096 + +static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr) +{ + return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done; +} + +static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + struct printbuf *buf = &thr->stdio.input_buf; + size_t copied = 0; + ssize_t ret = 0; + + while (len) { + if (thr->thr.done) { + ret = -EPIPE; + break; + } + + size_t b = len - fault_in_readable(ubuf, len); + if (!b) { + ret = -EFAULT; + break; + } + + spin_lock(&thr->stdio.input_lock); + if (buf->pos < WRITE_BUFFER) + bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos)); + b = min(len, printbuf_remaining_size(buf)); + + if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) { + ubuf += b; + len -= b; + copied += b; + buf->pos += b; + } + spin_unlock(&thr->stdio.input_lock); + + if (b) { + wake_up(&thr->stdio.input_wait); + } else { + if ((file->f_flags & O_NONBLOCK)) { + ret = -EAGAIN; + break; + } 
+ + ret = wait_event_interruptible(thr->stdio.input_wait, + thread_with_stdio_has_input_space(thr)); + if (ret) + break; + } + } + + return copied ?: ret; +} + +static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_struct *wait) +{ + struct thread_with_stdio *thr = + container_of(file->private_data, struct thread_with_stdio, thr); + + poll_wait(file, &thr->stdio.output_wait, wait); + poll_wait(file, &thr->stdio.input_wait, wait); + + __poll_t mask = 0; + + if (thread_with_stdio_has_output(thr)) + mask |= EPOLLIN; + if (thread_with_stdio_has_input_space(thr)) + mask |= EPOLLOUT; + if (thr->thr.done) + mask |= EPOLLHUP|EPOLLERR; + return mask; +} + +static const struct file_operations thread_with_stdio_fops = { + .release = thread_with_stdio_release, + .read = thread_with_stdio_read, + .write = thread_with_stdio_write, + .poll = thread_with_stdio_poll, + .llseek = no_llseek, +}; + +int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, + void (*exit)(struct thread_with_stdio *), + int (*fn)(void *)) +{ + thr->stdio.input_buf = PRINTBUF; + thr->stdio.input_buf.atomic++; + spin_lock_init(&thr->stdio.input_lock); + init_waitqueue_head(&thr->stdio.input_wait); + + thr->stdio.output_buf = PRINTBUF; + thr->stdio.output_buf.atomic++; + spin_lock_init(&thr->stdio.output_lock); + init_waitqueue_head(&thr->stdio.output_wait); + + darray_init(&thr->output2); + thr->exit = exit; + + return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn); +} + +int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len) +{ + wait_event(stdio->input_wait, + stdio->input_buf.pos || stdio->done); + + if (stdio->done) + return -1; + + spin_lock(&stdio->input_lock); + int ret = min(len, stdio->input_buf.pos); + stdio->input_buf.pos -= ret; + memcpy(buf, stdio->input_buf.buf, ret); + memmove(stdio->input_buf.buf, + stdio->input_buf.buf + ret, + stdio->input_buf.pos); + spin_unlock(&stdio->input_lock); + + wake_up(&stdio->input_wait); 
+ return ret; +} + +int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len) +{ + wait_event(stdio->input_wait, + stdio->input_buf.pos || stdio->done); + + if (stdio->done) + return -1; + + spin_lock(&stdio->input_lock); + int ret = min(len, stdio->input_buf.pos); + char *n = memchr(stdio->input_buf.buf, '\n', ret); + if (n) + ret = min(ret, n + 1 - stdio->input_buf.buf); + stdio->input_buf.pos -= ret; + memcpy(buf, stdio->input_buf.buf, ret); + memmove(stdio->input_buf.buf, + stdio->input_buf.buf + ret, + stdio->input_buf.pos); + spin_unlock(&stdio->input_lock); + + wake_up(&stdio->input_wait); + return ret; +} + +#endif /* NO_BCACHEFS_FS */ diff --git a/libbcachefs/thread_with_file.h b/libbcachefs/thread_with_file.h new file mode 100644 index 0000000..05879c5 --- /dev/null +++ b/libbcachefs/thread_with_file.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_THREAD_WITH_FILE_H +#define _BCACHEFS_THREAD_WITH_FILE_H + +#include "thread_with_file_types.h" + +struct task_struct; + +struct thread_with_file { + struct task_struct *task; + int ret; + bool done; +}; + +void bch2_thread_with_file_exit(struct thread_with_file *); +int bch2_run_thread_with_file(struct thread_with_file *, + const struct file_operations *, + int (*fn)(void *)); + +struct thread_with_stdio { + struct thread_with_file thr; + struct stdio_redirect stdio; + DARRAY(char) output2; + void (*exit)(struct thread_with_stdio *); +}; + +static inline void thread_with_stdio_done(struct thread_with_stdio *thr) +{ + thr->thr.done = true; + thr->stdio.done = true; + wake_up(&thr->stdio.input_wait); + wake_up(&thr->stdio.output_wait); +} + +int bch2_run_thread_with_stdio(struct thread_with_stdio *, + void (*exit)(struct thread_with_stdio *), + int (*fn)(void *)); +int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t); +int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t); + +#endif /* _BCACHEFS_THREAD_WITH_FILE_H */ 
diff --git a/libbcachefs/thread_with_file_types.h b/libbcachefs/thread_with_file_types.h new file mode 100644 index 0000000..90b5e64 --- /dev/null +++ b/libbcachefs/thread_with_file_types.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H +#define _BCACHEFS_THREAD_WITH_FILE_TYPES_H + +struct stdio_redirect { + spinlock_t output_lock; + wait_queue_head_t output_wait; + struct printbuf output_buf; + + spinlock_t input_lock; + wait_queue_head_t input_wait; + struct printbuf input_buf; + bool done; +}; + +#endif /* _BCACHEFS_THREAD_WITH_FILE_TYPES_H */ diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index 427edb3..c94876b 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -72,6 +72,27 @@ DECLARE_EVENT_CLASS(trans_str, __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) ); +DECLARE_EVENT_CLASS(trans_str_nocaller, + TP_PROTO(struct btree_trans *trans, const char *str), + TP_ARGS(trans, str), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __array(char, trans_fn, 32 ) + __string(str, str ) + ), + + TP_fast_assign( + __entry->dev = trans->c->dev; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __assign_str(str, str); + ), + + TP_printk("%d,%d %s %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->trans_fn, __get_str(str)) +); + DECLARE_EVENT_CLASS(btree_node_nofs, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), @@ -1243,11 +1264,10 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(trans_str, trans_restart_would_deadlock, +DEFINE_EVENT(trans_str_nocaller, trans_restart_would_deadlock, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip, const char *cycle), - TP_ARGS(trans, caller_ip, cycle) + TP_ARGS(trans, cycle) ); DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit, diff --git a/libbcachefs/util.c b/libbcachefs/util.c index 
2e4c5d9..c2ef7cd 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -267,7 +267,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines) console_unlock(); } -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr) { #ifdef CONFIG_STACKTRACE unsigned nr_entries = 0; @@ -282,7 +282,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) return -1; do { - nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0); + nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); } while (nr_entries == stack->size && !(ret = darray_make_room(stack, stack->size * 2))); @@ -303,10 +303,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) } } -int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task) +int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr) { bch_stacktrace stack = { 0 }; - int ret = bch2_save_backtrace(&stack, task); + int ret = bch2_save_backtrace(&stack, task, skipnr + 1); bch2_prt_backtrace(out, &stack); darray_exit(&stack); diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 4290e0a..c75fc31 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -347,9 +347,18 @@ void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); void bch2_print_string_as_lines(const char *prefix, const char *lines); typedef DARRAY(unsigned long) bch_stacktrace; -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *); +int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); -int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *); +int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned); + +static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev) +{ 
+#ifdef __KERNEL__ + prt_printf(out, "%pg", bdev); +#else + prt_str(out, bdev->name); +#endif +} #define NR_QUANTILES 15 #define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) -- 2.39.2