From: Kent Overstreet Date: Fri, 17 Feb 2023 22:51:22 +0000 (-0500) Subject: Update bcachefs sources to 1b14994029 bcachefs: Fragmentation LRU X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=46a6b9210c927ab46fd1227cb6f641be0b4a7505;p=bcachefs-tools-debian Update bcachefs sources to 1b14994029 bcachefs: Fragmentation LRU --- diff --git a/.bcachefs_revision b/.bcachefs_revision index bfcbcf5..7b2b6b4 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -8dbfede1d9e6483c682956c7c8a4900a65f98dde +1b149940290c0ef39070b4afaadab84a65bba034 diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 10e51bb..b9dca1d 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -723,8 +723,8 @@ TRACE_EVENT(move_data, TRACE_EVENT(evacuate_bucket, TP_PROTO(struct bch_fs *c, struct bpos *bucket, unsigned sectors, unsigned bucket_size, - int ret), - TP_ARGS(c, bucket, sectors, bucket_size, ret), + u64 fragmentation, int ret), + TP_ARGS(c, bucket, sectors, bucket_size, fragmentation, ret), TP_STRUCT__entry( __field(dev_t, dev ) @@ -732,6 +732,7 @@ TRACE_EVENT(evacuate_bucket, __field(u64, bucket ) __field(u32, sectors ) __field(u32, bucket_size ) + __field(u64, fragmentation ) __field(int, ret ) ), @@ -741,14 +742,15 @@ TRACE_EVENT(evacuate_bucket, __entry->bucket = bucket->offset; __entry->sectors = sectors; __entry->bucket_size = bucket_size; + __entry->fragmentation = fragmentation; __entry->ret = ret; ), - TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", + TP_printk("%d,%d %llu:%llu sectors %u/%u fragmentation %llu ret %i", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->member, __entry->bucket, __entry->sectors, __entry->bucket_size, - __entry->ret) + __entry->fragmentation, __entry->ret) ); TRACE_EVENT(copygc, diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 2db4436..755faa3 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -9,6 +9,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "btree_gc.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "buckets_waiting_for_journal.h" #include "clock.h" @@ -414,6 +415,8 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_newline(out); prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]); prt_newline(out); + prt_printf(out, "fragmentation %llu", a->fragmentation_lru); + prt_newline(out); prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a)); prt_newline(out); @@ -909,8 +912,8 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, !new_a->io_time[READ]) new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - old_lru = alloc_lru_idx(*old_a); - new_lru = alloc_lru_idx(*new_a); + old_lru = alloc_lru_idx_read(*old_a); + new_lru = alloc_lru_idx_read(*new_a); if (old_lru != new_lru) { ret = bch2_lru_change(trans, new->k.p.inode, @@ -920,6 +923,18 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, return ret; } + new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, + bch_dev_bkey_exists(c, new->k.p.inode)); + + if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + ret = bch2_lru_change(trans, + BCH_LRU_FRAGMENTATION_START, + bucket_to_u64(new->k.p), + old_a->fragmentation_lru, new_a->fragmentation_lru); + if (ret) + return ret; + } + if (old_a->gen != new_a->gen) { ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen); if (ret) @@ -1775,15 +1790,11 @@ static int 
invalidate_one_bucket(struct btree_trans *trans, if (ret) goto out; - if (lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) { - prt_str(&buf, "alloc key does not point back to lru entry when invalidating bucket:"); - goto err; - } + /* We expect harmless races here due to the btree write buffer: */ + if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v)) + goto out; - if (a->v.data_type != BCH_DATA_cached) { - prt_str(&buf, "lru entry points to non cached bucket:"); - goto err; - } + BUG_ON(a->v.data_type != BCH_DATA_cached); if (!a->v.cached_sectors) bch_err(c, "invalidating empty bucket, confused"); @@ -1845,6 +1856,10 @@ static void bch2_do_invalidates_work(struct work_struct *work) bch2_trans_init(&trans, c, 0, 0); + ret = bch2_btree_write_buffer_flush(&trans); + if (ret) + goto err; + for_each_member_device(ca, c, i) { s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); @@ -1860,7 +1875,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) break; } } - +err: bch2_trans_exit(&trans); bch2_write_ref_put(c, BCH_WRITE_REF_invalidate); } diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index b3c2f1e..96ac8f3 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -64,11 +64,24 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a, a.stripe, a, data_type); } -static inline u64 alloc_lru_idx(struct bch_alloc_v4 a) +static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; } +static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, + struct bch_dev *ca) +{ + if (a.data_type != BCH_DATA_btree && + a.data_type != BCH_DATA_user) + return 0; + + if (a.dirty_sectors >= ca->mi.bucket_size) + return 0; + + return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size); +} + static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a) { return ((u64) alloc_gc_gen(a) >> 4) << 56; diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index c269fc7..e001f41 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -738,7 +738,7 @@ static size_t btree_nodes_fit_in_ram(struct bch_fs *c) si_meminfo(&i); mem_bytes = i.totalram * i.mem_unit; - return (mem_bytes >> 1) / btree_bytes(c); + return div_u64(mem_bytes >> 1, btree_bytes(c)); } int bch2_get_btree_in_memory_pos(struct btree_trans *trans, diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index c862012..b10e39d 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -927,7 +927,6 @@ struct bch_fs { /* COPYGC */ struct task_struct *copygc_thread; - copygc_heap copygc_heap; struct write_point copygc_write_point; s64 copygc_wait; bool copygc_running; diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 7574ecc..fdd0050 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -988,6 +988,7 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; + __u64 fragmentation_lru; } __packed __aligned(8); #define BCH_ALLOC_V4_U64s_V0 6 @@ -1559,7 +1560,8 @@ struct bch_sb_field_journal_seq_blacklist { x(inode_v3, 23) \ x(unwritten_extents, 24) \ x(bucket_gens, 25) \ - x(lru_v2, 26) + x(lru_v2, 26) \ + x(fragmentation_lru, 27) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 3977bb1..dc2b2a0 100644 --- a/libbcachefs/btree_iter.c 
+++ b/libbcachefs/btree_iter.c @@ -1174,17 +1174,10 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, path->uptodate = BTREE_ITER_UPTODATE; out: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) { - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "ret %s (%i) trans->restarted %s (%i)\n", - bch2_err_str(ret), ret, - bch2_err_str(trans->restarted), trans->restarted); -#ifdef CONFIG_BCACHEFS_DEBUG - bch2_prt_backtrace(&buf, &trans->last_restarted); -#endif - panic("%s", buf.buf); - } + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) + panic("ret %s (%i) trans->restarted %s (%i)\n", + bch2_err_str(ret), ret, + bch2_err_str(trans->restarted), trans->restarted); bch2_btree_path_verify(trans, path); return ret; } @@ -1367,14 +1360,14 @@ void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count) { panic("trans->restart_count %u, should be %u, last restarted by %pS\n", trans->restart_count, restart_count, - (void *) trans->last_begin_ip); + (void *) trans->last_restarted_ip); } void bch2_trans_in_restart_error(struct btree_trans *trans) { panic("in transaction restart: %s, last restarted by %pS\n", bch2_err_str(trans->restarted), - (void *) trans->last_begin_ip); + (void *) trans->last_restarted_ip); } noinline __cold @@ -2872,7 +2865,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10)))) bch2_trans_reset_srcu_lock(trans); - trans->last_begin_ip = _RET_IP_; + trans->last_restarted_ip = _RET_IP_; if (trans->restarted) { bch2_btree_path_traverse_all(trans); trans->notrace_relock_fail = false; @@ -3053,10 +3046,6 @@ void bch2_trans_exit(struct btree_trans *trans) if (trans->paths) mempool_free(trans->paths, &c->btree_paths_pool); -#ifdef CONFIG_BCACHEFS_DEBUG - darray_exit(&trans->last_restarted); -#endif - trans->mem = (void *) 0x1; trans->paths = (void *) 0x1; } diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index bbbbe52..0ede02c 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -251,10 +251,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er BUG_ON(err <= 0); BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); -#ifdef CONFIG_BCACHEFS_DEBUG - bch2_save_backtrace(&trans->last_restarted, current); -#endif - trans->restarted = err; return -err; } diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 97ff267..ad73cd2 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -442,10 +442,7 @@ struct btree_trans { bool notrace_relock_fail:1; enum bch_errcode restarted:16; u32 restart_count; -#ifdef CONFIG_BCACHEFS_DEBUG - bch_stacktrace last_restarted; -#endif - unsigned long last_begin_ip; + unsigned long last_restarted_ip; unsigned long srcu_lock_time; /* diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 771e4b2..ee1d159 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -58,6 +58,9 @@ int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *, unsigned, unsigned); int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); +int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, + struct bkey_i *, enum btree_update_flags); + int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *, enum btree_update_flags); int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, diff --git 
a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 0195b13..20ad798 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -56,9 +56,10 @@ static void verify_update_old_key(struct btree_trans *trans, struct btree_insert k = bkey_i_to_s_c(j_k); } - i->old_k.needs_whiteout = k.k->needs_whiteout; + u = *k.k; + u.needs_whiteout = i->old_k.needs_whiteout; - BUG_ON(memcmp(&i->old_k, k.k, sizeof(struct bkey))); + BUG_ON(memcmp(&i->old_k, &u, sizeof(struct bkey))); BUG_ON(i->old_v != k.v); #endif } @@ -1306,12 +1307,45 @@ static noinline int extent_back_merge(struct btree_trans *trans, return 0; } +/* + * When deleting, check if we need to emit a whiteout (because we're overwriting + * something in an ancestor snapshot) + */ +static int need_whiteout_for_snapshot(struct btree_trans *trans, + enum btree_id btree_id, struct bpos pos) +{ + struct btree_iter iter; + struct bkey_s_c k; + u32 snapshot = pos.snapshot; + int ret; + + if (!bch2_snapshot_parent(trans->c, pos.snapshot)) + return 0; + + pos.snapshot++; + + for_each_btree_key_norestart(trans, iter, btree_id, pos, + BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_NOPRESERVE, k, ret) { + if (!bkey_eq(k.k->p, pos)) + break; + + if (bch2_snapshot_is_ancestor(trans->c, snapshot, + k.k->p.snapshot)) { + ret = !bkey_whiteout(k.k); + break; + } + } + bch2_trans_iter_exit(trans, &iter); + + return ret; +} int bch2_trans_update_extent(struct btree_trans *trans, struct btree_iter *orig_iter, struct bkey_i *insert, enum btree_update_flags flags) { - struct btree_iter iter, update_iter; + struct btree_iter iter; struct bpos start = bkey_start_pos(&insert->k); struct bkey_i *update; struct bkey_s_c k; @@ -1359,16 +1393,8 @@ int bch2_trans_update_extent(struct btree_trans *trans, bch2_cut_back(start, update); - bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(&update_iter) ?: - bch2_trans_update(trans, &update_iter, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| - flags); - bch2_trans_iter_exit(trans, &update_iter); - + ret = bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); if (ret) goto err; } @@ -1382,15 +1408,8 @@ int bch2_trans_update_extent(struct btree_trans *trans, bch2_cut_front(start, update); bch2_cut_back(insert->k.p, update); - bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(&update_iter) ?: - bch2_trans_update(trans, &update_iter, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| - flags); - bch2_trans_iter_exit(trans, &update_iter); + ret = bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); if (ret) goto err; } @@ -1402,21 +1421,15 @@ int bch2_trans_update_extent(struct btree_trans *trans, bkey_init(&update->k); update->k.p = k.k->p; + update->k.p.snapshot = insert->k.p.snapshot; - if (insert->k.p.snapshot != k.k->p.snapshot) { - update->k.p.snapshot = insert->k.p.snapshot; + if (insert->k.p.snapshot != k.k->p.snapshot || + (btree_type_has_snapshots(btree_id) && + need_whiteout_for_snapshot(trans, btree_id, update->k.p))) update->k.type = KEY_TYPE_whiteout; - } - - bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(&update_iter) ?: - bch2_trans_update(trans, 
&update_iter, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| - flags); - bch2_trans_iter_exit(trans, &update_iter); + ret = bch2_btree_insert_nonextent(trans, btree_id, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags); if (ret) goto err; } @@ -1468,40 +1481,6 @@ err: return ret; } -/* - * When deleting, check if we need to emit a whiteout (because we're overwriting - * something in an ancestor snapshot) - */ -static int need_whiteout_for_snapshot(struct btree_trans *trans, - enum btree_id btree_id, struct bpos pos) -{ - struct btree_iter iter; - struct bkey_s_c k; - u32 snapshot = pos.snapshot; - int ret; - - if (!bch2_snapshot_parent(trans->c, pos.snapshot)) - return 0; - - pos.snapshot++; - - for_each_btree_key_norestart(trans, iter, btree_id, pos, - BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_NOPRESERVE, k, ret) { - if (!bkey_eq(k.k->p, pos)) - break; - - if (bch2_snapshot_is_ancestor(trans->c, snapshot, - k.k->p.snapshot)) { - ret = !bkey_whiteout(k.k); - break; - } - } - bch2_trans_iter_exit(trans, &iter); - - return ret; -} - static int __must_check bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *path, struct bkey_i *k, enum btree_update_flags flags, @@ -1747,8 +1726,23 @@ void bch2_trans_commit_hook(struct btree_trans *trans, trans->hooks = h; } -int __bch2_btree_insert(struct btree_trans *trans, - enum btree_id id, +int bch2_btree_insert_nonextent(struct btree_trans *trans, + enum btree_id btree, struct bkey_i *k, + enum btree_update_flags flags) +{ + struct btree_iter iter; + int ret; + + bch2_trans_iter_init(trans, &iter, btree, k->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, k, flags); + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int __bch2_btree_insert(struct btree_trans *trans, enum btree_id id, struct bkey_i *k, enum btree_update_flags flags) { struct btree_iter iter; diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c index 05b755a..6285532 100644 --- a/libbcachefs/btree_write_buffer.c +++ b/libbcachefs/btree_write_buffer.c @@ -88,6 +88,8 @@ static union btree_write_buffer_state btree_write_buffer_switch(struct btree_wri while (old.idx == 0 ? 
wb->state.ref0 : wb->state.ref1) cpu_relax(); + smp_mb(); + return old; } diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h index 1dbba7d..2a9dab9 100644 --- a/libbcachefs/buckets_types.h +++ b/libbcachefs/buckets_types.h @@ -89,15 +89,4 @@ struct disk_reservation { unsigned nr_replicas; }; -struct copygc_heap_entry { - u8 dev; - u8 gen; - u8 replicas; - u32 fragmentation; - u32 sectors; - u64 bucket; -}; - -typedef HEAP(struct copygc_heap_entry) copygc_heap; - #endif /* _BUCKETS_TYPES_H */ diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 199e894..de0575f 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -22,9 +22,10 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans, struct bpos new_pos) { struct bch_fs *c = trans->c; - struct btree_iter iter, update_iter; - struct bkey_s_c k; + struct btree_iter iter, iter2; + struct bkey_s_c k, k2; snapshot_id_list s; + struct bkey_i *update; int ret; if (!btree_type_has_snapshots(id)) @@ -32,10 +33,7 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans, darray_init(&s); - if (bkey_eq(old_pos, new_pos)) - return 0; - - if (!snapshot_t(c, old_pos.snapshot)->children[0]) + if (!bch2_snapshot_has_children(c, old_pos.snapshot)) return 0; bch2_trans_iter_init(trans, &iter, id, old_pos, @@ -47,33 +45,39 @@ static int insert_snapshot_whiteouts(struct btree_trans *trans, if (ret) break; + if (!k.k) + break; + if (!bkey_eq(old_pos, k.k->p)) break; - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) { - struct bkey_i *update; + if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) && + !snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) { + struct bpos whiteout_pos = new_pos; - if (snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) - continue; + whiteout_pos.snapshot = k.k->p.snapshot; - update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); + bch2_trans_iter_init(trans, &iter2, id, whiteout_pos, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + k2 = bch2_btree_iter_peek_slot(&iter2); + ret = bkey_err(k2); - ret = PTR_ERR_OR_ZERO(update); - if (ret) - break; + if (!ret && k2.k->type == KEY_TYPE_deleted) { + update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); + ret = PTR_ERR_OR_ZERO(update); + if (ret) + break; - bkey_init(&update->k); - update->k.p = new_pos; - update->k.p.snapshot = k.k->p.snapshot; + bkey_init(&update->k); + update->k.p = whiteout_pos; + update->k.type = KEY_TYPE_whiteout; + + ret = bch2_trans_update(trans, &iter2, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + } + bch2_trans_iter_exit(trans, &iter2); - bch2_trans_iter_init(trans, &update_iter, id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(&update_iter) ?: - bch2_trans_update(trans, &update_iter, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); - bch2_trans_iter_exit(trans, &update_iter); if (ret) break; @@ -229,9 +233,21 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, next_pos = insert->k.p; - ret = insert_snapshot_whiteouts(trans, m->btree_id, - k.k->p, insert->k.p) ?: - bch2_trans_update(trans, &iter, insert, + if (!bkey_eq(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { + ret = insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, + bkey_start_pos(&insert->k)); + if (ret) + goto err; + } + + if (!bkey_eq(insert->k.p, k.k->p)) { + ret = insert_snapshot_whiteouts(trans, m->btree_id, + k.k->p, insert->k.p); + if (ret) + goto err; + } + + ret = 
bch2_trans_update(trans, &iter, insert, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: bch2_trans_commit(trans, &op->res, NULL, diff --git a/libbcachefs/error.c b/libbcachefs/error.c index c2882c5..1dae649 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -98,7 +98,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) INIT_LIST_HEAD(&s->list); s->fmt = fmt; - s->buf = PRINTBUF; list_add(&s->list, &c->fsck_errors); return s; } @@ -111,9 +110,23 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; + va_start(args, fmt); + prt_vprintf(out, fmt, args); + va_end(args); + mutex_lock(&c->fsck_error_lock); s = fsck_err_get(c, fmt); if (s) { + if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { + ret = s->ret; + mutex_unlock(&c->fsck_error_lock); + printbuf_exit(&buf); + return ret; + } + + kfree(s->last_msg); + s->last_msg = kstrdup(buf.buf, GFP_KERNEL); + if (c->opts.ratelimit_errors && !(flags & FSCK_NO_RATELIMIT) && s->nr >= FSCK_ERR_RATELIMIT_NR) { @@ -123,8 +136,6 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) print = false; } - printbuf_reset(&s->buf); - out = &s->buf; s->nr++; } @@ -133,10 +144,6 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) prt_printf(out, bch2_log_msg(c, "")); #endif - va_start(args, fmt); - prt_vprintf(out, fmt, args); - va_end(args); - if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { @@ -190,6 +197,9 @@ int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...) else if (suppressing) bch_err(c, "Ratelimiting new instances of previous error"); + if (s) + s->ret = ret; + mutex_unlock(&c->fsck_error_lock); printbuf_exit(&buf); @@ -214,11 +224,11 @@ void bch2_flush_fsck_errs(struct bch_fs *c) mutex_lock(&c->fsck_error_lock); list_for_each_entry_safe(s, n, &c->fsck_errors, list) { - if (s->ratelimited) - bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf.buf); + if (s->ratelimited && s->last_msg) + bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); - printbuf_exit(&s->buf); + kfree(s->last_msg); kfree(s); } diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 9991879..91c7e4e 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -103,7 +103,8 @@ struct fsck_err_state { const char *fmt; u64 nr; bool ratelimited; - struct printbuf buf; + int ret; + char *last_msg; }; #define FSCK_CAN_FIX (1 << 0) diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 52bb00b..f2768a7 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -605,6 +605,17 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s, : bch2_snapshot_is_ancestor(c, src, dst); } +static int ref_visible2(struct bch_fs *c, + u32 src, struct snapshots_seen *src_seen, + u32 dst, struct snapshots_seen *dst_seen) +{ + if (dst > src) { + swap(dst, src); + swap(dst_seen, src_seen); + } + return key_visible_in_snapshot(c, src_seen, dst, src); +} + #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ (_i)->snapshot <= (_snapshot); _i++) \ @@ -1158,10 +1169,102 @@ fsck_err: return ret; } +struct extent_end { + u32 snapshot; + u64 offset; + struct snapshots_seen seen; +}; + +typedef DARRAY(struct extent_end) extent_ends; + +static int check_overlapping_extents(struct btree_trans *trans, + 
struct snapshots_seen *seen, + extent_ends *extent_ends, + struct bkey_s_c k, + struct btree_iter *iter) +{ + struct bch_fs *c = trans->c; + struct extent_end *i; + struct printbuf buf = PRINTBUF; + int ret = 0; + + darray_for_each(*extent_ends, i) { + /* duplicate, due to transaction restart: */ + if (i->offset == k.k->p.offset && + i->snapshot == k.k->p.snapshot) + continue; + + if (!ref_visible2(c, + k.k->p.snapshot, seen, + i->snapshot, &i->seen)) + continue; + + if (fsck_err_on(i->offset > bkey_start_offset(k.k), c, + "overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s", + i->snapshot, + i->offset, + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + bkey_reassemble(update, k); + ret = bch2_trans_update_extent(trans, iter, update, 0); + if (!ret) + goto err; + } + } +err: +fsck_err: + printbuf_exit(&buf); + return ret; +} + +static int extent_ends_at(extent_ends *extent_ends, + struct snapshots_seen *seen, + struct bkey_s_c k) +{ + struct extent_end *i, n = (struct extent_end) { + .snapshot = k.k->p.snapshot, + .offset = k.k->p.offset, + .seen = *seen, + }; + + n.seen.ids.data = kmemdup(seen->ids.data, + sizeof(seen->ids.data[0]) * seen->ids.size, + GFP_KERNEL); + if (!n.seen.ids.data) + return -ENOMEM; + + darray_for_each(*extent_ends, i) { + if (i->snapshot == k.k->p.snapshot) { + snapshots_seen_exit(&i->seen); + *i = n; + return 0; + } + + if (i->snapshot >= k.k->p.snapshot) + break; + } + + return darray_insert_item(extent_ends, i - extent_ends->data, n); +} + +static void extent_ends_reset(extent_ends *extent_ends) +{ + struct extent_end *i; + + darray_for_each(*extent_ends, i) + snapshots_seen_exit(&i->seen); + + extent_ends->nr = 0; +} + static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, struct inode_walker *inode, - struct snapshots_seen *s) + struct snapshots_seen *s, + extent_ends *extent_ends) { struct bch_fs *c = trans->c; struct inode_walker_entry *i; @@ -1189,24 +1292,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, ret = check_i_sectors(trans, inode); if (ret) goto err; + + extent_ends_reset(extent_ends); } BUG_ON(!iter->path->should_be_locked); -#if 0 - if (bkey_gt(prev.k->k.p, bkey_start_pos(k.k))) { - char buf1[200]; - char buf2[200]; - bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k)); - bch2_bkey_val_to_text(&PBUF(buf2), c, k); + ret = check_overlapping_extents(trans, s, extent_ends, k, iter); + if (ret) + goto err; + + ret = extent_ends_at(extent_ends, s, k); + if (ret) + goto err; - if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) { - ret = fix_overlapping_extent(trans, k, prev.k->k.p) - ?: -BCH_ERR_transaction_restart_nested; - goto out; - } - } -#endif ret = __walk_inode(trans, inode, equiv); if (ret < 0) goto err; @@ -1304,13 +1403,9 @@ static int check_extents(struct bch_fs *c) struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + extent_ends extent_ends = { 0 }; int ret = 0; -#if 0 - struct bkey_buf prev; - bch2_bkey_buf_init(&prev); - prev.k->k = KEY(0, 0, 0); -#endif snapshots_seen_init(&s); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); @@ -1321,10 +1416,10 @@ static int check_extents(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, - check_extent(&trans, &iter, k, &w, &s)); -#if 0 - 
bch2_bkey_buf_exit(&prev, c); -#endif + check_extent(&trans, &iter, k, &w, &s, &extent_ends)); + + extent_ends_reset(&extent_ends); + darray_exit(&extent_ends); inode_walker_exit(&w); bch2_trans_exit(&trans); snapshots_seen_exit(&s); diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index 07fb41c..9eec12a 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -49,7 +49,6 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru) static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time, unsigned key_type) { - struct btree_iter iter; struct bkey_i *k; int ret = 0; @@ -69,13 +68,7 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, EBUG_ON(lru_pos_time(k->k.p) != time); EBUG_ON(k->k.p.offset != dev_bucket); - bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, - k->k.p, BTREE_ITER_INTENT); - - ret = bch2_btree_iter_traverse(&iter) ?: - bch2_trans_update(trans, &iter, k, 0); - bch2_trans_iter_exit(trans, &iter); - return ret; + return bch2_trans_update_buffered(trans, BTREE_ID_lru, k); } int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) @@ -99,6 +92,13 @@ int bch2_lru_change(struct btree_trans *trans, bch2_lru_set(trans, lru_id, dev_bucket, new_time); } +static const char * const bch2_lru_types[] = { +#define x(n) #n, + BCH_LRU_TYPES() +#undef x + NULL +}; + static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k) @@ -110,7 +110,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, const struct bch_alloc_v4 *a; struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; + enum bch_lru_type type = lru_type(lru_k); struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset); + u64 idx; int ret; if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, @@ -126,11 +128,21 @@ static int bch2_check_lru_key(struct btree_trans *trans, a = bch2_alloc_to_v4(k, &a_convert); + switch (type) { + case BCH_LRU_read: + idx = alloc_lru_idx_read(*a); + break; + case BCH_LRU_fragmentation: + idx = a->fragmentation_lru; + break; + } + if (fsck_err_on(lru_k.k->type != KEY_TYPE_set || - a->data_type != BCH_DATA_cached || - a->io_time[READ] != lru_pos_time(lru_k.k->p), c, - "incorrect lru entry (time %llu) %s\n" + lru_pos_time(lru_k.k->p) != idx, c, + "incorrect lru entry: lru %s time %llu\n" + " %s\n" " for %s", + bch2_lru_types[type], lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) { diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h index b8d9848..78a6076 100644 --- a/libbcachefs/lru.h +++ b/libbcachefs/lru.h @@ -22,6 +22,27 @@ static inline u64 lru_pos_time(struct bpos pos) return pos.inode & ~(~0ULL << LRU_TIME_BITS); } +#define BCH_LRU_TYPES() \ + x(read) \ + x(fragmentation) + +enum bch_lru_type { +#define x(n) BCH_LRU_##n, + BCH_LRU_TYPES() +#undef x +}; + +#define BCH_LRU_FRAGMENTATION_START ((1U << 16) - 1) + +static inline enum bch_lru_type lru_type(struct bkey_s_c l) +{ + u16 lru_id = l.k->p.inode >> 48; + + if (lru_id == BCH_LRU_FRAGMENTATION_START) + return BCH_LRU_fragmentation; + return BCH_LRU_read; +} + int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index e7eb55b..7dac926 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -653,13 +653,13 @@ failed_to_evacuate: printbuf_exit(&buf); } -int 
__bch2_evacuate_bucket(struct moving_context *ctxt, +int __bch2_evacuate_bucket(struct btree_trans *trans, + struct moving_context *ctxt, struct bpos bucket, int gen, struct data_update_opts _data_opts) { struct bch_fs *c = ctxt->c; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); - struct btree_trans trans; struct btree_iter iter; struct bkey_buf sk; struct bch_backpointer bp; @@ -668,17 +668,17 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c k; struct data_update_opts data_opts; unsigned dirty_sectors, bucket_size; + u64 fragmentation; u64 bp_offset = 0, cur_inum = U64_MAX; int ret = 0; bch2_bkey_buf_init(&sk); - bch2_trans_init(&trans, c, 0, 0); - bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_CACHED); - ret = lockrestart_do(&trans, + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (ret) { bch_err(c, "%s: error looking up alloc key: %s", __func__, bch2_err_str(ret)); @@ -688,17 +688,18 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, a = bch2_alloc_to_v4(k, &a_convert); dirty_sectors = a->dirty_sectors; bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; + fragmentation = a->fragmentation_lru; - ret = bch2_btree_write_buffer_flush(&trans); + ret = bch2_btree_write_buffer_flush(trans); if (ret) { bch_err(c, "%s: error flushing btree write buffer: %s", __func__, bch2_err_str(ret)); goto err; } - while (!(ret = move_ratelimit(&trans, ctxt))) { - bch2_trans_begin(&trans); + while (!(ret = move_ratelimit(trans, ctxt))) { + bch2_trans_begin(trans); - ret = bch2_get_next_backpointer(&trans, bucket, gen, + ret = bch2_get_next_backpointer(trans, bucket, gen, &bp_offset, &bp, BTREE_ITER_CACHED); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -713,7 +714,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, struct bkey_s_c k; unsigned i = 0; - k = bch2_backpointer_get_key(&trans, &iter, + k = bch2_backpointer_get_key(trans, &iter, bucket, bp_offset, bp); ret = bkey_err(k); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) @@ -726,9 +727,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum); + ret = move_get_io_opts(trans, &io_opts, k, &cur_inum); if (ret) { - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); continue; } @@ -742,15 +743,15 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, i++; } - ret = bch2_move_extent(&trans, &iter, ctxt, io_opts, + ret = bch2_move_extent(trans, &iter, ctxt, io_opts, bp.btree_id, k, data_opts); - bch2_trans_iter_exit(&trans, &iter); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret == -ENOMEM) { /* memory allocation failure, wait for some IO to finish */ - bch2_move_ctxt_wait_for_io(ctxt, &trans); + bch2_move_ctxt_wait_for_io(ctxt, trans); continue; } if (ret) @@ -762,7 +763,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, } else { struct btree *b; - b = bch2_backpointer_get_node(&trans, &iter, + b = bch2_backpointer_get_node(trans, &iter, bucket, bp_offset, bp); ret = PTR_ERR_OR_ZERO(b); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) @@ -774,8 +775,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, if (!b) goto next; - ret = 
bch2_btree_node_rewrite(&trans, &iter, b, 0); - bch2_trans_iter_exit(&trans, &iter); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0); + bch2_trans_iter_exit(trans, &iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; @@ -792,17 +793,16 @@ next: bp_offset++; } - trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret); + trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, fragmentation, ret); if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) { - bch2_trans_unlock(&trans); + bch2_trans_unlock(trans); move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); closure_sync(&ctxt->cl); if (!ctxt->write_error) - verify_bucket_evacuated(&trans, bucket, gen); + verify_bucket_evacuated(trans, bucket, gen); } err: - bch2_trans_exit(&trans); bch2_bkey_buf_exit(&sk, c); return ret; } @@ -815,12 +815,15 @@ int bch2_evacuate_bucket(struct bch_fs *c, struct write_point_specifier wp, bool wait_on_copygc) { + struct btree_trans trans; struct moving_context ctxt; int ret; + bch2_trans_init(&trans, c, 0, 0); bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&ctxt, bucket, gen, data_opts); + ret = __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts); bch2_moving_ctxt_exit(&ctxt); + bch2_trans_exit(&trans); return ret; } diff --git a/libbcachefs/move.h b/libbcachefs/move.h index aef6138..c5a7c0a 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -66,7 +66,8 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct moving_context *, +int __bch2_evacuate_bucket(struct btree_trans *, + struct moving_context *, struct bpos, int, struct data_update_opts); int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index f0ab65f..1a64643 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -10,6 +10,7 @@ #include "alloc_foreground.h" #include "btree_iter.h" #include "btree_update.h" +#include "btree_write_buffer.h" #include "buckets.h" #include "clock.h" #include "disk_groups.h" @@ -19,6 +20,7 @@ #include "eytzinger.h" #include "io.h" #include "keylist.h" +#include "lru.h" #include "move.h" #include "movinggc.h" #include "super-io.h" @@ -31,138 +33,105 @@ #include #include -static inline int fragmentation_cmp(copygc_heap *heap, - struct copygc_heap_entry l, - struct copygc_heap_entry r) +static int bch2_bucket_is_movable(struct btree_trans *trans, + struct bpos bucket, u64 time, u8 *gen) { - return cmp_int(l.fragmentation, r.fragmentation); -} - -static int find_buckets_to_copygc(struct bch_fs *c) -{ - copygc_heap *h = &c->copygc_heap; - struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; + struct bch_alloc_v4 _a; + const struct bch_alloc_v4 *a; int ret; - bch2_trans_init(&trans, c, 0, 0); + if (bch2_bucket_is_open(trans->c, bucket.inode, bucket.offset)) + return 0; - /* - * Find buckets with lowest sector counts, skipping completely - * empty buckets, by building a maxheap sorted by sector count, - * and repeatedly replacing the maximum element until all - * buckets have been visited. 
- */ - h->used = 0; - - for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - struct bch_dev *ca = bch_dev_bkey_exists(c, iter.pos.inode); - struct copygc_heap_entry e; - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a; - - a = bch2_alloc_to_v4(k, &a_convert); - - if ((a->data_type != BCH_DATA_btree && - a->data_type != BCH_DATA_user) || - a->dirty_sectors >= ca->mi.bucket_size || - bch2_bucket_is_open(c, iter.pos.inode, iter.pos.offset)) - continue; + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, 0); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + bch2_trans_iter_exit(trans, &iter); + + if (ret) + return ret; - e = (struct copygc_heap_entry) { - .dev = iter.pos.inode, - .gen = a->gen, - .replicas = 1 + a->stripe_redundancy, - .fragmentation = div_u64((u64) a->dirty_sectors * (1ULL << 31), - ca->mi.bucket_size), - .sectors = a->dirty_sectors, - .bucket = iter.pos.offset, - }; - heap_add_or_replace(h, e, -fragmentation_cmp, NULL); + a = bch2_alloc_to_v4(k, &_a); + *gen = a->gen; + ret = (a->data_type == BCH_DATA_btree || + a->data_type == BCH_DATA_user) && + a->fragmentation_lru && + a->fragmentation_lru <= time; + if (ret) { + struct printbuf buf = PRINTBUF; + + bch2_bkey_val_to_text(&buf, trans->c, k); + pr_debug("%s", buf.buf); + printbuf_exit(&buf); } - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); return ret; } +static int bch2_copygc_next_bucket(struct btree_trans *trans, + struct bpos *bucket, u8 *gen, struct bpos *pos) +{ + struct btree_iter iter; + struct bkey_s_c k; + int ret; + + ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru, + bpos_max(*pos, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0)), + lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), + 0, k, ({ + *bucket = u64_to_bucket(k.k->p.offset); + + bch2_bucket_is_movable(trans, *bucket, lru_pos_time(k.k->p), gen); + })); + + *pos = iter.pos; + if (ret < 0) + return ret; + return ret ? 
0 : -ENOENT; +} + static int bch2_copygc(struct bch_fs *c) { - copygc_heap *h = &c->copygc_heap; - struct copygc_heap_entry e; struct bch_move_stats move_stats; - struct bch_dev *ca; - unsigned dev_idx; - size_t heap_size = 0; + struct btree_trans trans; struct moving_context ctxt; struct data_update_opts data_opts = { .btree_insert_flags = BTREE_INSERT_USE_RESERVE|JOURNAL_WATERMARK_copygc, }; + struct bpos bucket; + struct bpos pos; + u8 gen = 0; + unsigned nr_evacuated; int ret = 0; bch2_move_stats_init(&move_stats, "copygc"); - - for_each_rw_member(ca, c, dev_idx) - heap_size += ca->mi.nbuckets >> 7; - - if (h->size < heap_size) { - free_heap(&c->copygc_heap); - if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) { - bch_err(c, "error allocating copygc heap"); - return 0; - } - } - - ret = find_buckets_to_copygc(c); - if (ret) { - bch2_fs_fatal_error(c, "error walking buckets to copygc!"); - return ret; - } - - if (!h->used) { - s64 wait = S64_MAX, dev_wait; - u64 dev_min_wait_fragmented = 0; - u64 dev_min_wait_allowed = 0; - int dev_min_wait = -1; - - for_each_rw_member(ca, c, dev_idx) { - struct bch_dev_usage usage = bch2_dev_usage_read(ca); - s64 allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) * - ca->mi.bucket_size) >> 1); - s64 fragmented = usage.d[BCH_DATA_user].fragmented; - - dev_wait = max(0LL, allowed - fragmented); - - if (dev_min_wait < 0 || dev_wait < wait) { - dev_min_wait = dev_idx; - dev_min_wait_fragmented = fragmented; - dev_min_wait_allowed = allowed; - } - } - - bch_err_ratelimited(c, "copygc requested to run but found no buckets to move! dev %u fragmented %llu allowed %llu", - dev_min_wait, dev_min_wait_fragmented, dev_min_wait_allowed); - return 0; - } - - heap_resort(h, fragmentation_cmp, NULL); - bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, writepoint_ptr(&c->copygc_write_point), false); + bch2_trans_init(&trans, c, 0, 0); + + ret = bch2_btree_write_buffer_flush(&trans); + BUG_ON(ret); - /* not correct w.r.t. 
device removal */ - while (h->used && !ret) { - BUG_ON(!heap_pop(h, e, -fragmentation_cmp, NULL)); - ret = __bch2_evacuate_bucket(&ctxt, POS(e.dev, e.bucket), e.gen, - data_opts); + for (nr_evacuated = 0, pos = POS_MIN; + nr_evacuated < 32 && !ret; + nr_evacuated++, pos = bpos_nosnap_successor(pos)) { + ret = bch2_copygc_next_bucket(&trans, &bucket, &gen, &pos) ?: + __bch2_evacuate_bucket(&trans, &ctxt, bucket, gen, data_opts); + if (bkey_eq(pos, POS_MAX)) + break; } + bch2_trans_exit(&trans); bch2_moving_ctxt_exit(&ctxt); + /* no entries in LRU btree found, or got to end: */ + if (ret == -ENOENT) + ret = 0; + if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 178f064..1976d5f 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -1105,6 +1105,9 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; + } else if (c->sb.version < bcachefs_metadata_version_fragmentation_lru) { + bch_info(c, "version prior to backpointers, upgrade required"); + c->opts.version_upgrade = true; } } diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 1805c85..ba28110 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -512,8 +512,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, n->v.pad = 0; SET_BCH_SNAPSHOT_SUBVOL(&n->v, true); - ret = bch2_trans_update(trans, &iter, &n->k_i, 0) ?: - bch2_mark_snapshot(trans, bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0); + ret = bch2_trans_update(trans, &iter, &n->k_i, 0); if (ret) goto err; diff --git a/libbcachefs/subvolume.h b/libbcachefs/subvolume.h index b6740ea..7c488c3 100644 --- a/libbcachefs/subvolume.h +++ b/libbcachefs/subvolume.h @@ -8,15 +8,15 @@ void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c, unsigned, struct printbuf *); +int bch2_mark_snapshot(struct btree_trans *, struct bkey_s_c, + struct bkey_s_c, unsigned); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ .key_invalid = bch2_snapshot_invalid, \ .val_to_text = bch2_snapshot_to_text, \ + .atomic_trigger = bch2_mark_snapshot, \ }) -int bch2_mark_snapshot(struct btree_trans *, struct bkey_s_c, - struct bkey_s_c, unsigned); - static inline struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id) { return genradix_ptr(&c->snapshots, U32_MAX - id); @@ -68,6 +68,13 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances return id == ancestor; } +static inline bool bch2_snapshot_has_children(struct bch_fs *c, u32 id) +{ + struct snapshot_t *t = snapshot_t(c, id); + + return (t->children[0]|t->children[1]) != 0; +} + static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) { u32 *i; diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 8bed118..3b7cf9e 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -488,7 +488,6 @@ static void __bch2_fs_free(struct bch_fs *c) kfree(rcu_dereference_protected(c->disk_groups, 1)); kfree(c->journal_seq_blacklist_table); kfree(c->unused_inode_hints); - free_heap(&c->copygc_heap); if (c->io_complete_wq) destroy_workqueue(c->io_complete_wq); diff --git a/libbcachefs/util.c b/libbcachefs/util.c index a979095..bf5ffb4 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -433,8 +433,8 @@ static const struct time_unit { { "us", NSEC_PER_USEC }, { 
"ms", NSEC_PER_MSEC }, { "s", NSEC_PER_SEC }, - { "m", NSEC_PER_SEC * 60}, - { "h", NSEC_PER_SEC * 3600}, + { "m", (u64) NSEC_PER_SEC * 60}, + { "h", (u64) NSEC_PER_SEC * 3600}, { "eon", U64_MAX }, }; diff --git a/linux/six.c b/linux/six.c index 41337a7..5a6eadc 100644 --- a/linux/six.c +++ b/linux/six.c @@ -833,19 +833,12 @@ struct six_lock_count six_lock_counts(struct six_lock *lock) { struct six_lock_count ret; - ret.n[SIX_LOCK_read] = 0; + ret.n[SIX_LOCK_read] = !lock->readers + ? lock->state.read_lock + : pcpu_read_count(lock); ret.n[SIX_LOCK_intent] = lock->state.intent_lock + lock->intent_lock_recurse; ret.n[SIX_LOCK_write] = lock->state.seq & 1; - if (!lock->readers) - ret.n[SIX_LOCK_read] += lock->state.read_lock; - else { - int cpu; - - for_each_possible_cpu(cpu) - ret.n[SIX_LOCK_read] += *per_cpu_ptr(lock->readers, cpu); - } - return ret; } EXPORT_SYMBOL_GPL(six_lock_counts);