X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Falloc_foreground.c;h=affddf1f03fff0cb3121ab5f767df65beb1d19b9;hb=da6a35689518599b381c285cd9505ab8d58f7c73;hp=f1cfb90b6d54c9cd6f5088cdbf9b11471ba1b06b;hpb=71111771690f244d13650c73d52ff601ad914d95;p=bcachefs-tools-debian diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index f1cfb90..affddf1 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -58,6 +58,17 @@ const char * const bch2_alloc_reserves[] = { * reference _after_ doing the index update that makes its allocation reachable. */ +void bch2_reset_alloc_cursors(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + rcu_read_lock(); + for_each_member_device_rcu(ca, c, i, NULL) + ca->alloc_cursor = 0; + rcu_read_unlock(); +} + static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob) { open_bucket_idx_t idx = ob - c->open_buckets; @@ -272,7 +283,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * } spin_unlock(&c->freelist_lock); - return ob; } @@ -418,12 +428,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c k; struct open_bucket *ob = NULL; + u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx); + u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor)); int ret; - - s->cur_bucket = max_t(u64, s->cur_bucket, ca->mi.first_bucket); - s->cur_bucket = max_t(u64, s->cur_bucket, ca->new_fs_bucket_idx); - - for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, s->cur_bucket), +again: + for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor), BTREE_ITER_SLOTS, k, ret) { struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; @@ -448,9 +457,17 @@ bch2_bucket_alloc_early(struct btree_trans *trans, } bch2_trans_iter_exit(trans, &iter); - s->cur_bucket = iter.pos.offset; + ca->alloc_cursor = alloc_cursor; + + if (!ob && ret) + ob = ERR_PTR(ret); + + if (!ob && alloc_cursor > alloc_start) { + alloc_cursor = alloc_start; + goto again; + } - return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); + return ob; } static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, @@ -462,33 +479,34 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, struct btree_iter iter; struct bkey_s_c k; struct open_bucket *ob = NULL; + u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor)); + u64 alloc_cursor = alloc_start; int ret; BUG_ON(ca->new_fs_bucket_idx); - - /* - * XXX: - * On transaction restart, we'd like to restart from the bucket we were - * at previously - */ +again: for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace, - POS(ca->dev_idx, s->cur_bucket), 0, k, ret) { + POS(ca->dev_idx, alloc_cursor), 0, k, ret) { if (k.k->p.inode != ca->dev_idx) break; - for (s->cur_bucket = max(s->cur_bucket, bkey_start_offset(k.k)); - s->cur_bucket < k.k->p.offset; - s->cur_bucket++) { + for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k)); + alloc_cursor < k.k->p.offset; + alloc_cursor++) { ret = btree_trans_too_many_iters(trans); - if (ret) + if (ret) { + ob = ERR_PTR(ret); break; + } s->buckets_seen++; ob = try_alloc_bucket(trans, ca, reserve, - s->cur_bucket, s, k, cl); - if (ob) + alloc_cursor, s, k, cl); + if (ob) { + iter.path->preserve = false; break; + } } if (ob || ret) @@ -496,7 +514,17 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, } bch2_trans_iter_exit(trans, &iter); - return ob ?: ERR_PTR(ret); + ca->alloc_cursor = alloc_cursor; + + if (!ob && ret) + ob = ERR_PTR(ret); + + if (!ob && alloc_start > ca->mi.first_bucket) { + alloc_cursor = alloc_start = ca->mi.first_bucket; + goto again; + } + + return ob; } /** @@ -514,9 +542,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; bool freespace = READ_ONCE(ca->mi.freespace_initialized); - u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor; u64 avail; - struct bucket_alloc_state s = { .cur_bucket = start }; + struct bucket_alloc_state s = { 0 }; bool waiting = false; again: bch2_dev_usage_read_fast(ca, usage); @@ -561,28 +588,31 @@ alloc: if (s.skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); - if (!ob && !freespace && start) { - start = s.cur_bucket = 0; - goto alloc; - } - if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { freespace = false; goto alloc; } - - if (!freespace) - ca->bucket_alloc_trans_early_cursor = s.cur_bucket; err: if (!ob) ob = ERR_PTR(-BCH_ERR_no_buckets_found); if (!IS_ERR(ob)) - trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve], - may_alloc_partial, ob->bucket); + trace_and_count(c, bucket_alloc, ca, + bch2_alloc_reserves[reserve], + may_alloc_partial, + ob->bucket, + usage->d[BCH_DATA_free].buckets, + avail, + bch2_copygc_wait_amount(c), + c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now), + &s, + cl == NULL, + ""); else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart)) - trace_and_count(c, bucket_alloc_fail, - ca, bch2_alloc_reserves[reserve], + trace_and_count(c, bucket_alloc_fail, ca, + bch2_alloc_reserves[reserve], + may_alloc_partial, + 0, usage->d[BCH_DATA_free].buckets, avail, bch2_copygc_wait_amount(c), @@ -683,7 +713,7 @@ static void add_new_bucket(struct bch_fs *c, ob_push(c, ptrs, ob); } -static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, +int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct open_buckets *ptrs, struct dev_stripe_state *stripe, struct bch_devs_mask *devs_may_alloc, @@ -749,24 +779,6 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, return ret; } -int bch2_bucket_alloc_set(struct bch_fs *c, - struct open_buckets *ptrs, - struct dev_stripe_state *stripe, - struct bch_devs_mask *devs_may_alloc, - unsigned nr_replicas, - unsigned *nr_effective, - bool *have_cache, - enum alloc_reserve reserve, - unsigned flags, - struct closure *cl) -{ - return bch2_trans_do(c, NULL, NULL, 0, - bch2_bucket_alloc_set_trans(&trans, ptrs, stripe, - devs_may_alloc, nr_replicas, - nr_effective, have_cache, reserve, - flags, cl)); -} - /* Allocate from stripes: */ /* @@ -775,7 +787,7 @@ int bch2_bucket_alloc_set(struct bch_fs *c, * it's to a device we don't want: */ -static int bucket_alloc_from_stripe(struct bch_fs *c, +static int bucket_alloc_from_stripe(struct btree_trans *trans, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_mask *devs_may_alloc, @@ -787,6 +799,7 @@ static int bucket_alloc_from_stripe(struct bch_fs *c, unsigned flags, struct closure *cl) { + struct bch_fs *c = trans->c; struct dev_alloc_list devs_sorted; struct ec_stripe_head *h; struct open_bucket *ob; @@ -802,11 +815,11 @@ static int bucket_alloc_from_stripe(struct bch_fs *c, if (ec_open_bucket(c, ptrs)) return 0; - h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1, + h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, wp == &c->copygc_write_point, cl); if (IS_ERR(h)) - return -PTR_ERR(h); + return PTR_ERR(h); if (!h) return 0; @@ -912,7 +925,7 @@ static int open_bucket_add_buckets(struct btree_trans *trans, } if (!ec_open_bucket(c, ptrs)) { - ret = bucket_alloc_from_stripe(c, ptrs, wp, &devs, + ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs, target, erasure_code, nr_replicas, nr_effective, have_cache, flags, _cl); @@ -1060,7 +1073,7 @@ static bool try_decrease_writepoints(struct bch_fs *c, return true; } -static void bch2_trans_mutex_lock(struct btree_trans *trans, +static void bch2_trans_mutex_lock_norelock(struct btree_trans *trans, struct mutex *lock) { if (!mutex_trylock(lock)) { @@ -1078,7 +1091,7 @@ static struct write_point *writepoint_find(struct btree_trans *trans, if (!(write_point & 1UL)) { wp = (struct write_point *) write_point; - bch2_trans_mutex_lock(trans, &wp->lock); + bch2_trans_mutex_lock_norelock(trans, &wp->lock); return wp; } @@ -1087,7 +1100,7 @@ restart_find: wp = __writepoint_find(head, write_point); if (wp) { lock_wp: - bch2_trans_mutex_lock(trans, &wp->lock); + bch2_trans_mutex_lock_norelock(trans, &wp->lock); if (wp->write_point == write_point) goto out; mutex_unlock(&wp->lock); @@ -1100,8 +1113,8 @@ restart_find_oldest: if (!oldest || time_before64(wp->last_used, oldest->last_used)) oldest = wp; - bch2_trans_mutex_lock(trans, &oldest->lock); - bch2_trans_mutex_lock(trans, &c->write_points_hash_lock); + bch2_trans_mutex_lock_norelock(trans, &oldest->lock); + bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock); if (oldest >= c->write_points + c->write_points_nr || try_increase_writepoints(c)) { mutex_unlock(&c->write_points_hash_lock); @@ -1130,16 +1143,16 @@ out: * Get us an open_bucket we can allocate from, return with it locked: */ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, - unsigned target, - unsigned erasure_code, - struct write_point_specifier write_point, - struct bch_devs_list *devs_have, - unsigned nr_replicas, - unsigned nr_replicas_required, - enum alloc_reserve reserve, - unsigned flags, - struct closure *cl, - struct write_point **wp_ret) + unsigned target, + unsigned erasure_code, + struct write_point_specifier write_point, + struct bch_devs_list *devs_have, + unsigned nr_replicas, + unsigned nr_replicas_required, + enum alloc_reserve reserve, + unsigned flags, + struct closure *cl, + struct write_point **wp_ret) { struct bch_fs *c = trans->c; struct write_point *wp; @@ -1336,3 +1349,33 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) spin_unlock(&ob->lock); } } + +static const char * const bch2_write_point_states[] = { +#define x(n) #n, + WRITE_POINT_STATES() +#undef x + NULL +}; + +void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) +{ + struct write_point *wp; + unsigned i; + + for (wp = c->write_points; + wp < c->write_points + ARRAY_SIZE(c->write_points); + wp++) { + prt_printf(out, "%lu: ", wp->write_point); + prt_human_readable_u64(out, wp->sectors_allocated); + + prt_printf(out, " last wrote: "); + bch2_pr_time_units(out, sched_clock() - wp->last_used); + + for (i = 0; i < WRITE_POINT_STATE_NR; i++) { + prt_printf(out, " %s: ", bch2_write_point_states[i]); + bch2_pr_time_units(out, wp->time[i]); + } + + prt_newline(out); + } +}