X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Fbuckets.c;h=cd297941e6b6f106ff00cda84c1e57b1c20b2295;hb=ae43a58d97fc00e31770142da832fb8a249808eb;hp=51ed9609aeb45dbcbcee4de064d01481a1037161;hpb=498874fdb71973c1856f35414bd607e58be16790;p=bcachefs-tools-debian
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 51ed960..cd29794 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -7,6 +7,7 @@
 #include "bcachefs.h"
 #include "alloc_background.h"
+#include "backpointers.h"
 #include "bset.h"
 #include "btree_gc.h"
 #include "btree_update.h"
@@ -88,20 +89,17 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
 		: ca->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
 {
 	struct bch_fs *c = ca->fs;
-	struct bch_dev_usage ret;
 	unsigned seq, i, u64s = dev_usage_u64s();
 
 	do {
 		seq = read_seqcount_begin(&c->usage_lock);
-		memcpy(&ret, ca->usage_base, u64s * sizeof(u64));
+		memcpy(usage, ca->usage_base, u64s * sizeof(u64));
 		for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
-			acc_u64s_percpu((u64 *) &ret, (u64 __percpu *) ca->usage[i], u64s);
+			acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
 	} while (read_seqcount_retry(&c->usage_lock, seq));
-
-	return ret;
 }
 
 static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@@ -197,26 +195,26 @@ void bch2_fs_usage_to_text(struct printbuf *out,
 {
 	unsigned i;
 
-	pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
+	prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
 
-	pr_buf(out, "hidden:\t\t\t\t%llu\n",
+	prt_printf(out, "hidden:\t\t\t\t%llu\n",
 	       fs_usage->u.hidden);
-	pr_buf(out, "data:\t\t\t\t%llu\n",
+	prt_printf(out, "data:\t\t\t\t%llu\n",
 	       fs_usage->u.data);
-	pr_buf(out, "cached:\t\t\t\t%llu\n",
+	prt_printf(out, "cached:\t\t\t\t%llu\n",
 	       fs_usage->u.cached);
-	pr_buf(out, "reserved:\t\t\t%llu\n",
+	prt_printf(out, "reserved:\t\t\t%llu\n",
 	       fs_usage->u.reserved);
-	pr_buf(out, "nr_inodes:\t\t\t%llu\n",
+	prt_printf(out, "nr_inodes:\t\t\t%llu\n",
 	       fs_usage->u.nr_inodes);
-	pr_buf(out, "online reserved:\t\t%llu\n",
+	prt_printf(out, "online reserved:\t\t%llu\n",
 	       fs_usage->online_reserved);
 
 	for (i = 0; i < ARRAY_SIZE(fs_usage->u.persistent_reserved); i++) {
-		pr_buf(out, "%u replicas:\n", i + 1);
-		pr_buf(out, "\treserved:\t\t%llu\n",
+		prt_printf(out, "%u replicas:\n", i + 1);
+		prt_printf(out, "\treserved:\t\t%llu\n",
 		       fs_usage->u.persistent_reserved[i]);
 	}
 
@@ -224,9 +222,9 @@ void bch2_fs_usage_to_text(struct printbuf *out,
 		struct bch_replicas_entry *e =
 			cpu_replicas_entry(&c->replicas, i);
 
-		pr_buf(out, "\t");
+		prt_printf(out, "\t");
 		bch2_replicas_entry_to_text(out, e);
-		pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
+		prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
 	}
 }
 
@@ -279,9 +277,9 @@ bch2_fs_usage_read_short(struct bch_fs *c)
 	return ret;
 }
 
-static inline int is_unavailable_bucket(struct bch_alloc_v4 a)
+void bch2_dev_usage_init(struct bch_dev *ca)
 {
-	return a.dirty_sectors || a.stripe;
+	ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
 }
 
 static inline int bucket_sectors_fragmented(struct bch_dev *ca,
@@ -292,24 +290,6 @@ static inline int bucket_sectors_fragmented(struct bch_dev *ca,
 		: 0;
 }
 
-static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a)
-{
-	return a.cached_sectors && !a.dirty_sectors
-		? BCH_DATA_cached
-		: a.data_type;
-}
-
-static inline void account_bucket(struct bch_fs_usage *fs_usage,
-				  struct bch_dev_usage *dev_usage,
-				  enum bch_data_type type,
-				  int nr, s64 size)
-{
-	if (type == BCH_DATA_sb || type == BCH_DATA_journal)
-		fs_usage->hidden += size;
-
-	dev_usage->d[type].buckets += nr;
-}
-
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 				  struct bch_alloc_v4 old,
 				  struct bch_alloc_v4 new,
@@ -320,24 +300,25 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 
 	preempt_disable();
 	fs_usage = fs_usage_ptr(c, journal_seq, gc);
-	u = dev_usage_ptr(ca, journal_seq, gc);
 
-	if (bucket_type(old))
-		account_bucket(fs_usage, u, bucket_type(old),
-			       -1, -ca->mi.bucket_size);
+	if (data_type_is_hidden(old.data_type))
+		fs_usage->hidden -= ca->mi.bucket_size;
+	if (data_type_is_hidden(new.data_type))
+		fs_usage->hidden += ca->mi.bucket_size;
+
+	u = dev_usage_ptr(ca, journal_seq, gc);
 
-	if (bucket_type(new))
-		account_bucket(fs_usage, u, bucket_type(new),
-			       1, ca->mi.bucket_size);
+	u->d[old.data_type].buckets--;
+	u->d[new.data_type].buckets++;
 
-	u->buckets_ec += (int) new.stripe - (int) old.stripe;
-	u->buckets_unavailable +=
-		is_unavailable_bucket(new) - is_unavailable_bucket(old);
+	u->buckets_ec -= (int) !!old.stripe;
+	u->buckets_ec += (int) !!new.stripe;
 
 	u->d[old.data_type].sectors -= old.dirty_sectors;
 	u->d[new.data_type].sectors += new.dirty_sectors;
-	u->d[BCH_DATA_cached].sectors +=
-		(int) new.cached_sectors - (int) old.cached_sectors;
+
+	u->d[BCH_DATA_cached].sectors += new.cached_sectors;
+	u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
 
 	u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
 	u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
@@ -395,10 +376,9 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
 
 	idx = bch2_replicas_entry_idx(c, r);
 	if (idx < 0 &&
-	    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-	     fsck_err(c, "no replicas entry\n"
-		      "  while marking %s",
-		      (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+	    fsck_err(c, "no replicas entry\n"
+		     "  while marking %s",
+		     (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
 		percpu_up_read(&c->mark_lock);
 		ret = bch2_mark_replicas(c, r);
 		percpu_down_read(&c->mark_lock);
@@ -484,7 +464,8 @@ static inline void update_replicas_list(struct btree_trans *trans,
 
 	n = (void *) d->d + d->used;
 	n->delta = sectors;
-	memcpy(&n->r, r, replicas_entry_bytes(r));
+	memcpy((void *) n + offsetof(struct replicas_delta, r),
+	       r, replicas_entry_bytes(r));
 	bch2_replicas_entry_sort(&n->r);
 	d->used += b;
 }
@@ -507,14 +488,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	u64 journal_seq = trans->journal_res.seq;
 	struct bch_fs *c = trans->c;
 	struct bch_alloc_v4 old_a, new_a;
-	struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
+	struct bch_dev *ca;
 	int ret = 0;
 
-	if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
-				       new.k->p.offset >= ca->mi.nbuckets, trans,
-				       "alloc key outside range of device's buckets"))
-		return -EIO;
-
 	/*
 	 * alloc btree is read in by bch2_alloc_read, not gc:
 	 */
@@ -522,11 +498,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	    !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
 		return 0;
 
+	if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+				       "alloc key for invalid device or bucket"))
+		return -EIO;
+
+	ca = bch_dev_bkey_exists(c, new.k->p.inode);
+
 	bch2_alloc_to_v4(old, &old_a);
 	bch2_alloc_to_v4(new, &new_a);
 
 	if ((flags & BTREE_TRIGGER_INSERT) &&
-	    !old_a.data_type != !new_a.data_type &&
+	    data_type_is_empty(old_a.data_type) !=
+	    data_type_is_empty(new_a.data_type) &&
 	    new.k->type == KEY_TYPE_alloc_v4) {
 		struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
 
@@ -537,14 +520,16 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	 * before the bucket became empty again, then the we don't have
 	 * to wait on a journal flush before we can reuse the bucket:
 	 */
-		new_a.journal_seq = !new_a.data_type &&
+		new_a.journal_seq = data_type_is_empty(new_a.data_type) &&
 			(journal_seq == v->journal_seq ||
 			 bch2_journal_noflush_seq(&c->journal, v->journal_seq))
 			? 0 : journal_seq;
 		v->journal_seq = new_a.journal_seq;
 	}
 
-	if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
+	if (!data_type_is_empty(old_a.data_type) &&
+	    data_type_is_empty(new_a.data_type) &&
+	    new_a.journal_seq) {
 		ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
 				c->journal.flushed_seq_ondisk,
 				new.k->p.inode, new.k->p.offset,
@@ -556,25 +541,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
 		}
 	}
 
-	if (!new_a.data_type &&
-	    (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
-		closure_wake_up(&c->freelist_wait);
-
-	if ((flags & BTREE_TRIGGER_INSERT) &&
-	    BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
-	    !new_a.journal_seq)
-		bch2_do_discards(c);
-
-	if (!old_a.data_type &&
-	    new_a.data_type &&
-	    should_invalidate_buckets(ca))
-		bch2_do_invalidates(c);
-
-	if (bucket_state(new_a) == BUCKET_need_gc_gens) {
-		atomic_inc(&c->kick_gc);
-		wake_up_process(c->gc_thread);
-	}
-
 	percpu_down_read(&c->mark_lock);
 	if (!gc && new_a.gen != old_a.gen)
 		*bucket_gen(ca, new.k->p.offset) = new_a.gen;
@@ -606,17 +572,31 @@ int bch2_mark_alloc(struct btree_trans *trans,
 	if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
 	    old_a.cached_sectors) {
 		ret = update_cached_sectors(c, new, ca->dev_idx,
-					    -old_a.cached_sectors,
+					    -((s64) old_a.cached_sectors),
 					    journal_seq, gc);
 		if (ret) {
-			bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
+			bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
+					    __func__);
 			return ret;
 		}
-
-		trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
-				 old_a.cached_sectors);
 	}
 
+	if (new_a.data_type == BCH_DATA_free &&
+	    (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+		closure_wake_up(&c->freelist_wait);
+
+	if (new_a.data_type == BCH_DATA_need_discard &&
+	    (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+		bch2_do_discards(c);
+
+	if (old_a.data_type != BCH_DATA_cached &&
+	    new_a.data_type == BCH_DATA_cached &&
+	    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+		bch2_do_invalidates(c);
+
+	if (new_a.data_type == BCH_DATA_need_gc_gens)
+		bch2_do_gc_gens(c);
+
 	return 0;
 }
 
@@ -674,16 +654,6 @@ err:
 	return ret;
 }
 
-static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-{
-	EBUG_ON(sectors < 0);
-
-	return crc_is_compressed(p.crc)
-		? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
-				   p.crc.uncompressed_size)
-		: sectors;
-}
-
 static int check_bucket_ref(struct bch_fs *c,
 			    struct bkey_s_c k,
 			    const struct bch_extent_ptr *ptr,
@@ -699,6 +669,13 @@ static int check_bucket_ref(struct bch_fs *c,
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
+	if (bucket_data_type == BCH_DATA_cached)
+		bucket_data_type = BCH_DATA_user;
+
+	if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) ||
+	    (bucket_data_type == BCH_DATA_user   && ptr_data_type == BCH_DATA_stripe))
+		bucket_data_type = ptr_data_type = BCH_DATA_stripe;
+
 	if (gen_after(ptr->gen, b_gen)) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
@@ -743,7 +720,8 @@ static int check_bucket_ref(struct bch_fs *c,
 		goto err;
 	}
 
-	if (bucket_data_type && ptr_data_type &&
+	if (!data_type_is_empty(bucket_data_type) &&
+	    ptr_data_type &&
 	    bucket_data_type != ptr_data_type) {
 		bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
 			"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
@@ -943,7 +921,7 @@ int bch2_mark_extent(struct btree_trans *trans,
 {
 	u64 journal_seq = trans->journal_res.seq;
 	struct bch_fs *c = trans->c;
-	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
 	struct extent_ptr_decoded p;
@@ -982,7 +960,8 @@ int bch2_mark_extent(struct btree_trans *trans,
 			ret = update_cached_sectors(c, k, p.ptr.dev,
 						    disk_sectors, journal_seq, true);
 			if (ret) {
-				bch2_fs_fatal_error(c, "bch2_mark_extent(): no replicas entry while updating cached sectors");
+				bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
+						    __func__);
 				return ret;
 			}
 		}
@@ -1010,7 +989,7 @@ int bch2_mark_extent(struct btree_trans *trans,
 		struct printbuf buf = PRINTBUF;
 
 		bch2_bkey_val_to_text(&buf, c, k);
-		bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+		bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
 		printbuf_exit(&buf);
 		return ret;
 	}
@@ -1135,10 +1114,10 @@ int bch2_mark_inode(struct btree_trans *trans,
 	u64 journal_seq = trans->journal_res.seq;
 
 	if (flags & BTREE_TRIGGER_INSERT) {
-		struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
+		struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
 
 		BUG_ON(!journal_seq);
-		BUG_ON(new.k->type != KEY_TYPE_inode_v2);
+		BUG_ON(new.k->type != KEY_TYPE_inode_v3);
 
 		v->bi_journal_seq = cpu_to_le64(journal_seq);
 	}
@@ -1162,7 +1141,7 @@ int bch2_mark_reservation(struct btree_trans *trans,
 			  unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
 	struct bch_fs_usage __percpu *fs_usage;
 	unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 	s64 sectors = (s64) k.k->size;
@@ -1241,7 +1220,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
 			unsigned flags)
 {
 	struct bch_fs *c = trans->c;
-	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+	struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
 	struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
 	struct reflink_gc *ref;
 	size_t l, r, m;
@@ -1383,27 +1362,43 @@ need_mark:
 
 /* trans_mark: */
 
 static int bch2_trans_mark_pointer(struct btree_trans *trans,
-			struct bkey_s_c k, struct extent_ptr_decoded p,
-			s64 sectors, enum bch_data_type data_type)
+			enum btree_id btree_id, unsigned level,
+			struct bkey_s_c k, struct extent_ptr_decoded p,
+			unsigned flags)
 {
+	bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
 	struct btree_iter iter;
 	struct bkey_i_alloc_v4 *a;
+	struct bpos bucket_pos;
+	struct bch_backpointer bp;
+	s64 sectors;
 	int ret;
 
-	a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
+	bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
+	sectors = bp.bucket_len;
+	if (!insert)
+		sectors = -sectors;
+
+	a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
 	if (IS_ERR(a))
 		return PTR_ERR(a);
 
-	ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
+	ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
 			     a->v.gen, &a->v.data_type,
 			     &a->v.dirty_sectors, &a->v.cached_sectors);
 	if (ret)
-		goto out;
+		goto err;
+
+	if (!p.ptr.cached) {
+		ret = insert
+			? bch2_bucket_backpointer_add(trans, a, bp, k)
+			: bch2_bucket_backpointer_del(trans, a, bp, k);
+		if (ret)
+			goto err;
+	}
 
 	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
-	if (ret)
-		goto out;
-out:
+err:
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
@@ -1465,6 +1460,7 @@ err:
 }
 
 int bch2_trans_mark_extent(struct btree_trans *trans,
+			   enum btree_id btree_id, unsigned level,
 			   struct bkey_s_c old, struct bkey_i *new,
 			   unsigned flags)
 {
@@ -1496,8 +1492,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
 		if (flags & BTREE_TRIGGER_OVERWRITE)
 			disk_sectors = -disk_sectors;
 
-		ret = bch2_trans_mark_pointer(trans, k, p,
-					      disk_sectors, data_type);
+		ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
 		if (ret < 0)
 			return ret;
@@ -1603,6 +1598,7 @@ err:
 }
 
 int bch2_trans_mark_stripe(struct btree_trans *trans,
+			   enum btree_id btree_id, unsigned level,
 			   struct bkey_s_c old, struct bkey_i *new,
 			   unsigned flags)
 {
@@ -1673,6 +1669,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_inode(struct btree_trans *trans,
+			  enum btree_id btree_id, unsigned level,
 			  struct bkey_s_c old,
 			  struct bkey_i *new,
 			  unsigned flags)
@@ -1689,6 +1686,7 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_reservation(struct btree_trans *trans,
+				enum btree_id btree_id, unsigned level,
 				struct bkey_s_c old,
 				struct bkey_i *new,
 				unsigned flags)
@@ -1790,6 +1788,7 @@ err:
 }
 
 int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+			      enum btree_id btree_id, unsigned level,
 			      struct bkey_s_c old,
 			      struct bkey_i *new,
 			      unsigned flags)
@@ -1865,7 +1864,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
 				    enum bch_data_type type,
 				    unsigned sectors)
 {
-	return __bch2_trans_do(trans, NULL, NULL, 0,
+	return commit_do(trans, NULL, NULL, 0,
 			__bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
 }
 
@@ -1943,8 +1942,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
 
 int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
 {
-	return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
-			     __bch2_trans_mark_dev_sb(&trans, ca));
+	return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
 }
 
 /* Disk reservations: */
@@ -2004,7 +2002,7 @@ recalculate:
 		ret = 0;
 	} else {
 		atomic64_set(&c->sectors_available, sectors_available);
-		ret = -ENOSPC;
+		ret = -BCH_ERR_ENOSPC_disk_reservation;
 	}
 
 	mutex_unlock(&c->sectors_available_lock);
@@ -2114,5 +2112,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
 		return -ENOMEM;
 	}
 
-	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);;
+	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
 }