Update bcachefs sources to 70fa0c1ff4 fixup! bcachefs: Btree key cache improvements
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 51ed9609aeb45dbcbcee4de064d01481a1037161..cd297941e6b6f106ff00cda84c1e57b1c20b2295 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -7,6 +7,7 @@
 
 #include "bcachefs.h"
 #include "alloc_background.h"
+#include "backpointers.h"
 #include "bset.h"
 #include "btree_gc.h"
 #include "btree_update.h"
@@ -88,20 +89,17 @@ static inline struct bch_dev_usage *dev_usage_ptr(struct bch_dev *ca,
                            : ca->usage[journal_seq & JOURNAL_BUF_MASK]);
 }
 
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage)
 {
        struct bch_fs *c = ca->fs;
-       struct bch_dev_usage ret;
        unsigned seq, i, u64s = dev_usage_u64s();
 
        do {
                seq = read_seqcount_begin(&c->usage_lock);
-               memcpy(&ret, ca->usage_base, u64s * sizeof(u64));
+               memcpy(usage, ca->usage_base, u64s * sizeof(u64));
                for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
-                       acc_u64s_percpu((u64 *) &ret, (u64 __percpu *) ca->usage[i], u64s);
+                       acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage[i], u64s);
        } while (read_seqcount_retry(&c->usage_lock, seq));
-
-       return ret;
 }
 
 static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@@ -197,26 +195,26 @@ void bch2_fs_usage_to_text(struct printbuf *out,
 {
        unsigned i;
 
-       pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
+       prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
 
-       pr_buf(out, "hidden:\t\t\t\t%llu\n",
+       prt_printf(out, "hidden:\t\t\t\t%llu\n",
               fs_usage->u.hidden);
-       pr_buf(out, "data:\t\t\t\t%llu\n",
+       prt_printf(out, "data:\t\t\t\t%llu\n",
               fs_usage->u.data);
-       pr_buf(out, "cached:\t\t\t\t%llu\n",
+       prt_printf(out, "cached:\t\t\t\t%llu\n",
               fs_usage->u.cached);
-       pr_buf(out, "reserved:\t\t\t%llu\n",
+       prt_printf(out, "reserved:\t\t\t%llu\n",
               fs_usage->u.reserved);
-       pr_buf(out, "nr_inodes:\t\t\t%llu\n",
+       prt_printf(out, "nr_inodes:\t\t\t%llu\n",
               fs_usage->u.nr_inodes);
-       pr_buf(out, "online reserved:\t\t%llu\n",
+       prt_printf(out, "online reserved:\t\t%llu\n",
               fs_usage->online_reserved);
 
        for (i = 0;
             i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
             i++) {
-               pr_buf(out, "%u replicas:\n", i + 1);
-               pr_buf(out, "\treserved:\t\t%llu\n",
+               prt_printf(out, "%u replicas:\n", i + 1);
+               prt_printf(out, "\treserved:\t\t%llu\n",
                       fs_usage->u.persistent_reserved[i]);
        }
 
@@ -224,9 +222,9 @@ void bch2_fs_usage_to_text(struct printbuf *out,
                struct bch_replicas_entry *e =
                        cpu_replicas_entry(&c->replicas, i);
 
-               pr_buf(out, "\t");
+               prt_printf(out, "\t");
                bch2_replicas_entry_to_text(out, e);
-               pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
+               prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
        }
 }
 
@@ -279,9 +277,9 @@ bch2_fs_usage_read_short(struct bch_fs *c)
        return ret;
 }
 
-static inline int is_unavailable_bucket(struct bch_alloc_v4 a)
+void bch2_dev_usage_init(struct bch_dev *ca)
 {
-       return a.dirty_sectors || a.stripe;
+       ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
 }
 
 static inline int bucket_sectors_fragmented(struct bch_dev *ca,
@@ -292,24 +290,6 @@ static inline int bucket_sectors_fragmented(struct bch_dev *ca,
                : 0;
 }
 
-static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a)
-{
-       return a.cached_sectors && !a.dirty_sectors
-               ? BCH_DATA_cached
-               : a.data_type;
-}
-
-static inline void account_bucket(struct bch_fs_usage *fs_usage,
-                                 struct bch_dev_usage *dev_usage,
-                                 enum bch_data_type type,
-                                 int nr, s64 size)
-{
-       if (type == BCH_DATA_sb || type == BCH_DATA_journal)
-               fs_usage->hidden        += size;
-
-       dev_usage->d[type].buckets      += nr;
-}
-
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
                                  struct bch_alloc_v4 old,
                                  struct bch_alloc_v4 new,
@@ -320,24 +300,25 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 
        preempt_disable();
        fs_usage = fs_usage_ptr(c, journal_seq, gc);
-       u = dev_usage_ptr(ca, journal_seq, gc);
 
-       if (bucket_type(old))
-               account_bucket(fs_usage, u, bucket_type(old),
-                              -1, -ca->mi.bucket_size);
+       if (data_type_is_hidden(old.data_type))
+               fs_usage->hidden -= ca->mi.bucket_size;
+       if (data_type_is_hidden(new.data_type))
+               fs_usage->hidden += ca->mi.bucket_size;
+
+       u = dev_usage_ptr(ca, journal_seq, gc);
 
-       if (bucket_type(new))
-               account_bucket(fs_usage, u, bucket_type(new),
-                              1, ca->mi.bucket_size);
+       u->d[old.data_type].buckets--;
+       u->d[new.data_type].buckets++;
 
-       u->buckets_ec += (int) new.stripe - (int) old.stripe;
-       u->buckets_unavailable +=
-               is_unavailable_bucket(new) - is_unavailable_bucket(old);
+       u->buckets_ec -= (int) !!old.stripe;
+       u->buckets_ec += (int) !!new.stripe;
 
        u->d[old.data_type].sectors -= old.dirty_sectors;
        u->d[new.data_type].sectors += new.dirty_sectors;
-       u->d[BCH_DATA_cached].sectors +=
-               (int) new.cached_sectors - (int) old.cached_sectors;
+
+       u->d[BCH_DATA_cached].sectors += new.cached_sectors;
+       u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
 
        u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
        u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
@@ -395,10 +376,9 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
 
        idx = bch2_replicas_entry_idx(c, r);
        if (idx < 0 &&
-           (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-            fsck_err(c, "no replicas entry\n"
-                     "  while marking %s",
-                     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+           fsck_err(c, "no replicas entry\n"
+                    "  while marking %s",
+                    (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                percpu_up_read(&c->mark_lock);
                ret = bch2_mark_replicas(c, r);
                percpu_down_read(&c->mark_lock);
@@ -484,7 +464,8 @@ static inline void update_replicas_list(struct btree_trans *trans,
 
        n = (void *) d->d + d->used;
        n->delta = sectors;
-       memcpy(&n->r, r, replicas_entry_bytes(r));
+       memcpy((void *) n + offsetof(struct replicas_delta, r),
+              r, replicas_entry_bytes(r));
        bch2_replicas_entry_sort(&n->r);
        d->used += b;
 }
@@ -507,14 +488,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
        struct bch_alloc_v4 old_a, new_a;
-       struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
+       struct bch_dev *ca;
        int ret = 0;
 
-       if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
-                                      new.k->p.offset >= ca->mi.nbuckets, trans,
-                                      "alloc key outside range of device's buckets"))
-               return -EIO;
-
        /*
         * alloc btree is read in by bch2_alloc_read, not gc:
         */
@@ -522,11 +498,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
            !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
                return 0;
 
+       if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+                                      "alloc key for invalid device or bucket"))
+               return -EIO;
+
+       ca = bch_dev_bkey_exists(c, new.k->p.inode);
+
        bch2_alloc_to_v4(old, &old_a);
        bch2_alloc_to_v4(new, &new_a);
 
        if ((flags & BTREE_TRIGGER_INSERT) &&
-           !old_a.data_type != !new_a.data_type &&
+           data_type_is_empty(old_a.data_type) !=
+           data_type_is_empty(new_a.data_type) &&
            new.k->type == KEY_TYPE_alloc_v4) {
                struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
 
@@ -537,14 +520,16 @@ int bch2_mark_alloc(struct btree_trans *trans,
                 * before the bucket became empty again, then the we don't have
                 * to wait on a journal flush before we can reuse the bucket:
                 */
-               new_a.journal_seq = !new_a.data_type &&
+               new_a.journal_seq = data_type_is_empty(new_a.data_type) &&
                        (journal_seq == v->journal_seq ||
                         bch2_journal_noflush_seq(&c->journal, v->journal_seq))
                        ? 0 : journal_seq;
                v->journal_seq = new_a.journal_seq;
        }
 
-       if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
+       if (!data_type_is_empty(old_a.data_type) &&
+           data_type_is_empty(new_a.data_type) &&
+           new_a.journal_seq) {
                ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
                                c->journal.flushed_seq_ondisk,
                                new.k->p.inode, new.k->p.offset,
@@ -556,25 +541,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
                }
        }
 
-       if (!new_a.data_type &&
-           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
-               closure_wake_up(&c->freelist_wait);
-
-       if ((flags & BTREE_TRIGGER_INSERT) &&
-           BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
-           !new_a.journal_seq)
-               bch2_do_discards(c);
-
-       if (!old_a.data_type &&
-           new_a.data_type &&
-           should_invalidate_buckets(ca))
-               bch2_do_invalidates(c);
-
-       if (bucket_state(new_a) == BUCKET_need_gc_gens) {
-               atomic_inc(&c->kick_gc);
-               wake_up_process(c->gc_thread);
-       }
-
        percpu_down_read(&c->mark_lock);
        if (!gc && new_a.gen != old_a.gen)
                *bucket_gen(ca, new.k->p.offset) = new_a.gen;
@@ -606,17 +572,31 @@ int bch2_mark_alloc(struct btree_trans *trans,
        if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
            old_a.cached_sectors) {
                ret = update_cached_sectors(c, new, ca->dev_idx,
-                                           -old_a.cached_sectors,
+                                           -((s64) old_a.cached_sectors),
                                            journal_seq, gc);
                if (ret) {
-                       bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
+                       bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
+                                           __func__);
                        return ret;
                }
-
-               trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
-                                old_a.cached_sectors);
        }
 
+       if (new_a.data_type == BCH_DATA_free &&
+           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+               closure_wake_up(&c->freelist_wait);
+
+       if (new_a.data_type == BCH_DATA_need_discard &&
+           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+               bch2_do_discards(c);
+
+       if (old_a.data_type != BCH_DATA_cached &&
+           new_a.data_type == BCH_DATA_cached &&
+           should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+               bch2_do_invalidates(c);
+
+       if (new_a.data_type == BCH_DATA_need_gc_gens)
+               bch2_do_gc_gens(c);
+
        return 0;
 }
 
@@ -674,16 +654,6 @@ err:
        return ret;
 }
 
-static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-{
-       EBUG_ON(sectors < 0);
-
-       return crc_is_compressed(p.crc)
-               ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
-                                  p.crc.uncompressed_size)
-               : sectors;
-}
-
 static int check_bucket_ref(struct bch_fs *c,
                            struct bkey_s_c k,
                            const struct bch_extent_ptr *ptr,
@@ -699,6 +669,13 @@ static int check_bucket_ref(struct bch_fs *c,
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
+       if (bucket_data_type == BCH_DATA_cached)
+               bucket_data_type = BCH_DATA_user;
+
+       if ((bucket_data_type == BCH_DATA_stripe && ptr_data_type == BCH_DATA_user) ||
+           (bucket_data_type == BCH_DATA_user   && ptr_data_type == BCH_DATA_stripe))
+               bucket_data_type = ptr_data_type = BCH_DATA_stripe;
+
        if (gen_after(ptr->gen, b_gen)) {
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
@@ -743,7 +720,8 @@ static int check_bucket_ref(struct bch_fs *c,
                goto err;
        }
 
-       if (bucket_data_type && ptr_data_type &&
+       if (!data_type_is_empty(bucket_data_type) &&
+           ptr_data_type &&
            bucket_data_type != ptr_data_type) {
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
@@ -943,7 +921,7 @@ int bch2_mark_extent(struct btree_trans *trans,
 {
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
@@ -982,7 +960,8 @@ int bch2_mark_extent(struct btree_trans *trans,
                                ret = update_cached_sectors(c, k, p.ptr.dev,
                                                disk_sectors, journal_seq, true);
                                if (ret) {
-                                       bch2_fs_fatal_error(c, "bch2_mark_extent(): no replicas entry while updating cached sectors");
+                                       bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
+                                                           __func__);
                                        return ret;
                                }
                        }
@@ -1010,7 +989,7 @@ int bch2_mark_extent(struct btree_trans *trans,
                        struct printbuf buf = PRINTBUF;
 
                        bch2_bkey_val_to_text(&buf, c, k);
-                       bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+                       bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
                        printbuf_exit(&buf);
                        return ret;
                }
@@ -1135,10 +1114,10 @@ int bch2_mark_inode(struct btree_trans *trans,
        u64 journal_seq = trans->journal_res.seq;
 
        if (flags & BTREE_TRIGGER_INSERT) {
-               struct bch_inode_v2 *v = (struct bch_inode_v2 *) new.v;
+               struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
 
                BUG_ON(!journal_seq);
-               BUG_ON(new.k->type != KEY_TYPE_inode_v2);
+               BUG_ON(new.k->type != KEY_TYPE_inode_v3);
 
                v->bi_journal_seq = cpu_to_le64(journal_seq);
        }
@@ -1162,7 +1141,7 @@ int bch2_mark_reservation(struct btree_trans *trans,
                          unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        struct bch_fs_usage __percpu *fs_usage;
        unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
        s64 sectors = (s64) k.k->size;
@@ -1241,7 +1220,7 @@ int bch2_mark_reflink_p(struct btree_trans *trans,
                        unsigned flags)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
        struct reflink_gc *ref;
        size_t l, r, m;
@@ -1383,27 +1362,43 @@ need_mark:
 /* trans_mark: */
 
 static int bch2_trans_mark_pointer(struct btree_trans *trans,
-                       struct bkey_s_c k, struct extent_ptr_decoded p,
-                       s64 sectors, enum bch_data_type data_type)
+                                  enum btree_id btree_id, unsigned level,
+                                  struct bkey_s_c k, struct extent_ptr_decoded p,
+                                  unsigned flags)
 {
+       bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
        struct btree_iter iter;
        struct bkey_i_alloc_v4 *a;
+       struct bpos bucket_pos;
+       struct bch_backpointer bp;
+       s64 sectors;
        int ret;
 
-       a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
+       bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
+       sectors = bp.bucket_len;
+       if (!insert)
+               sectors = -sectors;
+
+       a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
        if (IS_ERR(a))
                return PTR_ERR(a);
 
-       ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
+       ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
                             a->v.gen, &a->v.data_type,
                             &a->v.dirty_sectors, &a->v.cached_sectors);
        if (ret)
-               goto out;
+               goto err;
+
+       if (!p.ptr.cached) {
+               ret = insert
+                       ? bch2_bucket_backpointer_add(trans, a, bp, k)
+                       : bch2_bucket_backpointer_del(trans, a, bp, k);
+               if (ret)
+                       goto err;
+       }
 
        ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
-       if (ret)
-               goto out;
-out:
+err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
@@ -1465,6 +1460,7 @@ err:
 }
 
 int bch2_trans_mark_extent(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
                           struct bkey_s_c old, struct bkey_i *new,
                           unsigned flags)
 {
@@ -1496,8 +1492,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
                if (flags & BTREE_TRIGGER_OVERWRITE)
                        disk_sectors = -disk_sectors;
 
-               ret = bch2_trans_mark_pointer(trans, k, p,
-                                       disk_sectors, data_type);
+               ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
                if (ret < 0)
                        return ret;
 
@@ -1603,6 +1598,7 @@ err:
 }
 
 int bch2_trans_mark_stripe(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
                           struct bkey_s_c old, struct bkey_i *new,
                           unsigned flags)
 {
@@ -1673,6 +1669,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_inode(struct btree_trans *trans,
+                         enum btree_id btree_id, unsigned level,
                          struct bkey_s_c old,
                          struct bkey_i *new,
                          unsigned flags)
@@ -1689,6 +1686,7 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_reservation(struct btree_trans *trans,
+                               enum btree_id btree_id, unsigned level,
                                struct bkey_s_c old,
                                struct bkey_i *new,
                                unsigned flags)
@@ -1790,6 +1788,7 @@ err:
 }
 
 int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                             enum btree_id btree_id, unsigned level,
                              struct bkey_s_c old,
                              struct bkey_i *new,
                              unsigned flags)
@@ -1865,7 +1864,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    enum bch_data_type type,
                                    unsigned sectors)
 {
-       return __bch2_trans_do(trans, NULL, NULL, 0,
+       return commit_do(trans, NULL, NULL, 0,
                        __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
 }
 
@@ -1943,8 +1942,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
 
 int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
 {
-       return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
-                       __bch2_trans_mark_dev_sb(&trans, ca));
+       return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
 }
 
 /* Disk reservations: */
@@ -2004,7 +2002,7 @@ recalculate:
                ret = 0;
        } else {
                atomic64_set(&c->sectors_available, sectors_available);
-               ret = -ENOSPC;
+               ret = -BCH_ERR_ENOSPC_disk_reservation;
        }
 
        mutex_unlock(&c->sectors_available_lock);
@@ -2114,5 +2112,5 @@ int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
                        return -ENOMEM;
        }
 
-       return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);;
+       return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
 }