]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/buckets.c
Update bcachefs sources to 24c6361e20 bcachefs: Fix a trans path overflow in bch2_btr...
[bcachefs-tools-debian] / libbcachefs / buckets.c
index 7654ab24a9097bbf166025fd382e7d0645a78bc3..8af0dd022fda267b35fac41b1b797a70b4546f96 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "bcachefs.h"
 #include "alloc_background.h"
+#include "backpointers.h"
 #include "bset.h"
 #include "btree_gc.h"
 #include "btree_update.h"
@@ -197,26 +198,26 @@ void bch2_fs_usage_to_text(struct printbuf *out,
 {
        unsigned i;
 
-       pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
+       prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
 
-       pr_buf(out, "hidden:\t\t\t\t%llu\n",
+       prt_printf(out, "hidden:\t\t\t\t%llu\n",
               fs_usage->u.hidden);
-       pr_buf(out, "data:\t\t\t\t%llu\n",
+       prt_printf(out, "data:\t\t\t\t%llu\n",
               fs_usage->u.data);
-       pr_buf(out, "cached:\t\t\t\t%llu\n",
+       prt_printf(out, "cached:\t\t\t\t%llu\n",
               fs_usage->u.cached);
-       pr_buf(out, "reserved:\t\t\t%llu\n",
+       prt_printf(out, "reserved:\t\t\t%llu\n",
               fs_usage->u.reserved);
-       pr_buf(out, "nr_inodes:\t\t\t%llu\n",
+       prt_printf(out, "nr_inodes:\t\t\t%llu\n",
               fs_usage->u.nr_inodes);
-       pr_buf(out, "online reserved:\t\t%llu\n",
+       prt_printf(out, "online reserved:\t\t%llu\n",
               fs_usage->online_reserved);
 
        for (i = 0;
             i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
             i++) {
-               pr_buf(out, "%u replicas:\n", i + 1);
-               pr_buf(out, "\treserved:\t\t%llu\n",
+               prt_printf(out, "%u replicas:\n", i + 1);
+               prt_printf(out, "\treserved:\t\t%llu\n",
                       fs_usage->u.persistent_reserved[i]);
        }
 
@@ -224,9 +225,9 @@ void bch2_fs_usage_to_text(struct printbuf *out,
                struct bch_replicas_entry *e =
                        cpu_replicas_entry(&c->replicas, i);
 
-               pr_buf(out, "\t");
+               prt_printf(out, "\t");
                bch2_replicas_entry_to_text(out, e);
-               pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
+               prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
        }
 }
 
@@ -279,9 +280,9 @@ bch2_fs_usage_read_short(struct bch_fs *c)
        return ret;
 }
 
-static inline int is_unavailable_bucket(struct bch_alloc_v4 a)
+void bch2_dev_usage_init(struct bch_dev *ca)
 {
-       return a.dirty_sectors || a.stripe;
+       ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
 }
 
 static inline int bucket_sectors_fragmented(struct bch_dev *ca,
@@ -292,24 +293,6 @@ static inline int bucket_sectors_fragmented(struct bch_dev *ca,
                : 0;
 }
 
-static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a)
-{
-       return a.cached_sectors && !a.dirty_sectors
-               ? BCH_DATA_cached
-               : a.data_type;
-}
-
-static inline void account_bucket(struct bch_fs_usage *fs_usage,
-                                 struct bch_dev_usage *dev_usage,
-                                 enum bch_data_type type,
-                                 int nr, s64 size)
-{
-       if (type == BCH_DATA_sb || type == BCH_DATA_journal)
-               fs_usage->hidden        += size;
-
-       dev_usage->d[type].buckets      += nr;
-}
-
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
                                  struct bch_alloc_v4 old,
                                  struct bch_alloc_v4 new,
@@ -320,24 +303,25 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 
        preempt_disable();
        fs_usage = fs_usage_ptr(c, journal_seq, gc);
-       u = dev_usage_ptr(ca, journal_seq, gc);
 
-       if (bucket_type(old))
-               account_bucket(fs_usage, u, bucket_type(old),
-                              -1, -ca->mi.bucket_size);
+       if (data_type_is_hidden(old.data_type))
+               fs_usage->hidden -= ca->mi.bucket_size;
+       if (data_type_is_hidden(new.data_type))
+               fs_usage->hidden += ca->mi.bucket_size;
+
+       u = dev_usage_ptr(ca, journal_seq, gc);
 
-       if (bucket_type(new))
-               account_bucket(fs_usage, u, bucket_type(new),
-                              1, ca->mi.bucket_size);
+       u->d[old.data_type].buckets--;
+       u->d[new.data_type].buckets++;
 
-       u->buckets_ec += (int) new.stripe - (int) old.stripe;
-       u->buckets_unavailable +=
-               is_unavailable_bucket(new) - is_unavailable_bucket(old);
+       u->buckets_ec -= (int) !!old.stripe;
+       u->buckets_ec += (int) !!new.stripe;
 
        u->d[old.data_type].sectors -= old.dirty_sectors;
        u->d[new.data_type].sectors += new.dirty_sectors;
-       u->d[BCH_DATA_cached].sectors +=
-               (int) new.cached_sectors - (int) old.cached_sectors;
+
+       u->d[BCH_DATA_cached].sectors += new.cached_sectors;
+       u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
 
        u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
        u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
@@ -395,10 +379,9 @@ static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
 
        idx = bch2_replicas_entry_idx(c, r);
        if (idx < 0 &&
-           (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-            fsck_err(c, "no replicas entry\n"
-                     "  while marking %s",
-                     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+           fsck_err(c, "no replicas entry\n"
+                    "  while marking %s",
+                    (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                percpu_up_read(&c->mark_lock);
                ret = bch2_mark_replicas(c, r);
                percpu_down_read(&c->mark_lock);
@@ -484,7 +467,8 @@ static inline void update_replicas_list(struct btree_trans *trans,
 
        n = (void *) d->d + d->used;
        n->delta = sectors;
-       memcpy(&n->r, r, replicas_entry_bytes(r));
+       memcpy((void *) n + offsetof(struct replicas_delta, r),
+              r, replicas_entry_bytes(r));
        bch2_replicas_entry_sort(&n->r);
        d->used += b;
 }
@@ -507,14 +491,9 @@ int bch2_mark_alloc(struct btree_trans *trans,
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
        struct bch_alloc_v4 old_a, new_a;
-       struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
+       struct bch_dev *ca;
        int ret = 0;
 
-       if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
-                                      new.k->p.offset >= ca->mi.nbuckets, trans,
-                                      "alloc key outside range of device's buckets"))
-               return -EIO;
-
        /*
         * alloc btree is read in by bch2_alloc_read, not gc:
         */
@@ -522,11 +501,18 @@ int bch2_mark_alloc(struct btree_trans *trans,
            !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
                return 0;
 
+       if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+                                      "alloc key for invalid device or bucket"))
+               return -EIO;
+
+       ca = bch_dev_bkey_exists(c, new.k->p.inode);
+
        bch2_alloc_to_v4(old, &old_a);
        bch2_alloc_to_v4(new, &new_a);
 
        if ((flags & BTREE_TRIGGER_INSERT) &&
-           !old_a.data_type != !new_a.data_type &&
+           data_type_is_empty(old_a.data_type) !=
+           data_type_is_empty(new_a.data_type) &&
            new.k->type == KEY_TYPE_alloc_v4) {
                struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
 
@@ -537,14 +523,16 @@ int bch2_mark_alloc(struct btree_trans *trans,
                  * before the bucket became empty again, then we don't have
                 * to wait on a journal flush before we can reuse the bucket:
                 */
-               new_a.journal_seq = !new_a.data_type &&
+               new_a.journal_seq = data_type_is_empty(new_a.data_type) &&
                        (journal_seq == v->journal_seq ||
                         bch2_journal_noflush_seq(&c->journal, v->journal_seq))
                        ? 0 : journal_seq;
                v->journal_seq = new_a.journal_seq;
        }
 
-       if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
+       if (!data_type_is_empty(old_a.data_type) &&
+           data_type_is_empty(new_a.data_type) &&
+           new_a.journal_seq) {
                ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
                                c->journal.flushed_seq_ondisk,
                                new.k->p.inode, new.k->p.offset,
@@ -556,25 +544,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
                }
        }
 
-       if (!new_a.data_type &&
-           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
-               closure_wake_up(&c->freelist_wait);
-
-       if ((flags & BTREE_TRIGGER_INSERT) &&
-           BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
-           !new_a.journal_seq)
-               bch2_do_discards(c);
-
-       if (!old_a.data_type &&
-           new_a.data_type &&
-           should_invalidate_buckets(ca))
-               bch2_do_invalidates(c);
-
-       if (bucket_state(new_a) == BUCKET_need_gc_gens) {
-               atomic_inc(&c->kick_gc);
-               wake_up_process(c->gc_thread);
-       }
-
        percpu_down_read(&c->mark_lock);
        if (!gc && new_a.gen != old_a.gen)
                *bucket_gen(ca, new.k->p.offset) = new_a.gen;
@@ -612,21 +581,34 @@ int bch2_mark_alloc(struct btree_trans *trans,
                        bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
                        return ret;
                }
-
-               trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
-                                old_a.cached_sectors);
        }
 
+       if (new_a.data_type == BCH_DATA_free &&
+           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+               closure_wake_up(&c->freelist_wait);
+
+       if (new_a.data_type == BCH_DATA_need_discard &&
+           (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+               bch2_do_discards(c);
+
+       if (old_a.data_type != BCH_DATA_cached &&
+           new_a.data_type == BCH_DATA_cached &&
+           should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+               bch2_do_invalidates(c);
+
+       if (new_a.data_type == BCH_DATA_need_gc_gens)
+               bch2_do_gc_gens(c);
+
        return 0;
 }
 
-void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
-                              size_t b, enum bch_data_type data_type,
-                              unsigned sectors, struct gc_pos pos,
-                              unsigned flags)
+int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+                             size_t b, enum bch_data_type data_type,
+                             unsigned sectors, struct gc_pos pos,
+                             unsigned flags)
 {
        struct bucket old, new, *g;
-       bool overflow;
+       int ret = 0;
 
        BUG_ON(!(flags & BTREE_TRIGGER_GC));
        BUG_ON(data_type != BCH_DATA_sb &&
@@ -636,7 +618,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
         * Backup superblock might be past the end of our normal usable space:
         */
        if (b >= ca->mi.nbuckets)
-               return;
+               return 0;
 
        percpu_down_read(&c->mark_lock);
        g = gc_bucket(ca, b);
@@ -644,38 +626,34 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
        bucket_lock(g);
        old = *g;
 
+       if (bch2_fs_inconsistent_on(g->data_type &&
+                       g->data_type != data_type, c,
+                       "different types of data in same bucket: %s, %s",
+                       bch2_data_types[g->data_type],
+                       bch2_data_types[data_type])) {
+               ret = -EIO;
+               goto err;
+       }
+
+       if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
+                       "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
+                       ca->dev_idx, b, g->gen,
+                       bch2_data_types[g->data_type ?: data_type],
+                       g->dirty_sectors, sectors)) {
+               ret = -EIO;
+               goto err;
+       }
+
+
        g->data_type = data_type;
        g->dirty_sectors += sectors;
-       overflow = g->dirty_sectors < sectors;
-
        new = *g;
+err:
        bucket_unlock(g);
-
-       bch2_fs_inconsistent_on(old.data_type &&
-                               old.data_type != data_type, c,
-               "different types of data in same bucket: %s, %s",
-               bch2_data_types[old.data_type],
-               bch2_data_types[data_type]);
-
-       bch2_fs_inconsistent_on(overflow, c,
-               "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX",
-               ca->dev_idx, b, new.gen,
-               bch2_data_types[old.data_type ?: data_type],
-               old.dirty_sectors, sectors);
-
-       bch2_dev_usage_update_m(c, ca, old, new, 0, true);
+       if (!ret)
+               bch2_dev_usage_update_m(c, ca, old, new, 0, true);
        percpu_up_read(&c->mark_lock);
-}
-
-static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-{
-       EBUG_ON(sectors < 0);
-
-       return p.crc.compression_type &&
-               p.crc.compression_type != BCH_COMPRESSION_TYPE_incompressible
-               ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
-                              p.crc.uncompressed_size)
-               : sectors;
+       return ret;
 }
 
 static int check_bucket_ref(struct bch_fs *c,
@@ -693,6 +671,9 @@ static int check_bucket_ref(struct bch_fs *c,
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
+       if (bucket_data_type == BCH_DATA_cached)
+               bucket_data_type = BCH_DATA_user;
+
        if (gen_after(ptr->gen, b_gen)) {
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
@@ -737,7 +718,8 @@ static int check_bucket_ref(struct bch_fs *c,
                goto err;
        }
 
-       if (bucket_data_type && ptr_data_type &&
+       if (!data_type_is_empty(bucket_data_type) &&
+           ptr_data_type &&
            bucket_data_type != ptr_data_type) {
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
@@ -808,25 +790,22 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        old = *g;
 
        ret = check_bucket_ref(c, k, ptr, sectors, data_type,
-                              new.gen, new.data_type,
-                              new.dirty_sectors, new.cached_sectors);
-       if (ret) {
-               bucket_unlock(g);
+                              g->gen, g->data_type,
+                              g->dirty_sectors, g->cached_sectors);
+       if (ret)
                goto err;
-       }
 
-       new.dirty_sectors += sectors;
        if (data_type)
-               new.data_type = data_type;
+               g->data_type = data_type;
+       g->dirty_sectors += sectors;
 
        g->stripe               = k.k->p.offset;
        g->stripe_redundancy    = s->nr_redundant;
-
        new = *g;
-       bucket_unlock(g);
-
-       bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
 err:
+       bucket_unlock(g);
+       if (!ret)
+               bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
        percpu_up_read(&c->mark_lock);
        printbuf_exit(&buf);
        return ret;
@@ -872,29 +851,22 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 
        percpu_down_read(&c->mark_lock);
        g = PTR_GC_BUCKET(ca, &p.ptr);
-
        bucket_lock(g);
        old = *g;
 
        bucket_data_type = g->data_type;
-
        ret = __mark_pointer(trans, k, &p.ptr, sectors,
                             data_type, g->gen,
                             &bucket_data_type,
                             &g->dirty_sectors,
                             &g->cached_sectors);
-       if (ret) {
-               bucket_unlock(g);
-               goto err;
-       }
-
-       g->data_type = bucket_data_type;
+       if (!ret)
+               g->data_type = bucket_data_type;
 
        new = *g;
        bucket_unlock(g);
-
-       bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
-err:
+       if (!ret)
+               bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
        percpu_up_read(&c->mark_lock);
 
        return ret;
@@ -1387,27 +1359,43 @@ need_mark:
 /* trans_mark: */
 
 static int bch2_trans_mark_pointer(struct btree_trans *trans,
-                       struct bkey_s_c k, struct extent_ptr_decoded p,
-                       s64 sectors, enum bch_data_type data_type)
+                                  enum btree_id btree_id, unsigned level,
+                                  struct bkey_s_c k, struct extent_ptr_decoded p,
+                                  unsigned flags)
 {
+       bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
        struct btree_iter iter;
        struct bkey_i_alloc_v4 *a;
+       struct bpos bucket_pos;
+       struct bch_backpointer bp;
+       s64 sectors;
        int ret;
 
-       a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
+       bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
+       sectors = bp.bucket_len;
+       if (!insert)
+               sectors = -sectors;
+
+       a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
        if (IS_ERR(a))
                return PTR_ERR(a);
 
-       ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
+       ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
                             a->v.gen, &a->v.data_type,
                             &a->v.dirty_sectors, &a->v.cached_sectors);
        if (ret)
-               goto out;
+               goto err;
+
+       if (!p.ptr.cached) {
+               ret = insert
+                       ? bch2_bucket_backpointer_add(trans, a, bp, k)
+                       : bch2_bucket_backpointer_del(trans, a, bp, k);
+               if (ret)
+                       goto err;
+       }
 
        ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
-       if (ret)
-               goto out;
-out:
+err:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
@@ -1469,6 +1457,7 @@ err:
 }
 
 int bch2_trans_mark_extent(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
                           struct bkey_s_c old, struct bkey_i *new,
                           unsigned flags)
 {
@@ -1500,8 +1489,7 @@ int bch2_trans_mark_extent(struct btree_trans *trans,
                if (flags & BTREE_TRIGGER_OVERWRITE)
                        disk_sectors = -disk_sectors;
 
-               ret = bch2_trans_mark_pointer(trans, k, p,
-                                       disk_sectors, data_type);
+               ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
                if (ret < 0)
                        return ret;
 
@@ -1607,6 +1595,7 @@ err:
 }
 
 int bch2_trans_mark_stripe(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
                           struct bkey_s_c old, struct bkey_i *new,
                           unsigned flags)
 {
@@ -1677,6 +1666,7 @@ int bch2_trans_mark_stripe(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_inode(struct btree_trans *trans,
+                         enum btree_id btree_id, unsigned level,
                          struct bkey_s_c old,
                          struct bkey_i *new,
                          unsigned flags)
@@ -1693,6 +1683,7 @@ int bch2_trans_mark_inode(struct btree_trans *trans,
 }
 
 int bch2_trans_mark_reservation(struct btree_trans *trans,
+                               enum btree_id btree_id, unsigned level,
                                struct bkey_s_c old,
                                struct bkey_i *new,
                                unsigned flags)
@@ -1794,6 +1785,7 @@ err:
 }
 
 int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                             enum btree_id btree_id, unsigned level,
                              struct bkey_s_c old,
                              struct bkey_i *new,
                              unsigned flags)
@@ -1869,7 +1861,7 @@ int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    enum bch_data_type type,
                                    unsigned sectors)
 {
-       return __bch2_trans_do(trans, NULL, NULL, 0,
+       return commit_do(trans, NULL, NULL, 0,
                        __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
 }
 
@@ -1947,8 +1939,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
 
 int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
 {
-       return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
-                       __bch2_trans_mark_dev_sb(&trans, ca));
+       return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
 }
 
 /* Disk reservations: */
@@ -2008,7 +1999,7 @@ recalculate:
                ret = 0;
        } else {
                atomic64_set(&c->sectors_available, sectors_available);
-               ret = -ENOSPC;
+               ret = -BCH_ERR_ENOSPC_disk_reservation;
        }
 
        mutex_unlock(&c->sectors_available_lock);