git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/extents.c
Update bcachefs sources to bdf6d7c135 fixup! bcachefs: Kill journal buf bloom filter
[bcachefs-tools-debian] / libbcachefs / extents.c
index 563e13057f5f2411cef336f4f5644becec9f058a..2e541a4f55ac73d098ea74c90776d47b726d0c94 100644 (file)
@@ -156,12 +156,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 
 /* KEY_TYPE_btree_ptr: */
 
-const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
+int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                          int rw, struct printbuf *err)
 {
-       if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX)
-               return "value too big";
+       if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) {
+               pr_buf(err, "value too big (%zu > %u)",
+                      bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
+               return -EINVAL;
+       }
 
-       return bch2_bkey_ptrs_invalid(c, k);
+       return bch2_bkey_ptrs_invalid(c, k, rw, err);
 }
 
 void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
@@ -170,25 +174,35 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
        bch2_bkey_ptrs_to_text(out, c, k);
 }
 
-const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
+int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                             int rw, struct printbuf *err)
 {
        struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
 
-       if (bkey_val_bytes(k.k) <= sizeof(*bp.v))
-               return "value too small";
+       if (bkey_val_bytes(k.k) <= sizeof(*bp.v)) {
+               pr_buf(err, "value too small (%zu <= %zu)",
+                      bkey_val_bytes(k.k), sizeof(*bp.v));
+               return -EINVAL;
+       }
 
-       if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
-               return "value too big";
+       if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) {
+               pr_buf(err, "value too big (%zu > %zu)",
+                      bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
+               return -EINVAL;
+       }
 
        if (c->sb.version < bcachefs_metadata_version_snapshot &&
-           bp.v->min_key.snapshot)
-               return "invalid min_key.snapshot";
+           bp.v->min_key.snapshot) {
+               pr_buf(err, "invalid min_key.snapshot (%u != 0)",
+                      bp.v->min_key.snapshot);
+               return -EINVAL;
+       }
 
-       return bch2_bkey_ptrs_invalid(c, k);
+       return bch2_bkey_ptrs_invalid(c, k, rw, err);
 }
 
 void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
-                           struct bkey_s_c k)
+                              struct bkey_s_c k)
 {
        struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
 
@@ -220,17 +234,6 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
 
 /* KEY_TYPE_extent: */
 
-const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
-{
-       return bch2_bkey_ptrs_invalid(c, k);
-}
-
-void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
-                        struct bkey_s_c k)
-{
-       bch2_bkey_ptrs_to_text(out, c, k);
-}
-
 bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 {
        struct bkey_ptrs   l_ptrs = bch2_bkey_ptrs(l);
@@ -303,10 +306,22 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 
                        if (lp.crc.csum_type &&
                            lp.crc.uncompressed_size +
-                           rp.crc.uncompressed_size > c->sb.encoded_extent_max)
+                           rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9))
                                return false;
+               }
+
+               en_l = extent_entry_next(en_l);
+               en_r = extent_entry_next(en_r);
+       }
+
+       en_l = l_ptrs.start;
+       en_r = r_ptrs.start;
+       while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
+               if (extent_entry_is_crc(en_l)) {
+                       struct bch_extent_crc_unpacked crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+                       struct bch_extent_crc_unpacked crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
 
-                       if (lp.crc.uncompressed_size + rp.crc.uncompressed_size >
+                       if (crc_l.uncompressed_size + crc_r.uncompressed_size >
                            bch2_crc_field_size_max[extent_entry_type(en_l)])
                                return false;
                }
@@ -363,17 +378,24 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 
 /* KEY_TYPE_reservation: */
 
-const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k)
+int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                            int rw, struct printbuf *err)
 {
        struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
 
-       if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
-               return "incorrect value size";
+       if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) {
+               pr_buf(err, "incorrect value size (%zu != %zu)",
+                      bkey_val_bytes(k.k), sizeof(*r.v));
+               return -EINVAL;
+       }
 
-       if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
-               return "invalid nr_replicas";
+       if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) {
+               pr_buf(err, "invalid nr_replicas (%u)",
+                      r.v->nr_replicas);
+               return -EINVAL;
+       }
 
-       return NULL;
+       return 0;
 }
 
 void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
@@ -480,7 +502,7 @@ restart_narrow_pointers:
 
        bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
                if (can_narrow_crc(p.crc, n)) {
-                       bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
+                       __bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
                        p.ptr.offset += p.crc.offset;
                        p.crc = n;
                        bch2_extent_ptr_decoded_append(k, &p);
@@ -612,38 +634,6 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k)
        return false;
 }
 
-bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
-                               unsigned nr_replicas, bool compressed)
-{
-       struct btree_trans trans;
-       struct btree_iter *iter;
-       struct bpos end = pos;
-       struct bkey_s_c k;
-       bool ret = true;
-       int err;
-
-       end.offset += size;
-
-       bch2_trans_init(&trans, c, 0, 0);
-
-       for_each_btree_key(&trans, iter, BTREE_ID_extents, pos,
-                          BTREE_ITER_SLOTS, k, err) {
-               if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
-                       break;
-
-               if (nr_replicas > bch2_bkey_replicas(c, k) ||
-                   (!compressed && bch2_bkey_sectors_compressed(k))) {
-                       ret = false;
-                       break;
-               }
-       }
-       bch2_trans_iter_put(&trans, iter);
-
-       bch2_trans_exit(&trans);
-
-       return ret;
-}
-
 unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -817,41 +807,85 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
        return i;
 }
 
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
-                                          struct bch_extent_ptr *ptr)
+static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
+{
+       union bch_extent_entry *next = extent_entry_next(entry);
+
+       /* stripes have ptrs, but their layout doesn't work with this code */
+       BUG_ON(k.k->type == KEY_TYPE_stripe);
+
+       memmove_u64s_down(entry, next,
+                         (u64 *) bkey_val_end(k) - (u64 *) next);
+       k.k->u64s -= (u64 *) next - (u64 *) entry;
+}
+
+/*
+ * Returns pointer to the next entry after the one being dropped:
+ */
+union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
+                                            struct bch_extent_ptr *ptr)
 {
        struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
-       union bch_extent_entry *dst, *src, *prev;
+       union bch_extent_entry *entry = to_entry(ptr), *next;
+       union bch_extent_entry *ret = entry;
        bool drop_crc = true;
 
        EBUG_ON(ptr < &ptrs.start->ptr ||
                ptr >= &ptrs.end->ptr);
        EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
 
-       src = extent_entry_next(to_entry(ptr));
-       if (src != ptrs.end &&
-           !extent_entry_is_crc(src))
-               drop_crc = false;
-
-       dst = to_entry(ptr);
-       while ((prev = extent_entry_prev(ptrs, dst))) {
-               if (extent_entry_is_ptr(prev))
+       for (next = extent_entry_next(entry);
+            next != ptrs.end;
+            next = extent_entry_next(next)) {
+               if (extent_entry_is_crc(next)) {
                        break;
-
-               if (extent_entry_is_crc(prev)) {
-                       if (drop_crc)
-                               dst = prev;
+               } else if (extent_entry_is_ptr(next)) {
+                       drop_crc = false;
                        break;
                }
+       }
+
+       extent_entry_drop(k, entry);
+
+       while ((entry = extent_entry_prev(ptrs, entry))) {
+               if (extent_entry_is_ptr(entry))
+                       break;
 
-               dst = prev;
+               if ((extent_entry_is_crc(entry) && drop_crc) ||
+                   extent_entry_is_stripe_ptr(entry)) {
+                       ret = (void *) ret - extent_entry_bytes(entry);
+                       extent_entry_drop(k, entry);
+               }
        }
 
-       memmove_u64s_down(dst, src,
-                         (u64 *) ptrs.end - (u64 *) src);
-       k.k->u64s -= (u64 *) src - (u64 *) dst;
+       return ret;
+}
 
-       return dst;
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
+                                          struct bch_extent_ptr *ptr)
+{
+       bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
+       union bch_extent_entry *ret =
+               __bch2_bkey_drop_ptr(k, ptr);
+
+       /*
+        * If we deleted all the dirty pointers and there's still cached
+        * pointers, we could set the cached pointers to dirty if they're not
+        * stale - but to do that correctly we'd need to grab an open_bucket
+        * reference so that we don't race with bucket reuse:
+        */
+       if (have_dirty &&
+           !bch2_bkey_dirty_devs(k.s_c).nr) {
+               k.k->type = KEY_TYPE_error;
+               set_bkey_val_u64s(k.k, 0);
+               ret = NULL;
+       } else if (!bch2_bkey_nr_ptrs(k.s_c)) {
+               k.k->type = KEY_TYPE_deleted;
+               set_bkey_val_u64s(k.k, 0);
+               ret = NULL;
+       }
+
+       return ret;
 }
 
 void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
@@ -921,10 +955,6 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
                ptr->cached &&
                ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
 
-       /* will only happen if all pointers were cached: */
-       if (!bch2_bkey_nr_ptrs(k.s_c))
-               k.k->type = KEY_TYPE_deleted;
-
        return bkey_deleted(k.k);
 }
 
@@ -946,27 +976,37 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
                switch (__extent_entry_type(entry)) {
                case BCH_EXTENT_ENTRY_ptr:
                        ptr = entry_to_ptr(entry);
-                       ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+                       ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
                                ? bch_dev_bkey_exists(c, ptr->dev)
                                : NULL;
 
-                       pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev,
-                              (u64) ptr->offset, ptr->gen,
-                              ptr->cached ? " cached" : "",
-                              ca && ptr_stale(ca, ptr)
-                              ? " stale" : "");
+                       if (!ca) {
+                               pr_buf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+                                      (u64) ptr->offset, ptr->gen,
+                                      ptr->cached ? " cached" : "");
+                       } else {
+                               u32 offset;
+                               u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+                               pr_buf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
+                                      b, offset, ptr->gen,
+                                      ptr->cached ? " cached" : "");
+
+                               if (ca && ptr_stale(ca, ptr))
+                                       pr_buf(out, " stale");
+                       }
                        break;
                case BCH_EXTENT_ENTRY_crc32:
                case BCH_EXTENT_ENTRY_crc64:
                case BCH_EXTENT_ENTRY_crc128:
                        crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-                       pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
+                       pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
                               crc.compressed_size,
                               crc.uncompressed_size,
                               crc.offset, crc.nonce,
-                              crc.csum_type,
-                              crc.compression_type);
+                              bch2_csum_types[crc.csum_type],
+                              bch2_compression_types[crc.compression_type]);
                        break;
                case BCH_EXTENT_ENTRY_stripe_ptr:
                        ec = &entry->stripe_ptr;
@@ -983,69 +1023,86 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
        }
 }
 
-static const char *extent_ptr_invalid(const struct bch_fs *c,
-                                     struct bkey_s_c k,
-                                     const struct bch_extent_ptr *ptr,
-                                     unsigned size_ondisk,
-                                     bool metadata)
+static int extent_ptr_invalid(const struct bch_fs *c,
+                             struct bkey_s_c k,
+                             const struct bch_extent_ptr *ptr,
+                             unsigned size_ondisk,
+                             bool metadata,
+                             struct printbuf *err)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const struct bch_extent_ptr *ptr2;
+       u64 bucket;
+       u32 bucket_offset;
        struct bch_dev *ca;
 
-       if (!bch2_dev_exists2(c, ptr->dev))
-               return "pointer to invalid device";
+       if (!bch2_dev_exists2(c, ptr->dev)) {
+               pr_buf(err, "pointer to invalid device (%u)", ptr->dev);
+               return -EINVAL;
+       }
 
        ca = bch_dev_bkey_exists(c, ptr->dev);
-       if (!ca)
-               return "pointer to invalid device";
-
        bkey_for_each_ptr(ptrs, ptr2)
-               if (ptr != ptr2 && ptr->dev == ptr2->dev)
-                       return "multiple pointers to same device";
+               if (ptr != ptr2 && ptr->dev == ptr2->dev) {
+                       pr_buf(err, "multiple pointers to same device (%u)", ptr->dev);
+                       return -EINVAL;
+               }
 
-       if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
-               return "offset past end of device";
+       bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset);
 
-       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
-               return "offset before first bucket";
+       if (bucket >= ca->mi.nbuckets) {
+               pr_buf(err, "pointer past last bucket (%llu > %llu)",
+                      bucket, ca->mi.nbuckets);
+               return -EINVAL;
+       }
 
-       if (bucket_remainder(ca, ptr->offset) +
-           size_ondisk > ca->mi.bucket_size)
-               return "spans multiple buckets";
+       if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) {
+               pr_buf(err, "pointer before first bucket (%llu < %u)",
+                      bucket, ca->mi.first_bucket);
+               return -EINVAL;
+       }
 
-       return NULL;
+       if (bucket_offset + size_ondisk > ca->mi.bucket_size) {
+               pr_buf(err, "pointer spans multiple buckets (%u + %u > %u)",
+                      bucket_offset, size_ondisk, ca->mi.bucket_size);
+               return -EINVAL;
+       }
+
+       return 0;
 }
 
-const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
+int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
+                          int rw, struct printbuf *err)
 {
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-       struct bch_devs_list devs;
        const union bch_extent_entry *entry;
        struct bch_extent_crc_unpacked crc;
        unsigned size_ondisk = k.k->size;
-       const char *reason;
        unsigned nonce = UINT_MAX;
-       unsigned i;
+       int ret;
 
-       if (k.k->type == KEY_TYPE_btree_ptr ||
-           k.k->type == KEY_TYPE_btree_ptr_v2)
-               size_ondisk = c->opts.btree_node_size;
+       if (bkey_is_btree_ptr(k.k))
+               size_ondisk = btree_sectors(c);
 
        bkey_extent_entry_for_each(ptrs, entry) {
-               if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
-                       return "invalid extent entry type";
+               if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) {
+                       pr_buf(err, "invalid extent entry type (got %u, max %u)",
+                              __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
+                       return -EINVAL;
+               }
 
-               if (k.k->type == KEY_TYPE_btree_ptr &&
-                   !extent_entry_is_ptr(entry))
-                       return "has non ptr field";
+               if (bkey_is_btree_ptr(k.k) &&
+                   !extent_entry_is_ptr(entry)) {
+                       pr_buf(err, "has non ptr field");
+                       return -EINVAL;
+               }
 
                switch (extent_entry_type(entry)) {
                case BCH_EXTENT_ENTRY_ptr:
-                       reason = extent_ptr_invalid(c, k, &entry->ptr,
-                                                   size_ondisk, false);
-                       if (reason)
-                               return reason;
+                       ret = extent_ptr_invalid(c, k, &entry->ptr, size_ondisk,
+                                                false, err);
+                       if (ret)
+                               return ret;
                        break;
                case BCH_EXTENT_ENTRY_crc32:
                case BCH_EXTENT_ENTRY_crc64:
@@ -1053,22 +1110,30 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
                        crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
                        if (crc.offset + crc.live_size >
-                           crc.uncompressed_size)
-                               return "checksum offset + key size > uncompressed size";
+                           crc.uncompressed_size) {
+                               pr_buf(err, "checksum offset + key size > uncompressed size");
+                               return -EINVAL;
+                       }
 
                        size_ondisk = crc.compressed_size;
 
-                       if (!bch2_checksum_type_valid(c, crc.csum_type))
-                               return "invalid checksum type";
+                       if (!bch2_checksum_type_valid(c, crc.csum_type)) {
+                               pr_buf(err, "invalid checksum type");
+                               return -EINVAL;
+                       }
 
-                       if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR)
-                               return "invalid compression type";
+                       if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) {
+                               pr_buf(err, "invalid compression type");
+                               return -EINVAL;
+                       }
 
                        if (bch2_csum_type_is_encryption(crc.csum_type)) {
                                if (nonce == UINT_MAX)
                                        nonce = crc.offset + crc.nonce;
-                               else if (nonce != crc.offset + crc.nonce)
-                                       return "incorrect nonce";
+                               else if (nonce != crc.offset + crc.nonce) {
+                                       pr_buf(err, "incorrect nonce");
+                                       return -EINVAL;
+                               }
                        }
                        break;
                case BCH_EXTENT_ENTRY_stripe_ptr:
@@ -1076,13 +1141,7 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
                }
        }
 
-       devs = bch2_bkey_devs(k);
-       bubble_sort(devs.devs, devs.nr, u8_cmp);
-       for (i = 0; i + 1 < devs.nr; i++)
-               if (devs.devs[i] == devs.devs[i + 1])
-                       return "multiple ptrs to same device";
-
-       return NULL;
+       return 0;
 }
 
 void bch2_ptr_swab(struct bkey_s k)