git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/fsck.c
Update bcachefs sources to fd6fb298aa bcachefs: Make sure hash info gets initialized...
[bcachefs-tools-debian] / libbcachefs / fsck.c
index 12f2ef4417cb31255a405d5517ee727922ff5623..4e71005777348667bd2875031b07f279e3a618eb 100644 (file)
@@ -3,6 +3,7 @@
 #include "bcachefs.h"
 #include "bkey_buf.h"
 #include "btree_update.h"
+#include "buckets.h"
 #include "darray.h"
 #include "dirent.h"
 #include "error.h"
@@ -31,14 +32,12 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
        u64 sectors = 0;
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_extents,
-                          SPOS(inum, 0, snapshot), 0, k, ret) {
-               if (k.k->p.inode != inum)
-                       break;
-
+       for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
+                               SPOS(inum, 0, snapshot),
+                               POS(inum, U64_MAX),
+                               0, k, ret)
                if (bkey_extent_is_allocation(k.k))
                        sectors += k.k->size;
-       }
 
        bch2_trans_iter_exit(trans, &iter);
 
@@ -54,11 +53,10 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
        u64 subdirs = 0;
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_dirents,
-                          SPOS(inum, 0, snapshot), 0, k, ret) {
-               if (k.k->p.inode != inum)
-                       break;
-
+       for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
+                               SPOS(inum, 0, snapshot),
+                               POS(inum, U64_MAX),
+                               0, k, ret) {
                if (k.k->type != KEY_TYPE_dirent)
                        continue;
 
@@ -66,7 +64,6 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
                if (d.v->d_type == DT_DIR)
                        subdirs++;
        }
-
        bch2_trans_iter_exit(trans, &iter);
 
        return ret ?: subdirs;
@@ -133,7 +130,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
        if (ret)
                goto err;
 
-       if (!k.k || bkey_cmp(k.k->p, POS(0, inode_nr))) {
+       if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
                ret = -ENOENT;
                goto err;
        }
@@ -321,7 +318,7 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
        bch2_trans_iter_exit(trans, &iter);
 err:
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -506,7 +503,7 @@ static int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id
                        break;
 
                if (i->equiv == n.equiv) {
-                       bch_err(c, "adding duplicate snapshot in snapshots_seen_add()");
+                       bch_err(c, "%s(): adding duplicate snapshot", __func__);
                        return -EINVAL;
                }
        }
@@ -527,7 +524,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
        };
        int ret = 0;
 
-       if (bkey_cmp(s->pos, pos))
+       if (!bkey_eq(s->pos, pos))
                s->ids.nr = 0;
 
        pos.snapshot = n.equiv;
@@ -609,6 +606,20 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
                : bch2_snapshot_is_ancestor(c, src, dst);
 }
 
+/*
+ * Two-sided visibility check between snapshot ids @src and @dst, each of
+ * which comes with its own snapshots_seen list.
+ *
+ * Both ids are first mapped through bch2_snapshot_equiv(). The query is then
+ * made with the numerically larger id as the last argument of
+ * key_visible_in_snapshot(), together with the seen list that belongs to that
+ * larger id. Returns whatever key_visible_in_snapshot() returns.
+ */
+static int ref_visible2(struct bch_fs *c,
+                       u32 src, struct snapshots_seen *src_seen,
+                       u32 dst, struct snapshots_seen *dst_seen)
+{
+       u32 src_equiv = bch2_snapshot_equiv(c, src);
+       u32 dst_equiv = bch2_snapshot_equiv(c, dst);
+
+       /* dst has the larger id: it and its seen list take the "src" role */
+       if (dst_equiv > src_equiv)
+               return key_visible_in_snapshot(c, dst_seen, src_equiv, dst_equiv);
+
+       return key_visible_in_snapshot(c, src_seen, dst_equiv, src_equiv);
+}
+
 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i)                              \
        for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&        \
             (_i)->snapshot <= (_snapshot); _i++)                                       \
@@ -662,10 +673,8 @@ static int __walk_inode(struct btree_trans *trans,
 
        pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot);
 
-       if (pos.inode == w->cur_inum) {
-               w->first_this_inode = false;
+       if (pos.inode == w->cur_inum)
                goto lookup_snapshot;
-       }
 
        w->inodes.nr = 0;
 
@@ -779,12 +788,10 @@ static int hash_redo_key(struct btree_trans *trans,
        if (IS_ERR(delete))
                return PTR_ERR(delete);
 
-       tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       tmp = bch2_bkey_make_mut(trans, k);
        if (IS_ERR(tmp))
                return PTR_ERR(tmp);
 
-       bkey_reassemble(tmp, k);
-
        bkey_init(&delete->k);
        delete->k.p = k_iter->pos;
        return  bch2_btree_iter_traverse(k_iter) ?:
@@ -823,9 +830,9 @@ static int hash_check_key(struct btree_trans *trans,
                goto bad_hash;
 
        for_each_btree_key_norestart(trans, iter, desc.btree_id,
-                                    POS(hash_k.k->p.inode, hash),
+                                    SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
                                     BTREE_ITER_SLOTS, k, ret) {
-               if (!bkey_cmp(k.k->p, hash_k.k->p))
+               if (bkey_eq(k.k->p, hash_k.k->p))
                        break;
 
                if (fsck_err_on(k.k->type == desc.key_type &&
@@ -848,16 +855,15 @@ out:
        printbuf_exit(&buf);
        return ret;
 bad_hash:
-       if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, "
-                    "hashed to %llu\n%s",
+       if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
                     bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
                     (printbuf_reset(&buf),
                      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
                ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
-               if (ret) {
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
+               if (ret)
                        return ret;
-               }
                ret = -BCH_ERR_transaction_restart_nested;
        }
 fsck_err:
@@ -947,11 +953,11 @@ static int check_inode(struct btree_trans *trans,
                                     iter->pos.snapshot),
                                POS(u.bi_inum, U64_MAX),
                                0, NULL);
-               if (ret) {
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        bch_err(c, "error in fsck: error truncating inode: %s",
                                bch2_err_str(ret));
+               if (ret)
                        return ret;
-               }
 
                /*
                 * We truncated without our normal sector accounting hook, just
@@ -1000,7 +1006,7 @@ static int check_inode(struct btree_trans *trans,
 err:
 fsck_err:
        if (ret)
-               bch_err(c, "error from check_inode(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1026,7 +1032,7 @@ static int check_inodes(struct bch_fs *c, bool full)
        bch2_trans_exit(&trans);
        snapshots_seen_exit(&s);
        if (ret)
-               bch_err(c, "error from check_inodes(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1159,16 +1165,108 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
        }
 fsck_err:
        if (ret)
-               bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        if (!ret && trans_was_restarted(trans, restart_count))
                ret = -BCH_ERR_transaction_restart_nested;
        return ret;
 }
 
+/*
+ * One recorded extent end: filled in by extent_ends_at() from a key's
+ * position and consulted by check_overlapping_extents().
+ */
+struct extent_end {
+       /* snapshot field of the extent's key position (k.k->p.snapshot) */
+       u32                     snapshot;
+       /* offset field of the key position; fsck reports it as where the extent ends */
+       u64                     offset;
+       /* copy of the snapshots_seen state at that key, with its own ids.data buffer */
+       struct snapshots_seen   seen;
+};
+
+/* Array of extent_end entries, declared through the DARRAY() helper. */
+typedef DARRAY(struct extent_end) extent_ends;
+
+/*
+ * Check extent @k against the extent ends recorded so far in @extent_ends
+ * and report any recorded extent that ends past the start of @k.
+ *
+ * @seen is the snapshots_seen state for @k; every recorded entry carries its
+ * own copy. Entries with the same offset and snapshot as @k are skipped as
+ * duplicates, and only pairs for which ref_visible2() returns nonzero are
+ * compared.
+ *
+ * When fsck_err_on() evaluates true for an overlap, @k is reassembled into a
+ * buffer from bch2_trans_kmalloc() and passed to bch2_trans_update_extent()
+ * through @iter. The walk stops after the first repair attempt.
+ *
+ * Returns 0, or a nonzero error from the allocation or the update.
+ * NOTE(review): fsck_err_on() presumably also sets ret and jumps to the
+ * fsck_err label on its own failures - confirm against its definition.
+ */
+static int check_overlapping_extents(struct btree_trans *trans,
+                             struct snapshots_seen *seen,
+                             extent_ends *extent_ends,
+                             struct bkey_s_c k,
+                             struct btree_iter *iter)
+{
+       struct bch_fs *c = trans->c;
+       struct extent_end *i;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       darray_for_each(*extent_ends, i) {
+               /* duplicate, due to transaction restart: */
+               if (i->offset   == k.k->p.offset &&
+                   i->snapshot == k.k->p.snapshot)
+                       continue;
+
+               if (!ref_visible2(c,
+                                 k.k->p.snapshot, seen,
+                                 i->snapshot, &i->seen))
+                       continue;
+
+               if (fsck_err_on(i->offset > bkey_start_offset(k.k), c,
+                               "overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s",
+                               i->snapshot,
+                               i->offset,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+                       struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+                       bkey_reassemble(update, k);
+                       ret = bch2_trans_update_extent(trans, iter, update, 0);
+                       /*
+                        * Leave the loop whether or not the update succeeded.
+                        * The previous "if (!ret) goto err" kept iterating
+                        * after a failure, so a later iteration could
+                        * overwrite the error (including a transaction
+                        * restart) with 0.
+                        */
+                       goto err;
+               }
+       }
+err:
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
+/*
+ * Record the end of extent @k in @extent_ends so that later extents can be
+ * checked against it.
+ *
+ * The new entry takes @k's snapshot and offset plus a copy of @seen whose id
+ * array is duplicated with kmemdup(), so the entry owns its own buffer. If an
+ * entry with the same snapshot already exists, its old snapshots_seen is
+ * released and the entry is overwritten. Otherwise the new entry is inserted
+ * in front of the first entry whose snapshot is larger, or at the end if
+ * there is none.
+ *
+ * Returns 0 on success, -BCH_ERR_ENOMEM_fsck_extent_ends_at if the
+ * duplication fails, or the error returned by darray_insert_item().
+ */
+static int extent_ends_at(extent_ends *extent_ends,
+                         struct snapshots_seen *seen,
+                         struct bkey_s_c k)
+{
+       struct extent_end *i, n = (struct extent_end) {
+               .snapshot       = k.k->p.snapshot,
+               .offset         = k.k->p.offset,
+               .seen           = *seen,
+       };
+       int ret;
+
+       /* the struct copy above shares seen's buffer; give n its own */
+       n.seen.ids.data = kmemdup(seen->ids.data,
+                             sizeof(seen->ids.data[0]) * seen->ids.size,
+                             GFP_KERNEL);
+       if (!n.seen.ids.data)
+               return -BCH_ERR_ENOMEM_fsck_extent_ends_at;
+
+       darray_for_each(*extent_ends, i) {
+               if (i->snapshot == k.k->p.snapshot) {
+                       snapshots_seen_exit(&i->seen);
+                       *i = n;
+                       return 0;
+               }
+
+               if (i->snapshot >= k.k->p.snapshot)
+                       break;
+       }
+
+       ret = darray_insert_item(extent_ends, i - extent_ends->data, n);
+       if (ret) {
+               /*
+                * Insertion failed, so nothing took ownership of the
+                * duplicated id array: release it instead of leaking it.
+                */
+               snapshots_seen_exit(&n.seen);
+       }
+       return ret;
+}
+
+/*
+ * Empty @extent_ends: call snapshots_seen_exit() on the seen state of every
+ * stored entry, then set the element count to zero. Only nr is changed here;
+ * the data pointer is left as it is.
+ */
+static void extent_ends_reset(extent_ends *extent_ends)
+{
+       size_t idx;
+
+       for (idx = 0; idx < extent_ends->nr; idx++)
+               snapshots_seen_exit(&extent_ends->data[idx].seen);
+
+       extent_ends->nr = 0;
+}
+
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        struct bkey_s_c k,
                        struct inode_walker *inode,
-                       struct snapshots_seen *s)
+                       struct snapshots_seen *s,
+                       extent_ends *extent_ends)
 {
        struct bch_fs *c = trans->c;
        struct inode_walker_entry *i;
@@ -1196,24 +1294,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                ret = check_i_sectors(trans, inode);
                if (ret)
                        goto err;
+
+               extent_ends_reset(extent_ends);
        }
 
        BUG_ON(!iter->path->should_be_locked);
-#if 0
-       if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
-               char buf1[200];
-               char buf2[200];
 
-               bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
-               bch2_bkey_val_to_text(&PBUF(buf2), c, k);
+       ret = check_overlapping_extents(trans, s, extent_ends, k, iter);
+       if (ret)
+               goto err;
+
+       ret = extent_ends_at(extent_ends, s, k);
+       if (ret)
+               goto err;
 
-               if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
-                       ret = fix_overlapping_extent(trans, k, prev.k->k.p)
-                               ?: -BCH_ERR_transaction_restart_nested;
-                       goto out;
-               }
-       }
-#endif
        ret = __walk_inode(trans, inode, equiv);
        if (ret < 0)
                goto err;
@@ -1258,8 +1352,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        continue;
 
                if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-                               k.k->type != KEY_TYPE_reservation &&
-                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+                               !bkey_extent_is_reservation(k), c,
                                "extent type past end of inode %llu:%u, i_size %llu\n  %s",
                                i->inode.bi_inum, i->snapshot, i->inode.bi_size,
                                (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -1295,7 +1389,7 @@ fsck_err:
        printbuf_exit(&buf);
 
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_extent(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1311,13 +1405,10 @@ static int check_extents(struct bch_fs *c)
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
+       extent_ends extent_ends = { 0 };
+       struct disk_reservation res = { 0 };
        int ret = 0;
 
-#if 0
-       struct bkey_buf prev;
-       bch2_bkey_buf_init(&prev);
-       prev.k->k = KEY(0, 0, 0);
-#endif
        snapshots_seen_init(&s);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
@@ -1326,18 +1417,21 @@ static int check_extents(struct bch_fs *c)
        ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
                        POS(BCACHEFS_ROOT_INO, 0),
                        BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
-                       NULL, NULL,
-                       BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-               check_extent(&trans, &iter, k, &w, &s));
-#if 0
-       bch2_bkey_buf_exit(&prev, c);
-#endif
+                       &res, NULL,
+                       BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, ({
+               bch2_disk_reservation_put(c, &res);
+               check_extent(&trans, &iter, k, &w, &s, &extent_ends);
+       }));
+
+       bch2_disk_reservation_put(c, &res);
+       extent_ends_reset(&extent_ends);
+       darray_exit(&extent_ends);
        inode_walker_exit(&w);
        bch2_trans_exit(&trans);
        snapshots_seen_exit(&s);
 
        if (ret)
-               bch_err(c, "error from check_extents(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1376,7 +1470,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
        }
 fsck_err:
        if (ret)
-               bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        if (!ret && trans_was_restarted(trans, restart_count))
                ret = -BCH_ERR_transaction_restart_nested;
        return ret;
@@ -1497,7 +1591,7 @@ fsck_err:
        printbuf_exit(&buf);
 
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_target(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1543,6 +1637,10 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
        if (ret < 0)
                goto err;
 
+       if (dir->first_this_inode)
+               *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
+       dir->first_this_inode = false;
+
        if (fsck_err_on(ret == INT_MAX, c,
                        "dirent in nonexisting directory:\n%s",
                        (printbuf_reset(&buf),
@@ -1569,11 +1667,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
                goto out;
        }
 
-       if (dir->first_this_inode)
-               *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
-
-       ret = hash_check_key(trans, bch2_dirent_hash_desc,
-                            hash_info, iter, k);
+       ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
        if (ret < 0)
                goto err;
        if (ret) {
@@ -1667,7 +1761,7 @@ fsck_err:
        printbuf_exit(&buf);
 
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1706,7 +1800,7 @@ static int check_dirents(struct bch_fs *c)
        inode_walker_exit(&target);
 
        if (ret)
-               bch_err(c, "error from check_dirents(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1726,6 +1820,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
        if (ret < 0)
                return ret;
 
+       if (inode->first_this_inode)
+               *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
+       inode->first_this_inode = false;
+
        if (fsck_err_on(ret == INT_MAX, c,
                        "xattr for missing inode %llu",
                        k.k->p.inode))
@@ -1736,13 +1834,10 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
 
        ret = 0;
 
-       if (inode->first_this_inode)
-               *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode);
-
        ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
 fsck_err:
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1774,7 +1869,7 @@ static int check_xattrs(struct bch_fs *c)
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error from check_xattrs(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1804,7 +1899,8 @@ static int check_root_trans(struct btree_trans *trans)
                ret = commit_do(trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
-                       __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
+                       __bch2_btree_insert(trans, BTREE_ID_subvolumes,
+                                           &root_subvol.k_i, 0));
                if (ret) {
                        bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
                        goto err;
@@ -2044,11 +2140,12 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t,
 {
        if (t->nr == t->size) {
                size_t new_size = max_t(size_t, 128UL, t->size * 2);
-               void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
+               void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
+
                if (!d) {
                        bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
                                new_size);
-                       return -ENOMEM;
+                       return -BCH_ERR_ENOMEM_fsck_add_nlink;
                }
 
                if (t->d)