Update bcachefs sources to 72405e7ff8 bcachefs: Fix bch2_check_extents_to_backpointers()

diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 1a841146e379fb129ed2ce5de1c55ea147aa88a8..5e6dc6c316d12052d0bdaf85d8d7bab2a130efbe 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
+/*
+ * XXX: this handles transaction restarts without returning
+ * -BCH_ERR_transaction_restart_nested, which is not how we do things anymore:
+ */
 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
                                    u32 snapshot)
 {
@@ -27,14 +31,12 @@ static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
        u64 sectors = 0;
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_extents,
-                          SPOS(inum, 0, snapshot), 0, k, ret) {
-               if (k.k->p.inode != inum)
-                       break;
-
+       for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
+                               SPOS(inum, 0, snapshot),
+                               POS(inum, U64_MAX),
+                               0, k, ret)
                if (bkey_extent_is_allocation(k.k))
                        sectors += k.k->size;
-       }
 
        bch2_trans_iter_exit(trans, &iter);
 
@@ -50,11 +52,10 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
        u64 subdirs = 0;
        int ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_dirents,
-                          SPOS(inum, 0, snapshot), 0, k, ret) {
-               if (k.k->p.inode != inum)
-                       break;
-
+       for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
+                               SPOS(inum, 0, snapshot),
+                               POS(inum, U64_MAX),
+                               0, k, ret) {
                if (k.k->type != KEY_TYPE_dirent)
                        continue;
 
@@ -62,7 +63,6 @@ static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
                if (d.v->d_type == DT_DIR)
                        subdirs++;
        }
-
        bch2_trans_iter_exit(trans, &iter);
 
        return ret ?: subdirs;
@@ -129,7 +129,7 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
        if (ret)
                goto err;
 
-       if (!k.k || bkey_cmp(k.k->p, POS(0, inode_nr))) {
+       if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
                ret = -ENOENT;
                goto err;
        }
@@ -239,18 +239,20 @@ static int fsck_inode_rm(struct btree_trans *trans, u64 inum, u32 snapshot)
        struct bkey_s_c k;
        int ret;
 
-       ret   = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
-                                             SPOS(inum, 0, snapshot),
-                                             SPOS(inum, U64_MAX, snapshot),
-                                             0, NULL) ?:
-               bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
-                                             SPOS(inum, 0, snapshot),
-                                             SPOS(inum, U64_MAX, snapshot),
-                                             0, NULL) ?:
-               bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
-                                             SPOS(inum, 0, snapshot),
-                                             SPOS(inum, U64_MAX, snapshot),
-                                             0, NULL);
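+       /* retry until the range deletes complete without a nested transaction restart: */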
+       do {
+               ret   = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
+                                                     SPOS(inum, 0, snapshot),
+                                                     SPOS(inum, U64_MAX, snapshot),
+                                                     0, NULL) ?:
+                       bch2_btree_delete_range_trans(trans, BTREE_ID_dirents,
+                                                     SPOS(inum, 0, snapshot),
+                                                     SPOS(inum, U64_MAX, snapshot),
+                                                     0, NULL) ?:
+                       bch2_btree_delete_range_trans(trans, BTREE_ID_xattrs,
+                                                     SPOS(inum, 0, snapshot),
+                                                     SPOS(inum, U64_MAX, snapshot),
+                                                     0, NULL);
+       } while (ret == -BCH_ERR_transaction_restart_nested);
        if (ret)
                goto err;
 retry:
@@ -315,7 +317,7 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
        bch2_trans_iter_exit(trans, &iter);
 err:
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -500,7 +502,7 @@ static int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id
                        break;
 
                if (i->equiv == n.equiv) {
-                       bch_err(c, "adding duplicate snapshot in snapshots_seen_add()");
+                       bch_err(c, "%s(): adding duplicate snapshot", __func__);
                        return -EINVAL;
                }
        }
@@ -521,7 +523,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
        };
        int ret = 0;
 
-       if (bkey_cmp(s->pos, pos))
+       if (!bkey_eq(s->pos, pos))
                s->ids.nr = 0;
 
        pos.snapshot = n.equiv;
@@ -603,6 +605,20 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
                : bch2_snapshot_is_ancestor(c, src, dst);
 }
 
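+/* like ref_visible(), but the two keys each carry their own snapshots_seen list: */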
+static int ref_visible2(struct bch_fs *c,
+                       u32 src, struct snapshots_seen *src_seen,
+                       u32 dst, struct snapshots_seen *dst_seen)
+{
+       src = bch2_snapshot_equiv(c, src);
+       dst = bch2_snapshot_equiv(c, dst);
+
+       if (dst > src) {
+               swap(dst, src);
+               swap(dst_seen, src_seen);
+       }
+       return key_visible_in_snapshot(c, src_seen, dst, src);
+}
+
 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i)                              \
        for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&        \
             (_i)->snapshot <= (_snapshot); _i++)                                       \
@@ -722,7 +738,7 @@ static int __get_visible_inodes(struct btree_trans *trans,
 
        w->inodes.nr = 0;
 
-       for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
                           BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
                u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
 
@@ -766,9 +782,6 @@ static int hash_redo_key(struct btree_trans *trans,
                         struct bch_hash_info *hash_info,
                         struct btree_iter *k_iter, struct bkey_s_c k)
 {
-       bch_err(trans->c, "hash_redo_key() not implemented yet");
-       return -EINVAL;
-#if 0
        struct bkey_i *delete;
        struct bkey_i *tmp;
 
@@ -776,18 +789,22 @@ static int hash_redo_key(struct btree_trans *trans,
        if (IS_ERR(delete))
                return PTR_ERR(delete);
 
-       tmp = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       tmp = bch2_bkey_make_mut(trans, k);
        if (IS_ERR(tmp))
                return PTR_ERR(tmp);
 
-       bkey_reassemble(tmp, k);
-
        bkey_init(&delete->k);
        delete->k.p = k_iter->pos;
        return  bch2_btree_iter_traverse(k_iter) ?:
                bch2_trans_update(trans, k_iter, delete, 0) ?:
-               bch2_hash_set(trans, desc, hash_info, k_iter->pos.inode, tmp, 0);
-#endif
+               bch2_hash_set_snapshot(trans, desc, hash_info,
+                                      (subvol_inum) { 0, k.k->p.inode },
+                                      k.k->p.snapshot, tmp,
+                                      BCH_HASH_SET_MUST_CREATE,
+                                      BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+               bch2_trans_commit(trans, NULL, NULL,
+                                 BTREE_INSERT_NOFAIL|
+                                 BTREE_INSERT_LAZY_RW);
 }
 
 static int hash_check_key(struct btree_trans *trans,
@@ -814,9 +831,9 @@ static int hash_check_key(struct btree_trans *trans,
                goto bad_hash;
 
        for_each_btree_key_norestart(trans, iter, desc.btree_id,
-                                    POS(hash_k.k->p.inode, hash),
+                                    SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
                                     BTREE_ITER_SLOTS, k, ret) {
-               if (!bkey_cmp(k.k->p, hash_k.k->p))
+               if (bkey_eq(k.k->p, hash_k.k->p))
                        break;
 
                if (fsck_err_on(k.k->type == desc.key_type &&
@@ -839,8 +856,7 @@ out:
        printbuf_exit(&buf);
        return ret;
 bad_hash:
-       if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, "
-                    "hashed to %llu\n%s",
+       if (fsck_err(c, "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
                     bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
                     (printbuf_reset(&buf),
                      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
@@ -938,11 +954,11 @@ static int check_inode(struct btree_trans *trans,
                                     iter->pos.snapshot),
                                POS(u.bi_inum, U64_MAX),
                                0, NULL);
-               if (ret) {
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        bch_err(c, "error in fsck: error truncating inode: %s",
                                bch2_err_str(ret));
+               if (ret)
                        return ret;
-               }
 
                /*
                 * We truncated without our normal sector accounting hook, just
@@ -991,7 +1007,7 @@ static int check_inode(struct btree_trans *trans,
 err:
 fsck_err:
        if (ret)
-               bch_err(c, "error from check_inode(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1017,7 +1033,7 @@ static int check_inodes(struct bch_fs *c, bool full)
        bch2_trans_exit(&trans);
        snapshots_seen_exit(&s);
        if (ret)
-               bch_err(c, "error from check_inodes(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1150,16 +1166,108 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
        }
 fsck_err:
        if (ret)
-               bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        if (!ret && trans_was_restarted(trans, restart_count))
                ret = -BCH_ERR_transaction_restart_nested;
        return ret;
 }
 
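+/* end position of the last extent seen in each snapshot, used to detect overlaps: */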
+struct extent_end {
+       u32                     snapshot;
+       u64                     offset;
+       struct snapshots_seen   seen;
+};
+
+typedef DARRAY(struct extent_end) extent_ends;
+
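+/* check @k against the recorded ends of earlier extents in snapshots that can see it: */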
+static int check_overlapping_extents(struct btree_trans *trans,
+                             struct snapshots_seen *seen,
+                             extent_ends *extent_ends,
+                             struct bkey_s_c k,
+                             struct btree_iter *iter)
+{
+       struct bch_fs *c = trans->c;
+       struct extent_end *i;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       darray_for_each(*extent_ends, i) {
+               /* duplicate, due to transaction restart: */
+               if (i->offset   == k.k->p.offset &&
+                   i->snapshot == k.k->p.snapshot)
+                       continue;
+
+               if (!ref_visible2(c,
+                                 k.k->p.snapshot, seen,
+                                 i->snapshot, &i->seen))
+                       continue;
+
+               if (fsck_err_on(i->offset > bkey_start_offset(k.k), c,
+                               "overlapping extents: extent in snapshot %u ends at %llu overlaps with\n%s",
+                               i->snapshot,
+                               i->offset,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+                       struct bkey_i *update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+                       if ((ret = PTR_ERR_OR_ZERO(update)))
+                               goto err;
+                       bkey_reassemble(update, k);
+                       ret = bch2_trans_update_extent(trans, iter, update, 0);
+                       if (!ret)
+                               goto err;
+               }
+       }
+err:
+fsck_err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
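+/* record the end of @k for its snapshot, replacing any existing entry for that snapshot: */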
+static int extent_ends_at(extent_ends *extent_ends,
+                         struct snapshots_seen *seen,
+                         struct bkey_s_c k)
+{
+       struct extent_end *i, n = (struct extent_end) {
+               .snapshot       = k.k->p.snapshot,
+               .offset         = k.k->p.offset,
+               .seen           = *seen,
+       };
+
+       n.seen.ids.data = kmemdup(seen->ids.data,
+                             sizeof(seen->ids.data[0]) * seen->ids.size,
+                             GFP_KERNEL);
+       if (!n.seen.ids.data)
+               return -ENOMEM;
+
+       darray_for_each(*extent_ends, i) {
+               if (i->snapshot == k.k->p.snapshot) {
+                       snapshots_seen_exit(&i->seen);
+                       *i = n;
+                       return 0;
+               }
+
+               if (i->snapshot >= k.k->p.snapshot)
+                       break;
+       }
+
+       return darray_insert_item(extent_ends, i - extent_ends->data, n);
+}
+
+static void extent_ends_reset(extent_ends *extent_ends)
+{
+       struct extent_end *i;
+
+       darray_for_each(*extent_ends, i)
+               snapshots_seen_exit(&i->seen);
+
+       extent_ends->nr = 0;
+}
+
 static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        struct bkey_s_c k,
                        struct inode_walker *inode,
-                       struct snapshots_seen *s)
+                       struct snapshots_seen *s,
+                       extent_ends *extent_ends)
 {
        struct bch_fs *c = trans->c;
        struct inode_walker_entry *i;
@@ -1187,24 +1295,20 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                ret = check_i_sectors(trans, inode);
                if (ret)
                        goto err;
+
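+               /* moving on to the next inode: drop extent ends recorded for the previous one */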
+               extent_ends_reset(extent_ends);
        }
 
        BUG_ON(!iter->path->should_be_locked);
-#if 0
-       if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
-               char buf1[200];
-               char buf2[200];
 
-               bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev.k));
-               bch2_bkey_val_to_text(&PBUF(buf2), c, k);
+       ret = check_overlapping_extents(trans, s, extent_ends, k, iter);
+       if (ret)
+               goto err;
+
+       ret = extent_ends_at(extent_ends, s, k);
+       if (ret)
+               goto err;
 
-               if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
-                       ret = fix_overlapping_extent(trans, k, prev.k->k.p)
-                               ?: -BCH_ERR_transaction_restart_nested;
-                       goto out;
-               }
-       }
-#endif
        ret = __walk_inode(trans, inode, equiv);
        if (ret < 0)
                goto err;
@@ -1249,8 +1353,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        continue;
 
                if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-                               k.k->type != KEY_TYPE_reservation &&
-                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
+                               k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+                               !bkey_extent_is_reservation(k), c,
                                "extent type past end of inode %llu:%u, i_size %llu\n  %s",
                                i->inode.bi_inum, i->snapshot, i->inode.bi_size,
                                (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -1286,7 +1390,7 @@ fsck_err:
        printbuf_exit(&buf);
 
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_extent(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1302,13 +1406,9 @@ static int check_extents(struct bch_fs *c)
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
+       extent_ends extent_ends = { 0 };
        int ret = 0;
 
-#if 0
-       struct bkey_buf prev;
-       bch2_bkey_buf_init(&prev);
-       prev.k->k = KEY(0, 0, 0);
-#endif
        snapshots_seen_init(&s);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
@@ -1319,16 +1419,16 @@ static int check_extents(struct bch_fs *c)
                        BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
                        NULL, NULL,
                        BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
-               check_extent(&trans, &iter, k, &w, &s));
-#if 0
-       bch2_bkey_buf_exit(&prev, c);
-#endif
+               check_extent(&trans, &iter, k, &w, &s, &extent_ends));
+
+       extent_ends_reset(&extent_ends);
+       darray_exit(&extent_ends);
        inode_walker_exit(&w);
        bch2_trans_exit(&trans);
        snapshots_seen_exit(&s);
 
        if (ret)
-               bch_err(c, "error from check_extents(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1366,13 +1466,11 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
                }
        }
 fsck_err:
-       if (ret) {
-               bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret));
-               return ret;
-       }
-       if (trans_was_restarted(trans, restart_count))
-               return -BCH_ERR_transaction_restart_nested;
-       return 0;
+       if (ret)
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
+       if (!ret && trans_was_restarted(trans, restart_count))
+               ret = -BCH_ERR_transaction_restart_nested;
+       return ret;
 }
 
 static int check_dirent_target(struct btree_trans *trans,
@@ -1490,7 +1588,7 @@ fsck_err:
        printbuf_exit(&buf);
 
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_target(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1660,7 +1758,7 @@ fsck_err:
        printbuf_exit(&buf);
 
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1699,7 +1797,7 @@ static int check_dirents(struct bch_fs *c)
        inode_walker_exit(&target);
 
        if (ret)
-               bch_err(c, "error from check_dirents(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1735,7 +1833,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
        ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
 fsck_err:
        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1767,7 +1865,7 @@ static int check_xattrs(struct bch_fs *c)
        bch2_trans_exit(&trans);
 
        if (ret)
-               bch_err(c, "error from check_xattrs(): %s", bch2_err_str(ret));
+               bch_err(c, "%s(): error %s", __func__, bch2_err_str(ret));
        return ret;
 }
 
@@ -1797,7 +1895,8 @@ static int check_root_trans(struct btree_trans *trans)
                ret = commit_do(trans, NULL, NULL,
                                      BTREE_INSERT_NOFAIL|
                                      BTREE_INSERT_LAZY_RW,
-                       __bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
+                       __bch2_btree_insert(trans, BTREE_ID_subvolumes,
+                                           &root_subvol.k_i, 0));
                if (ret) {
                        bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
                        goto err;
@@ -2037,7 +2136,8 @@ static int add_nlink(struct bch_fs *c, struct nlink_table *t,
 {
        if (t->nr == t->size) {
                size_t new_size = max_t(size_t, 128UL, t->size * 2);
-               void *d = kvmalloc(new_size * sizeof(t->d[0]), GFP_KERNEL);
+               void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);
+
                if (!d) {
                        bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
                                new_size);