git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to bee7b5a4fa21 bcachefs: Pin btree cache in ram for random...
author Kent Overstreet <kent.overstreet@linux.dev>
Sat, 10 Feb 2024 02:30:46 +0000 (21:30 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 10 Feb 2024 02:32:46 +0000 (21:32 -0500)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
36 files changed:
.bcachefs_revision
include/linux/sort.h
libbcachefs/backpointers.c
libbcachefs/bbpos_types.h
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/bkey_methods.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_types.h
libbcachefs/btree_update.c
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/dirent.c
libbcachefs/dirent.h
libbcachefs/errcode.h
libbcachefs/error.c
libbcachefs/error.h
libbcachefs/fs-common.c
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/lru.c
libbcachefs/opts.h
libbcachefs/recovery.c
libbcachefs/recovery_types.h
libbcachefs/sb-downgrade.c
libbcachefs/sb-errors_types.h
libbcachefs/subvolume.c
libbcachefs/subvolume.h
libbcachefs/subvolume_format.h
linux/mean_and_variance.c

index d3c500bc50343ac2b9dce45aa7b5f7a9c93ede57..1aa1789b35452cd6d17decc9ef3aecd2aca32ae9 100644 (file)
@@ -1 +1 @@
-50847e296b34efabe199e408ec4d72f10a866c39
+bee7b5a4fa2135c9ec9d1c9424018ee494500bb5
index afea044576e3e8799c9312b5a13710a91d9546f1..17c6ba3487f2379ab77838de818c6f59781d6a43 100644 (file)
@@ -2,6 +2,12 @@
 #define _LINUX_SORT_H
 
 #include <stdlib.h>
+#include <linux/types.h>
+
+void sort_r(void *base, size_t num, size_t size,
+           cmp_r_func_t cmp_func,
+           swap_r_func_t swap_func,
+           const void *priv);
 
 static inline void sort(void *base, size_t num, size_t size,
                        int (*cmp_func)(const void *, const void *),
index b4dc319bcb2bc0a5363e74f6d2096d3b5652599d..23fe9378fb40aa76d39d58e176614de50bd078e1 100644 (file)
@@ -129,8 +129,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
        printbuf_exit(&buf);
 
        if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-               bch2_inconsistent_error(c);
-               return -EIO;
+               return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
        } else {
                return 0;
        }
@@ -553,60 +552,61 @@ static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
        };
 }
 
-static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+static u64 mem_may_pin_bytes(struct bch_fs *c)
 {
        struct sysinfo i;
-       u64 mem_bytes;
-
        si_meminfo(&i);
-       mem_bytes = i.totalram * i.mem_unit;
-       return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
+
+       u64 mem_bytes = i.totalram * i.mem_unit;
+       return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
+}
+
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+       return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
 }
 
 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
-                                       unsigned btree_leaf_mask,
-                                       unsigned btree_interior_mask,
+                                       u64 btree_leaf_mask,
+                                       u64 btree_interior_mask,
                                        struct bbpos start, struct bbpos *end)
 {
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-       enum btree_id btree;
+       struct bch_fs *c = trans->c;
+       s64 mem_may_pin = mem_may_pin_bytes(c);
        int ret = 0;
 
-       for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
-               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+       btree_interior_mask |= btree_leaf_mask;
+
+       c->btree_cache.pinned_nodes_leaf_mask           = btree_leaf_mask;
+       c->btree_cache.pinned_nodes_interior_mask       = btree_interior_mask;
+       c->btree_cache.pinned_nodes_start               = start;
+       c->btree_cache.pinned_nodes_end                 = *end = BBPOS_MAX;
+
+       for (enum btree_id btree = start.btree;
+            btree < BTREE_ID_NR && !ret;
+            btree++) {
+               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
+               struct btree_iter iter;
+               struct btree *b;
 
                if (!((1U << btree) & btree_leaf_mask) &&
                    !((1U << btree) & btree_interior_mask))
                        continue;
 
-               bch2_trans_node_iter_init(trans, &iter, btree,
-                                         btree == start.btree ? start.pos : POS_MIN,
-                                         0, depth, 0);
-               /*
-                * for_each_btree_key_contineu() doesn't check the return value
-                * from bch2_btree_iter_advance(), which is needed when
-                * iterating over interior nodes where we'll see keys at
-                * SPOS_MAX:
-                */
-               do {
-                       k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
-                       ret = bkey_err(k);
-                       if (!k.k || ret)
-                               break;
-
-                       --btree_nodes;
-                       if (!btree_nodes) {
-                               *end = BBPOS(btree, k.k->p);
+               __for_each_btree_node(trans, iter, btree,
+                                     btree == start.btree ? start.pos : POS_MIN,
+                                     0, depth, BTREE_ITER_PREFETCH, b, ret) {
+                       mem_may_pin -= btree_buf_bytes(b);
+                       if (mem_may_pin <= 0) {
+                               c->btree_cache.pinned_nodes_end = *end =
+                                       BBPOS(btree, b->key.k.p);
                                bch2_trans_iter_exit(trans, &iter);
                                return 0;
                        }
-               } while (bch2_btree_iter_advance(&iter));
+               }
                bch2_trans_iter_exit(trans, &iter);
        }
 
-       *end = BBPOS_MAX;
        return ret;
 }
 
@@ -664,62 +664,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
        return 0;
 }
 
-static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
-                                        struct bpos bucket)
-{
-       return bch2_dev_exists2(c, bucket.inode)
-               ? bucket_pos_to_bp(c, bucket, 0)
-               : bucket;
-}
-
-static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
-                                       struct bpos start, struct bpos *end)
-{
-       struct btree_iter alloc_iter;
-       struct btree_iter bp_iter;
-       struct bkey_s_c alloc_k, bp_k;
-       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-       bool alloc_end = false, bp_end = false;
-       int ret = 0;
-
-       bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
-                                 start, 0, 1, 0);
-       bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
-                                 bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
-       while (1) {
-               alloc_k = !alloc_end
-                       ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
-                       : bkey_s_c_null;
-               bp_k = !bp_end
-                       ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
-                       : bkey_s_c_null;
-
-               ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
-               if ((!alloc_k.k && !bp_k.k) || ret) {
-                       *end = SPOS_MAX;
-                       break;
-               }
-
-               --btree_nodes;
-               if (!btree_nodes) {
-                       *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
-                       break;
-               }
-
-               if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
-                   bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
-                       if (!bch2_btree_iter_advance(&alloc_iter))
-                               alloc_end = true;
-               } else {
-                       if (!bch2_btree_iter_advance(&bp_iter))
-                               bp_end = true;
-               }
-       }
-       bch2_trans_iter_exit(trans, &bp_iter);
-       bch2_trans_iter_exit(trans, &alloc_iter);
-       return ret;
-}
-
 int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
        struct btree_trans *trans = bch2_trans_get(c);
@@ -730,10 +674,16 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
        bkey_init(&s.last_flushed.k->k);
 
        while (1) {
-               ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
+               struct bbpos end;
+               ret = bch2_get_btree_in_memory_pos(trans,
+                               BIT_ULL(BTREE_ID_backpointers),
+                               BIT_ULL(BTREE_ID_backpointers),
+                               BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
                if (ret)
                        break;
 
+               s.bucket_end = end.pos;
+
                if ( bpos_eq(s.bucket_start, POS_MIN) &&
                    !bpos_eq(s.bucket_end, SPOS_MAX))
                        bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -761,6 +711,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&s.last_flushed, c);
 
+       c->btree_cache.pinned_nodes_leaf_mask = 0;
+       c->btree_cache.pinned_nodes_interior_mask = 0;
+
        bch_err_fn(c, ret);
        return ret;
 }
@@ -866,6 +819,9 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
        }
        bch2_trans_put(trans);
 
+       c->btree_cache.pinned_nodes_leaf_mask = 0;
+       c->btree_cache.pinned_nodes_interior_mask = 0;
+
        bch_err_fn(c, ret);
        return ret;
 }
index 5198e94cf3b89c09f88c1304bc1aa2ff5f7cc35a..f63893344f80aa721554ba3f95124cfc824edbee 100644 (file)
@@ -13,6 +13,6 @@ static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
 }
 
 #define BBPOS_MIN      BBPOS(0, POS_MIN)
-#define BBPOS_MAX      BBPOS(BTREE_ID_NR - 1, POS_MAX)
+#define BBPOS_MAX      BBPOS(BTREE_ID_NR - 1, SPOS_MAX)
 
 #endif /* _BCACHEFS_BBPOS_TYPES_H */
index 70369495be335f14c677d14fb05d8d6bb0e22a84..b53b321b687f680c7a5533c1c78eed8d0a0c5631 100644 (file)
@@ -505,6 +505,7 @@ enum gc_phase {
        GC_PHASE_BTREE_deleted_inodes,
        GC_PHASE_BTREE_logged_ops,
        GC_PHASE_BTREE_rebalance_work,
+       GC_PHASE_BTREE_subvolume_children,
 
        GC_PHASE_PENDING_DELETE,
 };
index 14f613617913e1a3ef0e93c51caa041041f822c2..1bb24aa7352800a9660c513c028a865055e61ab0 100644 (file)
@@ -840,7 +840,9 @@ struct bch_sb_field_downgrade {
        x(snapshot_skiplists,           BCH_VERSION(1,  1))             \
        x(deleted_inodes,               BCH_VERSION(1,  2))             \
        x(rebalance_work,               BCH_VERSION(1,  3))             \
-       x(member_seq,                   BCH_VERSION(1,  4))
+       x(member_seq,                   BCH_VERSION(1,  4))             \
+       x(subvolume_fs_parent,          BCH_VERSION(1,  5))             \
+       x(btree_subvolume_children,     BCH_VERSION(1,  6))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
@@ -1488,7 +1490,9 @@ enum btree_id_flags {
          BIT_ULL(KEY_TYPE_logged_op_truncate)|                                 \
          BIT_ULL(KEY_TYPE_logged_op_finsert))                                  \
        x(rebalance_work,       18,     BTREE_ID_SNAPSHOT_FIELD,                \
-         BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
+         BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))                       \
+       x(subvolume_children,   19,     0,                                      \
+         BIT_ULL(KEY_TYPE_set))
 
 enum btree_id {
 #define x(name, nr, ...) BTREE_ID_##name = nr,
index 03efe8ee565a90672367c2146e3ff44ceb0db526..f8217b24c9fd1a8a21f15f7dc4575a86937c2112 100644 (file)
@@ -78,6 +78,7 @@ bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
 enum btree_update_flags {
        __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
+       __BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE,
        __BTREE_UPDATE_NOJOURNAL,
        __BTREE_UPDATE_KEY_CACHE_RECLAIM,
 
@@ -91,6 +92,8 @@ enum btree_update_flags {
 };
 
 #define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
+#define BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE     \
+                                       (1U << __BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE)
 #define BTREE_UPDATE_NOJOURNAL         (1U << __BTREE_UPDATE_NOJOURNAL)
 #define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
 
index 9b7ea1227069e6d73d53ef15fa0d1ee3afaadd5e..799750464969a57b158247cfc0259c0a7f5d84c7 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bbpos.h"
 #include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_io.h"
@@ -208,6 +209,18 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
        int ret = 0;
 
        lockdep_assert_held(&bc->lock);
+
+       struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+       u64 mask = b->c.level
+               ? bc->pinned_nodes_interior_mask
+               : bc->pinned_nodes_leaf_mask;
+
+       if ((mask & BIT_ULL(b->c.btree_id)) &&
+           bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+           bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
+               return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
 wait_on_io:
        if (b->flags & ((1U << BTREE_NODE_dirty)|
                        (1U << BTREE_NODE_read_in_flight)|
@@ -905,7 +918,7 @@ retry:
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_type(&b->c.lock, lock_type);
-               return ERR_PTR(-EIO);
+               return ERR_PTR(-BCH_ERR_btree_node_read_error);
        }
 
        EBUG_ON(b->c.btree_id != path->btree_id);
@@ -996,7 +1009,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_type(&b->c.lock, lock_type);
-               return ERR_PTR(-EIO);
+               return ERR_PTR(-BCH_ERR_btree_node_read_error);
        }
 
        EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1079,7 +1092,7 @@ lock_node:
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_read(&b->c.lock);
-               b = ERR_PTR(-EIO);
+               b = ERR_PTR(-BCH_ERR_btree_node_read_error);
                goto out;
        }
 
index eb92526bb9b64cee6468f3b35a908e9807d85403..6c52f116098f7d24723771d79e5374880d743a56 100644 (file)
@@ -407,7 +407,7 @@ again:
                printbuf_reset(&buf);
                bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
 
-               if (mustfix_fsck_err_on(ret == -EIO, c,
+               if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c,
                                btree_node_unreadable,
                                "Topology repair: unreadable btree node at btree %s level %u:\n"
                                "  %s",
@@ -979,7 +979,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                                                false);
                        ret = PTR_ERR_OR_ZERO(child);
 
-                       if (ret == -EIO) {
+                       if (bch2_err_matches(ret, EIO)) {
                                bch2_topology_error(c);
 
                                if (__fsck_err(c,
index 61b6093805eaf2fc433e49dec5d4ad7228b352d7..86415701b824076f10fc79e8bee220ed7075e328 100644 (file)
@@ -581,8 +581,7 @@ static int __btree_err(int ret,
                break;
        case -BCH_ERR_btree_node_read_err_bad_node:
                bch2_print_string_as_lines(KERN_ERR, out.buf);
-               bch2_topology_error(c);
-               ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
+               ret = bch2_topology_error(c);
                break;
        case -BCH_ERR_btree_node_read_err_incompatible:
                bch2_print_string_as_lines(KERN_ERR, out.buf);
@@ -1737,7 +1736,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
                list_move(&b->list, &c->btree_cache.freeable);
                mutex_unlock(&c->btree_cache.lock);
 
-               ret = -EIO;
+               ret = -BCH_ERR_btree_node_read_error;
                goto err;
        }
 
@@ -1841,7 +1840,7 @@ static void btree_node_write_work(struct work_struct *work)
                bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
 
        if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
-               ret = -BCH_ERR_btree_write_all_failed;
+               ret = -BCH_ERR_btree_node_write_all_failed;
                goto err;
        }
 
index 3aac6ed5446ebd8d322d37d67276b41215150a36..07b1de5cdee6e62325b5657d33313e7688e47783 100644 (file)
@@ -2303,7 +2303,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
                btree_iter_path(trans, iter)->level);
 
        if (iter->flags & BTREE_ITER_WITH_JOURNAL)
-               return bkey_s_c_err(-EIO);
+               return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
 
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
@@ -2501,6 +2501,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                        k = bch2_btree_iter_peek_upto(&iter2, end);
 
                        if (k.k && !bkey_err(k)) {
+                               swap(iter->key_cache_path, iter2.key_cache_path);
                                iter->k = iter2.k;
                                k.k = &iter->k;
                        }
@@ -2760,6 +2761,9 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
        struct btree_trans *trans = src->trans;
 
        *dst = *src;
+#ifdef TRACK_PATH_ALLOCATED
+       dst->ip_allocated = _RET_IP_;
+#endif
        if (src->path)
                __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
        if (src->update_path)
index 0d5eecbd3e9cfb92d95ff9215ad0710bae7fb54b..b2ebf143c3b7d648f3fe0a90d5f046b7c854323f 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
+#include "bbpos_types.h"
 #include "btree_key_cache_types.h"
 #include "buckets_types.h"
 #include "errcode.h"
@@ -173,6 +174,11 @@ struct btree_cache {
         */
        struct task_struct      *alloc_lock;
        struct closure_waitlist alloc_wait;
+
+       struct bbpos            pinned_nodes_start;
+       struct bbpos            pinned_nodes_end;
+       u64                     pinned_nodes_leaf_mask;
+       u64                     pinned_nodes_interior_mask;
 };
 
 struct btree_node_iter {
@@ -654,6 +660,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
         BIT_ULL(BKEY_TYPE_inodes)|                     \
         BIT_ULL(BKEY_TYPE_stripes)|                    \
         BIT_ULL(BKEY_TYPE_reflink)|                    \
+        BIT_ULL(BKEY_TYPE_subvolumes)|                 \
         BIT_ULL(BKEY_TYPE_btree))
 
 #define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS            \
@@ -727,7 +734,7 @@ struct btree_root {
        __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
        u8                      level;
        u8                      alive;
-       s8                      error;
+       s16                     error;
 };
 
 enum btree_gc_coalesce_fail_reason {
index e5193116b092f6b7120ac2f7e3c16f09846f2f59..d3d625d4977aaa7c8fdff4e1c0d8ac6ee72818db 100644 (file)
@@ -82,40 +82,169 @@ static noinline int extent_back_merge(struct btree_trans *trans,
        return 0;
 }
 
+static struct bkey_s_c peek_slot_including_whiteouts(struct btree_trans *trans, struct btree_iter *iter,
+                                                    enum btree_id btree, struct bpos pos)
+{
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key_norestart(trans, *iter, btree, pos,
+                          BTREE_ITER_ALL_SNAPSHOTS|
+                          BTREE_ITER_NOPRESERVE, k, ret) {
+               if (!bkey_eq(k.k->p, pos))
+                       break;
+               if (bch2_snapshot_is_ancestor(trans->c, pos.snapshot, k.k->p.snapshot))
+                       return k;
+       }
+       bch2_trans_iter_exit(trans, iter);
+
+       return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
+}
+
 /*
  * When deleting, check if we need to emit a whiteout (because we're overwriting
  * something in an ancestor snapshot)
  */
-static int need_whiteout_for_snapshot(struct btree_trans *trans,
-                                     enum btree_id btree_id, struct bpos pos)
+static int need_whiteout_for_snapshot(struct btree_trans *trans, enum btree_id btree, struct bpos pos)
+{
+       pos.snapshot = bch2_snapshot_parent(trans->c, pos.snapshot);
+       if (!pos.snapshot)
+               return 0;
+
+       struct btree_iter iter;
+       struct bkey_s_c k = peek_slot_including_whiteouts(trans, &iter, btree, pos);
+       int ret = bkey_err(k) ?: k.k && !bkey_whiteout(k.k);
+       bch2_trans_iter_exit(trans, &iter);
+
+       return ret;
+}
+
+/*
+ * We're overwriting a key at @pos in snapshot @snapshot, so we need to insert a
+ * whiteout: that might be in @snapshot, or if there are overwrites in sibling
+ * snapshots, find the common ancestor where @pos is overwritten in every
+ * descendent and insert the whiteout there - which might be at @pos.
+ */
+static int delete_interior_snapshot_key(struct btree_trans *trans,
+                                       enum btree_id btree,
+                                       struct bpos whiteout, bool deleting,
+                                       struct bpos overwrite, bool old_is_whiteout)
 {
+       struct bch_fs *c = trans->c;
+       struct bpos orig_whiteout = whiteout, sib = whiteout;
        struct btree_iter iter;
        struct bkey_s_c k;
-       u32 snapshot = pos.snapshot;
        int ret;
 
-       if (!bch2_snapshot_parent(trans->c, pos.snapshot))
-               return 0;
+       sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
 
-       pos.snapshot++;
+       for_each_btree_key_norestart(trans, iter, btree, sib,
+                                    BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_INTENT, k, ret) {
+               BUG_ON(bpos_gt(k.k->p, overwrite));
 
-       for_each_btree_key_norestart(trans, iter, btree_id, pos,
-                          BTREE_ITER_ALL_SNAPSHOTS|
-                          BTREE_ITER_NOPRESERVE, k, ret) {
-               if (!bkey_eq(k.k->p, pos))
+               if (bpos_lt(k.k->p, sib)) /* unrelated branch - skip */
+                       continue;
+               if (bpos_gt(k.k->p, sib)) /* did not find @sib */
                        break;
 
-               if (bch2_snapshot_is_ancestor(trans->c, snapshot,
-                                             k.k->p.snapshot)) {
-                       ret = !bkey_whiteout(k.k);
+               /* @overwrite is also written in @sib, now check parent */
+               whiteout.snapshot = bch2_snapshot_parent(c, whiteout.snapshot);
+               if (bpos_eq(whiteout, overwrite))
                        break;
-               }
+
+               sib = whiteout;
+               sib.snapshot = bch2_snapshot_sibling(c, sib.snapshot);
        }
-       bch2_trans_iter_exit(trans, &iter);
 
+       if (ret)
+               goto err;
+
+       if (!deleting && bpos_eq(whiteout, orig_whiteout))
+               goto out;
+
+       if (!bpos_eq(iter.pos, whiteout)) {
+               bch2_trans_iter_exit(trans, &iter);
+               bch2_trans_iter_init(trans, &iter, btree, whiteout, BTREE_ITER_INTENT);
+               k = bch2_btree_iter_peek_slot(&iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
+       }
+
+       iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+       iter.flags |= BTREE_ITER_FILTER_SNAPSHOTS;
+
+       struct bkey_i *delete = bch2_trans_kmalloc(trans, sizeof(*delete));
+       ret = PTR_ERR_OR_ZERO(delete);
+       if (ret)
+               goto err;
+
+       bkey_init(&delete->k);
+       delete->k.p = whiteout;
+
+       ret = !bpos_eq(whiteout, overwrite)
+               ? !old_is_whiteout
+               : need_whiteout_for_snapshot(trans, btree, whiteout);
+       if (ret < 0)
+               goto err;
+       if (ret)
+               delete->k.type = KEY_TYPE_whiteout;
+
+       ret = bch2_trans_update(trans, &iter, delete,
+                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+                               BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE);
+out:
+err:
+       bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
 
+/*
+ * We're overwriting a key in a snapshot that has ancestors: if we're
+ * overwriting a key in a different snapshot, we need to check if it is now
+ * fully overwritten and can be deleted, and if we're deleting a key in the
+ * current snapshot we need to check if we need to leave a whiteout.
+ */
+static noinline int
+overwrite_interior_snapshot_key(struct btree_trans *trans,
+                               struct btree_iter *iter,
+                               struct bkey_i *k)
+{
+       struct bkey_s_c old = bch2_btree_iter_peek_slot(iter);
+
+       int ret = bkey_err(old);
+       if (ret)
+               return ret;
+
+       if (!bkey_deleted(old.k)) {
+               if (old.k->p.snapshot != k->k.p.snapshot) {
+                       /*
+                        * We're overwriting a key in a different snapshot:
+                        * check if it's also been overwritten in siblings
+                        */
+                       ret = delete_interior_snapshot_key(trans, iter->btree_id,
+                                                          k->k.p,   bkey_deleted(&k->k),
+                                                          old.k->p, bkey_whiteout(old.k));
+                       if (ret)
+                               return ret;
+                       if (bkey_deleted(&k->k))
+                               return 1;
+               } else if (bkey_deleted(&k->k)) {
+                       /*
+                        * We're deleting a key in the current snapshot:
+                        * check if we need to leave a whiteout
+                        */
+                       ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
+                       if (unlikely(ret < 0))
+                               return ret;
+                       if (ret)
+                               k->k.type = KEY_TYPE_whiteout;
+               }
+       }
+
+       return 0;
+}
+
 int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
                                   enum btree_id id,
                                   struct bpos old_pos,
@@ -503,32 +632,29 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
 int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
                                   struct bkey_i *k, enum btree_update_flags flags)
 {
-       btree_path_idx_t path_idx = iter->update_path ?: iter->path;
-       int ret;
-
        if (iter->flags & BTREE_ITER_IS_EXTENTS)
                return bch2_trans_update_extent(trans, iter, k, flags);
 
-       if (bkey_deleted(&k->k) &&
-           !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
-           (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
-               ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
-               if (unlikely(ret < 0))
-                       return ret;
-
+       if (!(flags & (BTREE_UPDATE_SNAPSHOT_WHITEOUT_CHECKS_DONE|
+                      BTREE_UPDATE_KEY_CACHE_RECLAIM)) &&
+           (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
+           bch2_snapshot_parent(trans->c, k->k.p.snapshot)) {
+               int ret = overwrite_interior_snapshot_key(trans, iter, k);
                if (ret)
-                       k->k.type = KEY_TYPE_whiteout;
+                       return ret < 0 ? ret : 0;
        }
 
        /*
         * Ensure that updates to cached btrees go to the key cache:
         */
+       btree_path_idx_t path_idx = iter->update_path ?: iter->path;
        struct btree_path *path = trans->paths + path_idx;
+
        if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
            !path->cached &&
            !path->level &&
            btree_id_cached(trans->c, path->btree_id)) {
-               ret = bch2_trans_update_get_key_cache(trans, iter, path);
+               int ret = bch2_trans_update_get_key_cache(trans, iter, path);
                if (ret)
                        return ret;
 
@@ -789,6 +915,27 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 
 int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
                       struct bpos pos, bool set)
+{
+       struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
+       int ret = PTR_ERR_OR_ZERO(k);
+       if (ret)
+               return ret;
+
+       bkey_init(&k->k);
+       k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+       k->k.p = pos;
+
+       struct btree_iter iter;
+       bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, k, 0);
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
+                               struct bpos pos, bool set)
 {
        struct bkey_i k;
 
index b9382b7b288b6a6189d191886511a3ee57187634..cc7c53e83f89dd5cdc8ccccab214ae3cbfd3fb72 100644 (file)
@@ -63,11 +63,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
                            struct bpos, struct bpos, unsigned, u64 *);
 
 int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
+int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);
 
 static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
                                                enum btree_id btree, struct bpos pos)
 {
-       return bch2_btree_bit_mod(trans, btree, pos, false);
+       return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
 }
 
 int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
index 030291cc8f97dd57a0f9c9f3c9aaf9b94ac57213..5fbea33f6747432ef9c0624f82d766c114a573d3 100644 (file)
@@ -1844,8 +1844,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
                        __func__, buf1.buf, buf2.buf);
                printbuf_exit(&buf1);
                printbuf_exit(&buf2);
-               bch2_topology_error(c);
-               ret = -EIO;
+               ret = bch2_topology_error(c);
                goto err;
        }
 
index 7dca10ba70d253fe1e0619e738ea7826d1ea1ca1..c2f46b267b3ad50c796690320f0a700411940931 100644 (file)
@@ -1053,7 +1053,8 @@ int bch2_trigger_extent(struct btree_trans *trans,
                          (int) bch2_bkey_needs_rebalance(c, old);
 
                if (mod) {
-                       int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
+                       int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+                                                             new.k->p, mod > 0);
                        if (ret)
                                return ret;
                }
index 97773cffccae8da7fd67c58c144463af479fc48d..b5ee11b50f5c0945a58ef74dcdd07aa9fe98225c 100644 (file)
@@ -201,17 +201,17 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
 }
 
 int bch2_dirent_create_snapshot(struct btree_trans *trans,
-                       u64 dir, u32 snapshot,
+                       u32 dir_subvol, u64 dir, u32 snapshot,
                        const struct bch_hash_info *hash_info,
                        u8 type, const struct qstr *name, u64 dst_inum,
                        u64 *dir_offset,
                        bch_str_hash_flags_t str_hash_flags)
 {
-       subvol_inum zero_inum = { 0 };
+       subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
        struct bkey_i_dirent *dirent;
        int ret;
 
-       dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum);
+       dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
        ret = PTR_ERR_OR_ZERO(dirent);
        if (ret)
                return ret;
@@ -220,7 +220,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
        dirent->k.p.snapshot    = snapshot;
 
        ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
-                                       zero_inum, snapshot,
+                                       dir_inum, snapshot,
                                        &dirent->k_i, str_hash_flags,
                                        BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
        *dir_offset = dirent->k.p.offset;
@@ -522,7 +522,7 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
                           SPOS(dir, 0, snapshot),
                           POS(dir, U64_MAX), 0, k, ret)
                if (k.k->type == KEY_TYPE_dirent) {
-                       ret = -ENOTEMPTY;
+                       ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
                        break;
                }
        bch2_trans_iter_exit(trans, &iter);
index f1dd7208a58e05e8acf481e00ab5bc93731d2f74..34cb8e18eaf8976ff69972c49d9e2eea72cfa21c 100644 (file)
@@ -35,7 +35,7 @@ static inline unsigned dirent_val_u64s(unsigned len)
 int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
                            struct bkey_s_c_dirent, subvol_inum *);
 
-int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32,
+int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
                        const struct bch_hash_info *, u8,
                        const struct qstr *, u64, u64 *,
                        bch_str_hash_flags_t);
index 3fd33b307a77f943bea59882de951995d27987c5..fe3fc14d3c9a19452b7a4ad54f589156d523258b 100644 (file)
        x(ENOENT,                       ENOENT_dirent_doesnt_match_inode)       \
        x(ENOENT,                       ENOENT_dev_not_found)                   \
        x(ENOENT,                       ENOENT_dev_idx_not_found)               \
+       x(ENOTEMPTY,                    ENOTEMPTY_dir_not_empty)                \
+       x(ENOTEMPTY,                    ENOTEMPTY_subvol_not_empty)             \
        x(0,                            open_buckets_empty)                     \
        x(0,                            freelist_empty)                         \
        x(BCH_ERR_freelist_empty,       no_buckets_found)                       \
        x(EINVAL,                       opt_parse_error)                        \
        x(EINVAL,                       remove_with_metadata_missing_unimplemented)\
        x(EINVAL,                       remove_would_lose_data)                 \
+       x(EINVAL,                       btree_iter_with_journal_not_supported)  \
        x(EROFS,                        erofs_trans_commit)                     \
        x(EROFS,                        erofs_no_writes)                        \
        x(EROFS,                        erofs_journal_err)                      \
        x(BCH_ERR_operation_blocked,    nocow_lock_blocked)                     \
        x(EIO,                          btree_node_read_err)                    \
        x(EIO,                          sb_not_downgraded)                      \
-       x(EIO,                          btree_write_all_failed)                 \
+       x(EIO,                          btree_node_write_all_failed)            \
+       x(EIO,                          btree_node_read_error)                  \
+       x(EIO,                          btree_node_read_validate_error)         \
+       x(EIO,                          btree_need_topology_repair)             \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_fixable)            \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_want_retry)         \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_must_retry)         \
index 70a125395974076c9e7c7be95cfe9ab1c9980585..8ae95b218e8b0c9c1f8ba3a0ab1c2a58cad2b66d 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "error.h"
+#include "recovery.h"
 #include "super.h"
 #include <linux/thread_with_file.h>
 
@@ -25,11 +26,16 @@ bool bch2_inconsistent_error(struct bch_fs *c)
        }
 }
 
-void bch2_topology_error(struct bch_fs *c)
+/*
+ * Flag a btree topology error on @c and return the error code the caller
+ * should propagate.
+ *
+ * BCH_FS_topology_error is always set. If fsck is not running, an
+ * inconsistent error is raised as well and
+ * -BCH_ERR_btree_need_topology_repair is returned. If fsck is running, the
+ * check_topology recovery pass is requested: a nonzero result from that
+ * request is returned as-is, otherwise
+ * -BCH_ERR_btree_node_read_validate_error is returned.
+ */
+int bch2_topology_error(struct bch_fs *c)
 {
        set_bit(BCH_FS_topology_error, &c->flags);
-       if (!test_bit(BCH_FS_fsck_running, &c->flags))
+       if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
                bch2_inconsistent_error(c);
+               return -BCH_ERR_btree_need_topology_repair;
+       } else {
+               return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
+                       -BCH_ERR_btree_node_read_validate_error;
+       }
 }
 
 void bch2_fatal_error(struct bch_fs *c)
index fec17d1353d18042ca77132106d2e2318de2be01..94491190e09e9d5085ca1ef87c5764c4192d6a24 100644 (file)
@@ -30,7 +30,7 @@ struct work_struct;
 
 bool bch2_inconsistent_error(struct bch_fs *);
 
-void bch2_topology_error(struct bch_fs *);
+int bch2_topology_error(struct bch_fs *);
 
 #define bch2_fs_inconsistent(c, ...)                                   \
 ({                                                                     \
index 523507e38887bf9fd4aaacf6ece326d04e6edd16..3d43c036c5e0f85eb6b016250e6a774b6834c34f 100644 (file)
@@ -107,6 +107,7 @@ int bch2_create_trans(struct btree_trans *trans,
                u32 new_subvol, dir_snapshot;
 
                ret = bch2_subvolume_create(trans, new_inode->bi_inum,
+                                           dir.subvol,
                                            snapshot_src.subvol,
                                            &new_subvol, &snapshot,
                                            (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
@@ -242,7 +243,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
                      struct bch_inode_unpacked *dir_u,
                      struct bch_inode_unpacked *inode_u,
                      const struct qstr *name,
-                     bool deleting_snapshot)
+                     bool deleting_subvol)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter dir_iter = { NULL };
@@ -270,18 +271,25 @@ int bch2_unlink_trans(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) {
+       if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
                ret = bch2_empty_dir_trans(trans, inum);
                if (ret)
                        goto err;
        }
 
-       if (deleting_snapshot && !inode_u->bi_subvol) {
+       if (deleting_subvol && !inode_u->bi_subvol) {
                ret = -BCH_ERR_ENOENT_not_subvol;
                goto err;
        }
 
-       if (deleting_snapshot || inode_u->bi_subvol) {
+       if (inode_u->bi_subvol) {
+               /* Recursive subvolume destroy not allowed (yet?) */
+               ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
+               if (ret)
+                       goto err;
+       }
+
+       if (deleting_subvol || inode_u->bi_subvol) {
                ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
                if (ret)
                        goto err;
index 77ea61090e913555624c2a093b5d851475887797..4445fa2f53d02b341add2899e7df044150abcee8 100644 (file)
@@ -503,7 +503,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
                bch2_subvol_is_ro(c, inode->ei_subvol) ?:
                __bch2_link(c, inode, dir, dentry);
        if (unlikely(ret))
-               return ret;
+               return bch2_err_class(ret);
 
        ihold(&inode->v);
        d_instantiate(dentry, &inode->v);
@@ -555,8 +555,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
        struct bch_inode_info *dir= to_bch_ei(vdir);
        struct bch_fs *c = dir->v.i_sb->s_fs_info;
 
-       return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
+       int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
                __bch2_unlink(vdir, dentry, false);
+       return bch2_err_class(ret);
 }
 
 static int bch2_symlink(struct mnt_idmap *idmap,
@@ -591,7 +592,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
        return 0;
 err:
        iput(&inode->v);
-       return ret;
+       return bch2_err_class(ret);
 }
 
 static int bch2_mkdir(struct mnt_idmap *idmap,
index e4a8a14c46bc922983e91edcdc9ece6fe717d3eb..dfd54708d2a0212f4c7263654c849057886d2d2a 100644 (file)
@@ -252,7 +252,7 @@ create_lostfound:
                goto err;
 
        ret =   bch2_dirent_create_snapshot(trans,
-                               root_inode.bi_inum, snapshot, &root_hash_info,
+                               0, root_inode.bi_inum, snapshot, &root_hash_info,
                                mode_to_type(lostfound->bi_mode),
                                &lostfound_str,
                                lostfound->bi_inum,
@@ -275,9 +275,24 @@ static int reattach_inode(struct btree_trans *trans,
        char name_buf[20];
        struct qstr name;
        u64 dir_offset = 0;
+       u32 dirent_snapshot = inode_snapshot;
        int ret;
 
-       ret = lookup_lostfound(trans, inode_snapshot, &lostfound);
+       if (inode->bi_subvol) {
+               inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
+
+               u64 root_inum;
+               ret = subvol_lookup(trans, inode->bi_parent_subvol,
+                                   &dirent_snapshot, &root_inum);
+               if (ret)
+                       return ret;
+
+               snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
+       } else {
+               snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
+       }
+
+       ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
        if (ret)
                return ret;
 
@@ -291,14 +306,16 @@ static int reattach_inode(struct btree_trans *trans,
 
        dir_hash = bch2_hash_info_init(trans->c, &lostfound);
 
-       snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
        name = (struct qstr) QSTR(name_buf);
 
        ret = bch2_dirent_create_snapshot(trans,
-                               lostfound.bi_inum, inode_snapshot,
+                               inode->bi_parent_subvol, lostfound.bi_inum,
+                               dirent_snapshot,
                                &dir_hash,
                                inode_d_type(inode),
-                               &name, inode->bi_inum, &dir_offset,
+                               &name,
+                               inode->bi_subvol ?: inode->bi_inum,
+                               &dir_offset,
                                BCH_HASH_SET_MUST_CREATE);
        if (ret)
                return ret;
@@ -564,13 +581,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 }
 
 static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
-                         u32 snapshot, bool is_whiteout)
+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
 {
-       struct inode_walker_entry *i;
-
-       snapshot = bch2_snapshot_equiv(c, snapshot);
+       bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
+       u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
 
+       struct inode_walker_entry *i;
        __darray_for_each(w->inodes, i)
                if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
                        goto found;
@@ -581,20 +597,24 @@ found:
 
        if (snapshot != i->snapshot && !is_whiteout) {
                struct inode_walker_entry new = *i;
-               size_t pos;
-               int ret;
 
                new.snapshot = snapshot;
                new.count = 0;
 
-               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
-                        w->last_pos.inode, snapshot, i->snapshot);
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, k);
+
+               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+                        "unexpected because we should always update the inode when we update a key in that inode\n"
+                        "%s",
+                        w->last_pos.inode, snapshot, i->snapshot, buf.buf);
+               printbuf_exit(&buf);
 
                while (i > w->inodes.data && i[-1].snapshot > snapshot)
                        --i;
 
-               pos = i - w->inodes.data;
-               ret = darray_insert_item(&w->inodes, pos, new);
+               size_t pos = i - w->inodes.data;
+               int ret = darray_insert_item(&w->inodes, pos, new);
                if (ret)
                        return ERR_PTR(ret);
 
@@ -605,21 +625,21 @@ found:
 }
 
+/*
+ * Advance the inode walker @w to key @k's position and return the walker
+ * entry that lookup_inode_for_snapshot() selects for @k.
+ *
+ * When @k belongs to a different inode number than the previous call, the
+ * walker is reloaded with get_inodes_all_snapshots(); when only the position
+ * within the same inode changed, each entry's seen_this_pos flag is cleared.
+ *
+ * Returns an ERR_PTR() if reloading the inodes fails; otherwise whatever
+ * lookup_inode_for_snapshot() returns.
+ */
 static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
-                                            struct inode_walker *w, struct bpos pos,
-                                            bool is_whiteout)
+                                            struct inode_walker *w,
+                                            struct bkey_s_c k)
 {
-       if (w->last_pos.inode != pos.inode) {
-               int ret = get_inodes_all_snapshots(trans, w, pos.inode);
+       if (w->last_pos.inode != k.k->p.inode) {
+               int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
                if (ret)
                        return ERR_PTR(ret);
-       } else if (bkey_cmp(w->last_pos, pos)) {
+       } else if (bkey_cmp(w->last_pos, k.k->p)) {
                darray_for_each(w->inodes, i)
                        i->seen_this_pos = false;
        }
 
-       w->last_pos = pos;
+       w->last_pos = k.k->p;
 
-       return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
+       return lookup_inode_for_snapshot(trans->c, w, k);
 }
 
 static int __get_visible_inodes(struct btree_trans *trans,
@@ -767,6 +787,43 @@ fsck_err:
        goto out;
 }
 
+/*
+ * Look up the key at @pos in the dirents btree through @iter and return it as
+ * a typed dirent key. Callers in this file test the result with bkey_err()
+ * and call bch2_trans_iter_exit() on @iter when they are done with it.
+ */
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+                                               struct btree_iter *iter,
+                                               struct bpos pos)
+{
+       return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
+}
+
+/*
+ * Fetch the dirent that @inode's backpointer (bi_dir, bi_dir_offset) names.
+ *
+ * For an inode with bi_subvol set, *@snapshot is first overwritten by
+ * subvol_lookup() on bi_parent_subvol, so the dirent is looked up in that
+ * snapshot; the inode number subvol_lookup() also reports is not used. For
+ * any other inode the caller's *@snapshot is used unchanged.
+ *
+ * If subvol_lookup() fails, the error is returned as an ERR_PTR() in the
+ * result's .k field and @iter is not touched.
+ */
+static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
+                                              struct btree_iter *iter,
+                                              struct bch_inode_unpacked *inode,
+                                              u32 *snapshot)
+{
+       if (inode->bi_subvol) {
+               u64 inum;
+               int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
+               if (ret)
+                       return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
+       }
+
+       return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
+}
+
+/*
+ * True if @inode's backpointer (bi_dir, bi_dir_offset) matches the inode and
+ * offset fields of dirent @d's key position. The snapshot field is not
+ * compared.
+ */
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+                                  struct bkey_s_c_dirent d)
+{
+       return  inode->bi_dir           == d.k->p.inode &&
+               inode->bi_dir_offset    == d.k->p.offset;
+}
+
+/*
+ * True if dirent @d refers to @inode: a DT_SUBVOL dirent is matched by
+ * comparing d_child_subvol against bi_subvol, any other dirent type by
+ * comparing d_inum against bi_inum.
+ */
+static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
+                                  struct bch_inode_unpacked *inode)
+{
+       return d.v->d_type == DT_SUBVOL
+               ? le32_to_cpu(d.v->d_child_subvol)      == inode->bi_subvol
+               : le64_to_cpu(d.v->d_inum)              == inode->bi_inum;
+}
+
 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
 {
        struct btree_iter iter;
@@ -779,6 +836,49 @@ static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
        return k.k->type == KEY_TYPE_set;
 }
 
+/*
+ * Check the dirent that @inode's backpointer names: it must exist and must
+ * point back at @inode.
+ *
+ * If either fsck_err_on() check below fires (dirent missing, or dirent does
+ * not point back), the in-memory backpointer fields and the
+ * backptr_untrusted flag are cleared and *@write_inode is set; this function
+ * does not write the inode itself.
+ *
+ * A dirent lookup error other than ENOENT is returned immediately; the
+ * normal path returns 0.
+ * NOTE(review): fsck_err_on() presumably can jump to the fsck_err label with
+ * ret set - confirm against the macro definition.
+ */
+static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+                                   struct bch_inode_unpacked *inode,
+                                   u32 inode_snapshot, bool *write_inode)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf buf = PRINTBUF;
+
+       struct btree_iter dirent_iter = {};
+       struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
+       int ret = bkey_err(d);
+       /* ENOENT is not fatal here: it is reported below as a missing dirent */
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(ret,
+                       c, inode_points_to_missing_dirent,
+                       "inode points to missing dirent\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
+           fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+                       c, inode_points_to_wrong_dirent,
+                       "inode points to dirent that does not point back:\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, inode_k),
+                        prt_newline(&buf),
+                        bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+               /*
+                * We just clear the backpointer fields for now. If we find a
+                * dirent that points to this inode in check_dirents(), we'll
+                * update it then; then when we get to check_path() if the
+                * backpointer is still 0 we'll reattach it.
+                */
+               inode->bi_dir = 0;
+               inode->bi_dir_offset = 0;
+               inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
+               *write_inode = true;
+       }
+
+       /* an ENOENT from the lookup has been dealt with above; don't return it */
+       ret = 0;
+fsck_err:
+       bch2_trans_iter_exit(trans, &dirent_iter);
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 static int check_inode(struct btree_trans *trans,
                       struct btree_iter *iter,
                       struct bkey_s_c k,
@@ -923,6 +1023,22 @@ static int check_inode(struct btree_trans *trans,
                do_update = true;
        }
 
+       if (u.bi_dir || u.bi_dir_offset) {
+               ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+               if (ret)
+                       goto err;
+       }
+
+       if (fsck_err_on(u.bi_parent_subvol &&
+                       (u.bi_subvol == 0 ||
+                        u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
+                       c, inode_bi_parent_nonzero,
+                       "inode %llu:%u has subvol %u but nonzero parent subvol %u",
+                       u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
+               u.bi_parent_subvol = 0;
+               do_update = true;
+       }
+
        if (u.bi_subvol) {
                struct bch_subvolume s;
 
@@ -980,28 +1096,6 @@ int bch2_check_inodes(struct bch_fs *c)
        return ret;
 }
 
-static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
-                                               struct btree_iter *iter,
-                                               struct bpos pos)
-{
-       return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
-}
-
-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
-                                  struct bkey_s_c_dirent d)
-{
-       return  inode->bi_dir           == d.k->p.inode &&
-               inode->bi_dir_offset    == d.k->p.offset;
-}
-
-static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
-                                  struct bch_inode_unpacked *inode)
-{
-       return d.v->d_type == DT_SUBVOL
-               ? le32_to_cpu(d.v->d_child_subvol)      == inode->bi_subvol
-               : le64_to_cpu(d.v->d_inum)              == inode->bi_inum;
-}
-
 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
@@ -1310,7 +1404,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        goto err;
        }
 
-       i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret)
                goto err;
@@ -1489,84 +1583,82 @@ fsck_err:
        return ret ?: trans_was_restarted(trans, restart_count);
 }
 
-static int check_inode_backpointer(struct btree_trans *trans,
+static int check_dirent_inode_dirent(struct btree_trans *trans,
                                   struct btree_iter *iter,
                                   struct bkey_s_c_dirent d,
                                   struct bch_inode_unpacked *target,
                                   u32 target_snapshot)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter bp_iter = { NULL };
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
+       if (inode_points_to_dirent(target, d))
+               return 0;
+
        if (!target->bi_dir &&
            !target->bi_dir_offset) {
                target->bi_dir          = d.k->p.inode;
                target->bi_dir_offset   = d.k->p.offset;
-
-               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
-               if (ret)
-                       goto err;
+               return __bch2_fsck_write_inode(trans, target, target_snapshot);
        }
 
-       if (!inode_points_to_dirent(target, d)) {
-               struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
-                                     SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
-               ret = bkey_err(bp_dirent);
-               if (ret && !bch2_err_matches(ret, ENOENT))
-                       goto err;
-
-               bool backpointer_exists = !ret;
-               ret = 0;
-
-               bch2_bkey_val_to_text(&buf, c, d.s_c);
-               prt_newline(&buf);
-               if (backpointer_exists)
-                       bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+       struct btree_iter bp_iter = { NULL };
+       struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
+                             SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
+       ret = bkey_err(bp_dirent);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               goto err;
 
-               if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
-                               c, inode_dir_multiple_links,
-                               "directory %llu:%u with multiple links\n%s",
-                               target->bi_inum, target_snapshot, buf.buf)) {
-                       ret = __remove_dirent(trans, d.k->p);
-                       goto out;
-               }
+       bool backpointer_exists = !ret;
+       ret = 0;
+
+       if (fsck_err_on(!backpointer_exists,
+                       c, inode_wrong_backpointer,
+                       "inode %llu:%u has wrong backpointer:\n"
+                       "got       %llu:%llu\n"
+                       "should be %llu:%llu",
+                       target->bi_inum, target_snapshot,
+                       target->bi_dir,
+                       target->bi_dir_offset,
+                       d.k->p.inode,
+                       d.k->p.offset)) {
+               target->bi_dir          = d.k->p.inode;
+               target->bi_dir_offset   = d.k->p.offset;
+               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+               goto out;
+       }
 
-               /*
-                * hardlinked file with nlink 0:
-                * We're just adjusting nlink here so check_nlinks() will pick
-                * it up, it ignores inodes with nlink 0
-                */
-               if (fsck_err_on(backpointer_exists && !target->bi_nlink,
-                               c, inode_multiple_links_but_nlink_0,
-                               "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
-                               target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
-                       target->bi_nlink++;
-                       target->bi_flags &= ~BCH_INODE_unlinked;
-
-                       ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
+       bch2_bkey_val_to_text(&buf, c, d.s_c);
+       prt_newline(&buf);
+       if (backpointer_exists)
+               bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+
+       if (fsck_err_on(backpointer_exists &&
+                       (S_ISDIR(target->bi_mode) ||
+                        target->bi_subvol),
+                       c, inode_dir_multiple_links,
+                       "%s %llu:%u with multiple links\n%s",
+                       S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
+                       target->bi_inum, target_snapshot, buf.buf)) {
+               ret = __remove_dirent(trans, d.k->p);
+               goto out;
+       }
 
-               if (fsck_err_on(!backpointer_exists,
-                               c, inode_wrong_backpointer,
-                               "inode %llu:%u has wrong backpointer:\n"
-                               "got       %llu:%llu\n"
-                               "should be %llu:%llu",
-                               target->bi_inum, target_snapshot,
-                               target->bi_dir,
-                               target->bi_dir_offset,
-                               d.k->p.inode,
-                               d.k->p.offset)) {
-                       target->bi_dir          = d.k->p.inode;
-                       target->bi_dir_offset   = d.k->p.offset;
-
-                       ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
+       /*
+        * hardlinked file with nlink 0:
+        * We're just adjusting nlink here so check_nlinks() will pick
+        * it up, it ignores inodes with nlink 0
+        */
+       if (fsck_err_on(backpointer_exists && !target->bi_nlink,
+                       c, inode_multiple_links_but_nlink_0,
+                       "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
+                       target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
+               target->bi_nlink++;
+               target->bi_flags &= ~BCH_INODE_unlinked;
+               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+               if (ret)
+                       goto err;
        }
 out:
 err:
@@ -1588,7 +1680,7 @@ static int check_dirent_target(struct btree_trans *trans,
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       ret = check_inode_backpointer(trans, iter, d, target, target_snapshot);
+       ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
        if (ret)
                goto err;
 
@@ -1606,27 +1698,12 @@ static int check_dirent_target(struct btree_trans *trans,
 
                bkey_reassemble(&n->k_i, d.s_c);
                n->v.d_type = inode_d_type(target);
-
-               ret = bch2_trans_update(trans, iter, &n->k_i, 0);
-               if (ret)
-                       goto err;
-
-               d = dirent_i_to_s_c(n);
-       }
-
-       if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
-                       target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
-                       c, dirent_d_parent_subvol_wrong,
-                       "dirent has wrong d_parent_subvol field: got %u, should be %u",
-                       le32_to_cpu(d.v->d_parent_subvol),
-                       target->bi_parent_subvol)) {
-               n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-               ret = PTR_ERR_OR_ZERO(n);
-               if (ret)
-                       goto err;
-
-               bkey_reassemble(&n->k_i, d.s_c);
-               n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+               if (n->v.d_type == DT_SUBVOL) {
+                       n->v.d_parent_subvol = target->bi_parent_subvol;
+                       n->v.d_child_subvol = target->bi_subvol;
+               } else {
+                       n->v.d_inum = target->bi_inum;
+               }
 
                ret = bch2_trans_update(trans, iter, &n->k_i, 0);
                if (ret)
@@ -1641,45 +1718,113 @@ fsck_err:
        return ret;
 }
 
-static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *iter,
-                              struct bkey_s_c_dirent d)
+/*
+ * Find a subvolume that's a descendent of @snapshot:
+ *
+ * Walks the subvolumes btree from POS_MIN and stops at the first
+ * KEY_TYPE_subvolume key for which
+ * bch2_snapshot_is_ancestor(c, <subvolume's snapshot>, @snapshot) is true.
+ * The subvolume ID (the key's offset) is stored in @subvolid.
+ *
+ * Returns 0 on success, -ENOENT if the walk completes without a match, or
+ * the error produced by the btree iteration.
+ */
+static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
+               if (k.k->type != KEY_TYPE_subvolume)
+                       continue;
+
+               struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+               if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
+                       /*
+                        * Read the key while the iterator is still live; the
+                        * iterator is released exactly once, at found:
+                        */
+                       *subvolid = k.k->p.offset;
+                       goto found;
+               }
+       }
+       if (!ret)
+               ret = -ENOENT;
+found:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
+                                 struct bkey_s_c_dirent d)
 {
        struct bch_fs *c = trans->c;
+       struct btree_iter subvol_iter = {};
        struct bch_inode_unpacked subvol_root;
+       u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
        u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
-       u32 target_snapshot;
-       u64 target_inum;
+       u32 parent_snapshot;
+       u64 parent_inum;
+       struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       ret = subvol_lookup(trans, target_subvol,
-                             &target_snapshot, &target_inum);
+       ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
        if (ret && !bch2_err_matches(ret, ENOENT))
                return ret;
 
-       if (fsck_err_on(ret, c, dirent_to_missing_subvol,
-                       "dirent points to missing subvolume %u",
-                       le32_to_cpu(d.v->d_child_subvol)))
-               return __remove_dirent(trans, d.k->p);
+       if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
+                       "dirent parent_subvol points to missing subvolume\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
+           fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
+                       c, dirent_not_visible_in_parent_subvol,
+                       "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
+                       parent_snapshot,
+                       (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+               u32 new_parent_subvol;
+               ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+               if (ret)
+                       goto err;
 
-       ret = lookup_inode(trans, target_inum,
-                          &subvol_root, &target_snapshot);
+               struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
+               ret = PTR_ERR_OR_ZERO(new_dirent);
+               if (ret)
+                       goto err;
+
+               new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
+       }
+
+       struct bkey_s_c_subvolume s =
+               bch2_bkey_get_iter_typed(trans, &subvol_iter,
+                                        BTREE_ID_subvolumes, POS(0, target_subvol),
+                                        0, subvolume);
+       ret = bkey_err(s.s_c);
        if (ret && !bch2_err_matches(ret, ENOENT))
                return ret;
 
-       if (fsck_err_on(ret, c, subvol_to_missing_root,
-                       "subvolume %u points to missing subvolume root %llu",
-                       target_subvol,
-                       target_inum)) {
-               bch_err(c, "repair not implemented yet");
-               return -EINVAL;
+       if (ret) {
+               if (fsck_err(c, dirent_to_missing_subvol,
+                            "dirent points to missing subvolume\n%s",
+                            (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
+                       return __remove_dirent(trans, d.k->p);
+               ret = 0;
+               goto out;
        }
 
-       if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
-                       c, subvol_root_wrong_bi_subvol,
-                       "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
+       /*
+        * The target subvolume's fs_path_parent is checked against this
+        * dirent's d_parent_subvol; on repair the subvolume key is rewritten.
+        * buf may still hold text from the dirent messages above, so reset it
+        * before formatting the subvolume key.
+        */
+       if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
+                       c, subvol_fs_path_parent_wrong,
+                       "subvol with wrong fs_path_parent, should be %u\n%s",
+                       parent_subvol,
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+               struct bkey_i_subvolume *n =
+                       bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
+               ret = PTR_ERR_OR_ZERO(n);
+               if (ret)
+                       goto err;
+
+               /* parent_subvol is CPU order; the on-disk field is __le32 */
+               n->v.fs_path_parent = cpu_to_le32(parent_subvol);
+       }
+
+       u64 target_inum = le64_to_cpu(s.v->inode);
+       u32 target_snapshot = le32_to_cpu(s.v->snapshot);
+
+       ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+                       c, inode_bi_parent_wrong,
+                       "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
                        target_inum,
-                       subvol_root.bi_subvol, target_subvol)) {
-               subvol_root.bi_subvol = target_subvol;
+                       subvol_root.bi_parent_subvol, parent_subvol)) {
+               subvol_root.bi_parent_subvol = parent_subvol;
                ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
                if (ret)
                        return ret;
@@ -1689,7 +1834,11 @@ static int check_subvol_dirent(struct btree_trans *trans, struct btree_iter *ite
                                  target_snapshot);
        if (ret)
                return ret;
+out:
+err:
 fsck_err:
+       bch2_trans_iter_exit(trans, &subvol_iter);
+       printbuf_exit(&buf);
        return ret;
 }
 
@@ -1731,7 +1880,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 
        BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
 
-       i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, dir, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret < 0)
                goto err;
@@ -1777,7 +1926,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
        d = bkey_s_c_to_dirent(k);
 
        if (d.v->d_type == DT_SUBVOL) {
-               ret = check_subvol_dirent(trans, iter, d);
+               ret = check_dirent_to_subvol(trans, iter, d);
                if (ret)
                        goto err;
        } else {
@@ -1858,7 +2007,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
        if (ret)
                return ret;
 
-       i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret)
                return ret;
@@ -1997,62 +2146,52 @@ static int path_down(struct bch_fs *c, pathbuf *p,
  *
  * XXX: we should also be verifying that inodes are in the right subvolumes
  */
-static int check_path(struct btree_trans *trans,
-                     pathbuf *p,
-                     struct bch_inode_unpacked *inode,
-                     u32 snapshot)
+static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
 {
        struct bch_fs *c = trans->c;
+       struct btree_iter inode_iter = {};
+       struct bch_inode_unpacked inode;
+       struct printbuf buf = PRINTBUF;
+       u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
        int ret = 0;
 
-       snapshot = bch2_snapshot_equiv(c, snapshot);
        p->nr = 0;
 
-       while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
-                inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+       BUG_ON(bch2_inode_unpack(inode_k, &inode));
+
+       while (!(inode.bi_inum == BCACHEFS_ROOT_INO &&
+                inode.bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
                struct btree_iter dirent_iter;
                struct bkey_s_c_dirent d;
                u32 parent_snapshot = snapshot;
 
-               if (inode->bi_subvol) {
-                       u64 inum;
-
-                       ret = subvol_lookup(trans, inode->bi_parent_subvol,
-                                           &parent_snapshot, &inum);
-                       if (ret)
-                               break;
-               }
-
-               d = dirent_get_by_pos(trans, &dirent_iter,
-                                     SPOS(inode->bi_dir, inode->bi_dir_offset,
-                                          parent_snapshot));
+               d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
                ret = bkey_err(d.s_c);
                if (ret && !bch2_err_matches(ret, ENOENT))
                        break;
 
-               if (!ret && !dirent_points_to_inode(d, inode)) {
+               if (!ret && !dirent_points_to_inode(d, &inode)) {
                        bch2_trans_iter_exit(trans, &dirent_iter);
                        ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                }
 
                if (bch2_err_matches(ret, ENOENT)) {
-                       if (fsck_err(c,  inode_unreachable,
-                                    "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
-                                    inode->bi_inum, snapshot,
-                                    bch2_d_type_str(inode_d_type(inode)),
-                                    inode->bi_nlink,
-                                    inode->bi_dir,
-                                    inode->bi_dir_offset))
-                               ret = reattach_inode(trans, inode, snapshot);
-                       break;
+                       ret = 0;
+                       if (fsck_err(c, inode_unreachable,
+                                    "unreachable inode\n%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_bkey_val_to_text(&buf, c, inode_k),
+                                     buf.buf)))
+                               ret = reattach_inode(trans, &inode, snapshot);
+                       goto out;
                }
 
                bch2_trans_iter_exit(trans, &dirent_iter);
 
-               if (!S_ISDIR(inode->bi_mode))
+               if (!S_ISDIR(inode.bi_mode))
                        break;
 
-               ret = path_down(c, p, inode->bi_inum, snapshot);
+               ret = path_down(c, p, inode.bi_inum, snapshot);
                if (ret) {
                        bch_err(c, "memory allocation failure");
                        return ret;
@@ -2060,7 +2199,12 @@ static int check_path(struct btree_trans *trans,
 
                snapshot = parent_snapshot;
 
-               ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
+               bch2_trans_iter_exit(trans, &inode_iter);
+               inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
+                                            SPOS(0, inode.bi_dir, snapshot), 0);
+               ret = bkey_err(inode_k) ?:
+                       !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
+                       : bch2_inode_unpack(inode_k, &inode);
                if (ret) {
                        /* Should have been caught in dirents pass */
                        if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -2068,30 +2212,35 @@ static int check_path(struct btree_trans *trans,
                        break;
                }
 
-               if (path_is_dup(p, inode->bi_inum, snapshot)) {
+               snapshot = inode_k.k->p.snapshot;
+
+               if (path_is_dup(p, inode.bi_inum, snapshot)) {
                        /* XXX print path */
                        bch_err(c, "directory structure loop");
 
                        darray_for_each(*p, i)
                                pr_err("%llu:%u", i->inum, i->snapshot);
-                       pr_err("%llu:%u", inode->bi_inum, snapshot);
+                       pr_err("%llu:%u", inode.bi_inum, snapshot);
 
                        if (!fsck_err(c, dir_loop, "directory structure loop"))
                                return 0;
 
-                       ret = remove_backpointer(trans, inode);
+                       ret = remove_backpointer(trans, &inode);
                        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                bch_err_msg(c, ret, "removing dirent");
                        if (ret)
                                break;
 
-                       ret = reattach_inode(trans, inode, snapshot);
+                       ret = reattach_inode(trans, &inode, snapshot);
                        if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                               bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum);
+                               bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
                        break;
                }
        }
+out:
 fsck_err:
+       bch2_trans_iter_exit(trans, &inode_iter);
+       printbuf_exit(&buf);
        bch_err_fn(c, ret);
        return ret;
 }
@@ -2103,7 +2252,6 @@ fsck_err:
  */
 int bch2_check_directory_structure(struct bch_fs *c)
 {
-       struct bch_inode_unpacked u;
        pathbuf path = { 0, };
        int ret;
 
@@ -2116,12 +2264,10 @@ int bch2_check_directory_structure(struct bch_fs *c)
                        if (!bkey_is_inode(k.k))
                                continue;
 
-                       BUG_ON(bch2_inode_unpack(k, &u));
-
-                       if (u.bi_flags & BCH_INODE_unlinked)
+                       if (bch2_inode_flags(k) & BCH_INODE_unlinked)
                                continue;
 
-                       check_path(trans, &path, &u, iter.pos.snapshot);
+                       check_path(trans, &path, k);
                })));
        darray_exit(&path);
 
index dbe37ccc751958d351d622bcb145b56150fb9629..414aebe17fd335247c274ce8c640d857eb5f2566 100644 (file)
@@ -620,7 +620,8 @@ int bch2_trigger_inode(struct btree_trans *trans,
                bool old_deleted = bkey_is_deleted_inode(old);
                bool new_deleted = bkey_is_deleted_inode(new.s_c);
                if (old_deleted != new_deleted) {
-                       int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+                       int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
+                                                             new.k->p, new_deleted);
                        if (ret)
                                return ret;
                }
@@ -1169,7 +1170,7 @@ fsck_err:
        bch2_trans_iter_exit(trans, &inode_iter);
        return ret;
 delete:
-       ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+       ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
        goto out;
 }
 
index 9a9353c001c2a5fa62e80dc1e2b2705cc8534ab5..056298050550f9ecf4ce1e000cb32ce501f5bc62 100644 (file)
@@ -177,6 +177,20 @@ static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
        return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
 }
 
+/*
+ * Fetch bi_flags straight from an inode key, whichever of the three inode
+ * key formats it uses, without unpacking the whole inode.
+ *
+ * The v2/v3 formats are read with le64_to_cpu(); the result is narrowed to
+ * the u32 return type. Keys of any other type yield 0.
+ */
+static inline u32 bch2_inode_flags(struct bkey_s_c k)
+{
+       if (k.k->type == KEY_TYPE_inode)
+               return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+
+       if (k.k->type == KEY_TYPE_inode_v2)
+               return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+
+       if (k.k->type == KEY_TYPE_inode_v3)
+               return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+
+       return 0;
+}
+
 /* i_nlink: */
 
 static inline unsigned nlink_bias(umode_t mode)
index 7a4ca5a28b3eac83ead3d5e585e8886db5c456c9..ed7577cdb2124c946b54fef45b265d99126032e1 100644 (file)
@@ -44,8 +44,8 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
                          u64 dev_bucket, u64 time, bool set)
 {
        return time
-               ? bch2_btree_bit_mod(trans, BTREE_ID_lru,
-                                    lru_pos(lru_id, dev_bucket, time), set)
+               ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru,
+                                             lru_pos(lru_id, dev_bucket, time), set)
                : 0;
 }
 
index 9a4b7faa376503993f1c2da8f8d1e5963ef6ca5a..f8c2341e8d3d32aff0e1536efcc189dd1088e863 100644 (file)
@@ -332,6 +332,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Run fsck on mount")                            \
+       x(fsck_memory_usage_percent,    u8,                             \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_UINT(20, 70),                                             \
+         BCH2_NO_SB_OPT,               50,                             \
+         NULL,         "Maximum percentage of system ram fsck is allowed to pin")\
        x(fix_errors,                   u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_FN(bch2_opt_fix_errors),                                  \
index 9127d0e3ca2f6a3fd44e076b42f01ee6f7736427..4f8782d65d2e0d1399e33868a2ea07cf03b64f20 100644 (file)
@@ -264,7 +264,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
                        bkey_copy(&r->key, (struct bkey_i *) entry->start);
                        r->error = 0;
                } else {
-                       r->error = -EIO;
+                       r->error = -BCH_ERR_btree_node_read_error;
                }
                r->alive = true;
                break;
index fa0c8efd2a1b42450535474079b791aa2e6e9938..f0fc1dbb7239296af6712bb0ecc6cc666cf9d36b 100644 (file)
@@ -34,6 +34,7 @@
        x(check_snapshot_trees,                 18, PASS_ONLINE|PASS_FSCK)      \
        x(check_snapshots,                      19, PASS_ONLINE|PASS_FSCK)      \
        x(check_subvols,                        20, PASS_ONLINE|PASS_FSCK)      \
+       x(check_subvol_children,                35, PASS_ONLINE|PASS_FSCK)      \
        x(delete_dead_snapshots,                21, PASS_ONLINE|PASS_FSCK)      \
        x(fs_upgrade_for_subvolumes,            22, 0)                          \
        x(resume_logged_ops,                    23, PASS_ALWAYS)                \
index 626eaaea5b01d7923a8fc79d5bda3c48876b3c92..3337419faeff3b0cba88f4baa3cbc2030fe8be06 100644 (file)
          BIT_ULL(BCH_RECOVERY_PASS_check_inodes),              \
          BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)      \
        x(rebalance_work,                                       \
-         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))    \
+       x(subvolume_fs_parent,                                  \
+         BIT_ULL(BCH_RECOVERY_PASS_check_dirents),             \
+         BCH_FSCK_ERR_subvol_fs_path_parent_wrong)             \
+       x(btree_subvolume_children,                             \
+         BIT_ULL(BCH_RECOVERY_PASS_check_subvols),             \
+         BCH_FSCK_ERR_subvol_children_not_set)
 
 #define DOWNGRADE_TABLE()
 
index 63f18c7f30885e4a6f95b992e5a448b81f7fbdc4..1530bd35b94572c85314e031b42aca38395f8dcd 100644 (file)
        x(dirent_name_dot_or_dotdot,                            223)    \
        x(dirent_name_has_slash,                                224)    \
        x(dirent_d_type_wrong,                                  225)    \
-       x(dirent_d_parent_subvol_wrong,                         226)    \
+       x(inode_bi_parent_wrong,                                226)    \
        x(dirent_in_missing_dir_inode,                          227)    \
        x(dirent_in_non_dir_inode,                              228)    \
        x(dirent_to_missing_inode,                              229)    \
        x(reflink_p_front_pad_bad,                              245)    \
        x(journal_entry_dup_same_device,                        246)    \
        x(inode_bi_subvol_missing,                              247)    \
-       x(inode_bi_subvol_wrong,                                248)
+       x(inode_bi_subvol_wrong,                                248)    \
+       x(inode_points_to_missing_dirent,                       249)    \
+       x(inode_points_to_wrong_dirent,                         250)    \
+       x(inode_bi_parent_nonzero,                              251)    \
+       x(dirent_to_missing_parent_subvol,                      252)    \
+       x(dirent_not_visible_in_parent_subvol,                  253)    \
+       x(subvol_fs_path_parent_wrong,                          254)    \
+       x(subvol_root_fs_path_parent_nonzero,                   255)    \
+       x(subvol_children_not_set,                              256)    \
+       x(subvol_children_bad,                                  257)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index e7ee52c39990cc8dff2dafbd928a0da51f0f6d50..ce7aed12194238071f8fbf37aa111160ced286c9 100644 (file)
 
 static int bch2_subvolume_delete(struct btree_trans *, u32);
 
+/*
+ * Map a subvolume key to its position in the subvolume_children btree:
+ * (fs_path_parent, subvolume ID).
+ *
+ * POS_MIN is returned as the "no entry" value, both for keys that are not
+ * KEY_TYPE_subvolume and for subvolumes whose fs_path_parent is zero.
+ */
+static struct bpos subvolume_children_pos(struct bkey_s_c k)
+{
+       if (k.k->type == KEY_TYPE_subvolume) {
+               struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k);
+
+               if (subvol.v->fs_path_parent)
+                       return POS(le32_to_cpu(subvol.v->fs_path_parent),
+                                  subvol.k->p.offset);
+       }
+
+       return POS_MIN;
+}
+
 static int check_subvol(struct btree_trans *trans,
                        struct btree_iter *iter,
                        struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        struct bkey_s_c_subvolume subvol;
+       struct btree_iter subvol_children_iter = {};
        struct bch_snapshot snapshot;
+       struct printbuf buf = PRINTBUF;
        unsigned snapid;
        int ret = 0;
 
@@ -42,6 +55,42 @@ static int check_subvol(struct btree_trans *trans,
                return ret ?: -BCH_ERR_transaction_restart_nested;
        }
 
+       if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
+                       subvol.v->fs_path_parent,
+                       c, subvol_root_fs_path_parent_nonzero,
+                       "root subvolume has nonzero fs_path_parent\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+               struct bkey_i_subvolume *n =
+                       bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+               ret = PTR_ERR_OR_ZERO(n);
+               if (ret)
+                       goto err;
+
+               n->v.fs_path_parent = 0;
+       }
+
+       if (subvol.v->fs_path_parent) {
+               struct bpos pos = subvolume_children_pos(k);
+
+               struct bkey_s_c subvol_children_k =
+                       bch2_bkey_get_iter(trans, &subvol_children_iter,
+                                          BTREE_ID_subvolume_children, pos, 0);
+               ret = bkey_err(subvol_children_k);
+               if (ret)
+                       goto err;
+
+               if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set,
+                               c, subvol_children_not_set,
+                               "subvolume not set in subvolume_children btree at %llu:%llu\n%s",
+                               pos.inode, pos.offset,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+                       ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true);
+                       if (ret)
+                               goto err;
+               }
+       }
+
        struct bch_inode_unpacked inode;
        struct btree_iter inode_iter = {};
        ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
@@ -102,9 +151,10 @@ static int check_subvol(struct btree_trans *trans,
                        SET_BCH_SUBVOLUME_SNAP(&s->v, true);
                }
        }
-
 err:
 fsck_err:
+       bch2_trans_iter_exit(trans, &subvol_children_iter);
+       printbuf_exit(&buf);
        return ret;
 }
 
@@ -119,6 +169,42 @@ int bch2_check_subvols(struct bch_fs *c)
        return ret;
 }
 
+/*
+ * Validate one key of the subvolume_children btree.
+ *
+ * The key's position is (parent, child): the subvolume at offset @child is
+ * looked up and its fs_path_parent compared against @parent. If the
+ * subvolume does not exist or the parent does not match, an fsck error is
+ * raised and, when it is to be fixed, the entry is deleted via @child_iter.
+ *
+ * Returns 0 when the entry is fine or the repair was not taken, otherwise
+ * the error from the lookup, from fsck_err, or from the delete.
+ */
+static int check_subvol_child(struct btree_trans *trans,
+                             struct btree_iter *child_iter,
+                             struct bkey_s_c child_k)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_subvolume s;
+       int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset),
+                                         0, subvolume, &s);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       /* s is only read when the lookup succeeded (ret == 0) */
+       if (fsck_err_on(ret ||
+                       le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode,
+                       c, subvol_children_bad,
+                       "incorrect entry in subvolume_children btree %llu:%llu",
+                       child_k.k->p.inode, child_k.k->p.offset))
+               return bch2_btree_delete_at(trans, child_iter, 0);
+
+       /*
+        * Nothing repaired: don't let the ENOENT from the lookup above
+        * escape as this function's result
+        */
+       ret = 0;
+fsck_err:
+       return ret;
+}
+
+/*
+ * fsck pass: run check_subvol_child() on every key in the
+ * subvolume_children btree, committing after each key.
+ *
+ * Returns 0 on success or the first error from the walk. The error is
+ * logged via bch_err_fn() and propagated to the caller rather than
+ * discarded.
+ */
+int bch2_check_subvol_children(struct bch_fs *c)
+{
+       int ret = bch2_trans_run(c,
+               for_each_btree_key_commit(trans, iter,
+                               BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k,
+                               NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+                       check_subvol_child(trans, &iter, k)));
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 /* Subvolumes: */
 
 int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
@@ -143,8 +229,50 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
                   le64_to_cpu(s.v->inode),
                   le32_to_cpu(s.v->snapshot));
 
-       if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
-               prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
+       if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) {
+               prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
+               prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
+       }
+}
+
+/*
+ * Set or clear the bit at @pos in the subvolume_children btree.
+ * POS_MIN means "no entry" and is skipped, returning 0.
+ */
+static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
+{
+       if (bpos_eq(pos, POS_MIN))
+               return 0;
+
+       return bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set);
+}
+
+/*
+ * Trigger for subvolume keys: keeps the subvolume_children btree in step
+ * with each subvolume's fs_path_parent.
+ *
+ * Only acts when BTREE_TRIGGER_TRANSACTIONAL is set in @flags. If the
+ * children-btree position derived from @old differs from the one derived
+ * from @new, the old position is cleared and then the new one is set.
+ *
+ * Returns 0 on success or the first error from either update.
+ */
+int bch2_subvolume_trigger(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
+                          struct bkey_s_c old, struct bkey_s new,
+                          unsigned flags)
+{
+       if (!(flags & BTREE_TRIGGER_TRANSACTIONAL))
+               return 0;
+
+       struct bpos old_pos = subvolume_children_pos(old);
+       struct bpos new_pos = subvolume_children_pos(new.s_c);
+
+       if (bpos_eq(old_pos, new_pos))
+               return 0;
+
+       int ret = subvolume_children_mod(trans, old_pos, false);
+       if (ret)
+               return ret;
+
+       return subvolume_children_mod(trans, new_pos, true);
+}
+
+/*
+ * Check whether @subvol has any entry in the subvolume_children btree.
+ *
+ * Peeks the first key at or after POS(@subvol, 0); if that key's inode
+ * field equals @subvol the subvolume has at least one child.
+ *
+ * Returns -BCH_ERR_ENOTEMPTY_subvol_not_empty if a child exists, 0 if
+ * none does, or the error from the peek.
+ */
+int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
+{
+       struct btree_iter iter;
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
+       struct bkey_s_c k = bch2_btree_iter_peek(&iter);
+       bch2_trans_iter_exit(trans, &iter);
+
+       int ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       if (k.k && k.k->p.inode == subvol)
+               return -BCH_ERR_ENOTEMPTY_subvol_not_empty;
+
+       return 0;
 }
 
 static __always_inline int
@@ -228,8 +356,8 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
        if (k.k->type != KEY_TYPE_subvolume)
                return 0;
 
-       if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
-           le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
+       if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) &&
+           le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent)
                return 0;
 
        s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
@@ -237,7 +365,7 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       s->v.parent = cpu_to_le32(new_parent);
+       s->v.creation_parent = cpu_to_le32(new_parent);
        return 0;
 }
 
@@ -260,7 +388,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
                                BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
                                NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
                        bch2_subvolume_reparent(trans, &iter, k,
-                                       subvolid_to_delete, le32_to_cpu(s.parent)));
+                                       subvolid_to_delete, le32_to_cpu(s.creation_parent)));
 }
 
 /*
@@ -391,6 +519,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
 }
 
 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
+                         u32 parent_subvolid,
                          u32 src_subvolid,
                          u32 *new_subvolid,
                          u32 *new_snapshotid,
@@ -447,12 +576,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
        if (ret)
                goto err;
 
-       new_subvol->v.flags     = 0;
-       new_subvol->v.snapshot  = cpu_to_le32(new_nodes[0]);
-       new_subvol->v.inode     = cpu_to_le64(inode);
-       new_subvol->v.parent    = cpu_to_le32(src_subvolid);
-       new_subvol->v.otime.lo  = cpu_to_le64(bch2_current_time(c));
-       new_subvol->v.otime.hi  = 0;
+       new_subvol->v.flags             = 0;
+       new_subvol->v.snapshot          = cpu_to_le32(new_nodes[0]);
+       new_subvol->v.inode             = cpu_to_le64(inode);
+       new_subvol->v.creation_parent   = cpu_to_le32(src_subvolid);
+       new_subvol->v.fs_path_parent    = cpu_to_le32(parent_subvolid);
+       new_subvol->v.otime.lo          = cpu_to_le64(bch2_current_time(c));
+       new_subvol->v.otime.hi          = 0;
 
        SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
        SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
index 3ca1d183369c5f439f42ef222cacd28be34b6c98..4045a180154a366fc19fdb1ae75a3bd6f3504c8f 100644 (file)
@@ -7,17 +7,22 @@
 enum bkey_invalid_flags;
 
 int bch2_check_subvols(struct bch_fs *);
+int bch2_check_subvol_children(struct bch_fs *);
 
 int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
                           enum bkey_invalid_flags, struct printbuf *);
 void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
+                          struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_subvolume ((struct bkey_ops) {           \
        .key_invalid    = bch2_subvolume_invalid,               \
        .val_to_text    = bch2_subvolume_to_text,               \
+       .trigger        = bch2_subvolume_trigger,               \
        .min_val_size   = 16,                                   \
 })
 
+int bch2_subvol_has_children(struct btree_trans *, u32);
 int bch2_subvolume_get(struct btree_trans *, unsigned,
                       bool, int, struct bch_subvolume *);
 int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
@@ -29,8 +34,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *);
 void bch2_delete_dead_snapshots_async(struct bch_fs *);
 
 int bch2_subvolume_unlink(struct btree_trans *, u32);
-int bch2_subvolume_create(struct btree_trans *, u64, u32,
-                         u32 *, u32 *, bool);
+int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
 
 int bch2_fs_subvolumes_init(struct bch_fs *);
 
index af79134b07d6ad304e7af22b838d9709e777a41b..e029df7ba89f5244b65c99d021252d753207d3bd 100644 (file)
@@ -19,8 +19,8 @@ struct bch_subvolume {
         * This is _not_ necessarily the subvolume of the directory containing
         * this subvolume:
         */
-       __le32                  parent;
-       __le32                  pad;
+       __le32                  creation_parent;
+       __le32                  fs_path_parent;
        bch_le128               otime;
 };
 
index b93d150ddf801628f4d08d0b6b6ef3892ed74c24..21ec6afc6788413a35c815cef827ef8a25891329 100644 (file)
@@ -102,6 +102,8 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
  * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
  * @s: mean and variance number of samples and their sums
  * @x: new value to include in the &mean_and_variance_weighted
+ * @initted: caller must track whether this is the first use or not
+ * @weight: ewma weight
  *
  * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
  * values are stored bitshifted for performance and added precision.
@@ -132,6 +134,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
 /**
  * mean_and_variance_weighted_get_mean() - get mean from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
 s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
                u8 weight)
@@ -143,6 +146,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
 /**
  * mean_and_variance_weighted_get_variance() -- get variance from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
 u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
                u8 weight)
@@ -155,6 +159,7 @@ EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
 /**
  * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
 u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
                u8 weight)