git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 5e73602f6c bcachefs: Fix for fsck hanging
author: Kent Overstreet <kent.overstreet@gmail.com>
Fri, 12 Feb 2021 21:02:38 +0000 (16:02 -0500)
committer: Kent Overstreet <kent.overstreet@gmail.com>
Fri, 12 Feb 2021 21:02:52 +0000 (16:02 -0500)
17 files changed:
.bcachefs_revision
include/linux/wait.h
libbcachefs/bcachefs_ioctl.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_update_leaf.c
libbcachefs/fs-io.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/journal.c
libbcachefs/opts.h
libbcachefs/recovery.c
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/sysfs.c

index 37ce41cc3fd2a81900ae64e2f9cc4c675cf72442..ee42a247249827bd2442f8693d986203557d456d 100644 (file)
@@ -1 +1 @@
-79847e4824278463f7eb826dfd78221979e29a8b
+5e73602f6c2569e7b81b3ea658502ac9b546cb61
index 62d15e5d73c65f74629e6195b5c9c69939a72078..c3d982421883ac640a56c5005a6d467502b5e86e 100644 (file)
@@ -91,6 +91,7 @@ do {                                                                  \
 } while (0)
 
 #define wait_event_killable(wq, condition)     ({wait_event(wq, condition); 0; })
+#define wait_event_interruptible(wq, condition)        ({wait_event(wq, condition); 0; })
 
 #define __wait_event_timeout(wq, condition, timeout)                   \
        ___wait_event(wq, ___wait_cond_timeout(condition),              \
index 0e626b098d9140c1093fa458a6b121250c59f5f5..f1cb5d40512996c67e115d51c5bda15e83c4db39 100644 (file)
@@ -14,6 +14,9 @@
 #define BCH_FORCE_IF_DATA_DEGRADED     (1 << 2)
 #define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
 
+#define BCH_FORCE_IF_LOST                      \
+       (BCH_FORCE_IF_DATA_LOST|                \
+        BCH_FORCE_IF_METADATA_LOST)
 #define BCH_FORCE_IF_DEGRADED                  \
        (BCH_FORCE_IF_DATA_DEGRADED|            \
         BCH_FORCE_IF_METADATA_DEGRADED)
index 401dfd2c450a48a8b7c82c4962fe19b4d34b39b9..146ad2f531abb356af80ba6e0f63127c281e047f 100644 (file)
@@ -516,12 +516,7 @@ static void bch2_btree_iter_verify_level(struct btree_iter *iter,
        if (!bch2_btree_node_relock(iter, level))
                return;
 
-       /*
-        * Ideally this invariant would always be true, and hopefully in the
-        * future it will be, but for now set_pos_same_leaf() breaks it:
-        */
-       BUG_ON(iter->uptodate < BTREE_ITER_NEED_TRAVERSE &&
-              !btree_iter_pos_in_node(iter, l->b));
+       BUG_ON(!btree_iter_pos_in_node(iter, l->b));
 
        /*
         * node iterators don't use leaf node iterator:
@@ -1457,36 +1452,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
 
 /* Iterate across keys (in leaf nodes only) */
 
-void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_pos)
-{
-       struct btree_iter_level *l = &iter->l[0];
-
-       EBUG_ON(iter->level != 0);
-       EBUG_ON(bkey_cmp(new_pos, iter->pos) < 0);
-       EBUG_ON(!btree_node_locked(iter, 0));
-       EBUG_ON(bkey_cmp(new_pos, l->b->key.k.p) > 0);
-
-       bkey_init(&iter->k);
-       iter->k.p = iter->pos = new_pos;
-       btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
-       btree_iter_advance_to_pos(iter, l, -1);
-
-       /*
-        * XXX:
-        * keeping a node locked that's outside (even just outside) iter->pos
-        * breaks __bch2_btree_node_lock(). This seems to only affect
-        * bch2_btree_node_get_sibling so for now it's fixed there, but we
-        * should try to get rid of this corner case.
-        *
-        * (this behaviour is currently needed for BTREE_INSERT_NOUNLOCK)
-        */
-
-       if (bch2_btree_node_iter_end(&l->iter) &&
-           btree_iter_pos_after_node(iter, l->b))
-               btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
-}
-
 static void btree_iter_pos_changed(struct btree_iter *iter, int cmp)
 {
        unsigned l = iter->level;
@@ -1552,40 +1517,57 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
        btree_iter_pos_changed(iter, cmp);
 }
 
-static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
+static inline bool bch2_btree_iter_advance_pos(struct btree_iter *iter)
 {
-       struct btree_iter_level *l = &iter->l[0];
-       bool ret;
+       struct bpos pos = iter->k.p;
 
-       bkey_init(&iter->k);
-       iter->k.p = iter->pos = l->b->key.k.p;
+       if (unlikely(!bkey_cmp(pos, POS_MAX)))
+               return false;
+
+       if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+               pos = bkey_successor(pos);
+       bch2_btree_iter_set_pos(iter, pos);
+       return true;
+}
+
+static inline bool bch2_btree_iter_rewind_pos(struct btree_iter *iter)
+{
+       struct bpos pos = bkey_start_pos(&iter->k);
+
+       if (unlikely(!bkey_cmp(pos, POS_MIN)))
+               return false;
+
+       if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+               pos = bkey_predecessor(pos);
+       bch2_btree_iter_set_pos(iter, pos);
+       return true;
+}
+
+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
+{
+       struct bpos next_pos = iter->l[0].b->key.k.p;
+       bool ret = bkey_cmp(next_pos, POS_MAX) != 0;
 
-       ret = bkey_cmp(iter->pos, POS_MAX) != 0;
        if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-               iter->k.p = iter->pos = bkey_successor(iter->pos);
+               next_pos = bkey_successor(next_pos);
 
-       btree_iter_pos_changed(iter, 1);
+       bch2_btree_iter_set_pos(iter, next_pos);
        return ret;
 }
 
 static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
 {
-       struct btree_iter_level *l = &iter->l[0];
-       bool ret;
-
-       bkey_init(&iter->k);
-       iter->k.p = iter->pos = l->b->data->min_key;
-       iter->uptodate  = BTREE_ITER_NEED_TRAVERSE;
+       struct bpos next_pos = iter->l[0].b->data->min_key;
+       bool ret = bkey_cmp(next_pos, POS_MIN) != 0;
 
-       ret = bkey_cmp(iter->pos, POS_MIN) != 0;
        if (ret) {
-               iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+               next_pos = bkey_predecessor(next_pos);
 
                if (iter->flags & BTREE_ITER_IS_EXTENTS)
-                       iter->k.p = iter->pos = bkey_predecessor(iter->pos);
+                       next_pos = bkey_predecessor(next_pos);
        }
 
-       btree_iter_pos_changed(iter, -1);
+       bch2_btree_iter_set_pos(iter, next_pos);
        return ret;
 }
 
@@ -1651,8 +1633,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
         * iter->pos should always be equal to the key we just
         * returned - except extents can straddle iter->pos:
         */
-       if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
-           bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+       if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
                iter->pos = bkey_start_pos(k.k);
 
        iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1667,14 +1648,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
  */
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
 {
-       if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+       if (!bch2_btree_iter_advance_pos(iter))
                return bkey_s_c_null;
 
-       bch2_btree_iter_set_pos(iter,
-               (iter->flags & BTREE_ITER_IS_EXTENTS)
-               ? iter->k.p
-               : bkey_successor(iter->k.p));
-
        return bch2_btree_iter_peek(iter);
 }
 
@@ -1726,10 +1702,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
                k = __bch2_btree_iter_peek_with_updates(iter);
 
                if (k.k && bkey_deleted(k.k)) {
-                       bch2_btree_iter_set_pos(iter,
-                               (iter->flags & BTREE_ITER_IS_EXTENTS)
-                               ? iter->k.p
-                               : bkey_successor(iter->k.p));
+                       bch2_btree_iter_advance_pos(iter);
                        continue;
                }
 
@@ -1744,8 +1717,7 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
         * iter->pos should always be equal to the key we just
         * returned - except extents can straddle iter->pos:
         */
-       if (!(iter->flags & BTREE_ITER_IS_EXTENTS) ||
-           bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+       if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
                iter->pos = bkey_start_pos(k.k);
 
        iter->uptodate = BTREE_ITER_UPTODATE;
@@ -1754,14 +1726,9 @@ struct bkey_s_c bch2_btree_iter_peek_with_updates(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_next_with_updates(struct btree_iter *iter)
 {
-       if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+       if (!bch2_btree_iter_advance_pos(iter))
                return bkey_s_c_null;
 
-       bch2_btree_iter_set_pos(iter,
-               (iter->flags & BTREE_ITER_IS_EXTENTS)
-               ? iter->k.p
-               : bkey_successor(iter->k.p));
-
        return bch2_btree_iter_peek_with_updates(iter);
 }
 
@@ -1789,7 +1756,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
                        return bkey_s_c_err(ret);
 
                k = __btree_iter_peek(iter, l);
-               if (!k.k || bkey_cmp(bkey_start_pos(k.k), pos) > 0)
+               if (!k.k ||
+                   ((iter->flags & BTREE_ITER_IS_EXTENTS)
+                    ? bkey_cmp(bkey_start_pos(k.k), pos) >= 0
+                    : bkey_cmp(bkey_start_pos(k.k), pos) > 0))
                        k = __btree_iter_prev(iter, l);
 
                if (likely(k.k))
@@ -1800,8 +1770,13 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
        }
 
        EBUG_ON(bkey_cmp(bkey_start_pos(k.k), pos) > 0);
-       iter->pos       = bkey_start_pos(k.k);
+
+       /* Extents can straddle iter->pos: */
+       if (bkey_cmp(k.k->p, pos) < 0)
+               iter->pos = k.k->p;
        iter->uptodate  = BTREE_ITER_UPTODATE;
+
+       bch2_btree_iter_verify_level(iter, 0);
        return k;
 }
 
@@ -1811,16 +1786,9 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
  */
 struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
 {
-       struct bpos pos = bkey_start_pos(&iter->k);
-
-       EBUG_ON(btree_iter_type(iter) != BTREE_ITER_KEYS);
-       bch2_btree_iter_checks(iter);
-
-       if (unlikely(!bkey_cmp(pos, POS_MIN)))
+       if (!bch2_btree_iter_rewind_pos(iter))
                return bkey_s_c_null;
 
-       bch2_btree_iter_set_pos(iter, bkey_predecessor(pos));
-
        return bch2_btree_iter_peek_prev(iter);
 }
 
@@ -1926,14 +1894,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
 struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
 {
-       if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+       if (!bch2_btree_iter_advance_pos(iter))
                return bkey_s_c_null;
 
-       bch2_btree_iter_set_pos(iter,
-               (iter->flags & BTREE_ITER_IS_EXTENTS)
-               ? iter->k.p
-               : bkey_successor(iter->k.p));
-
        return bch2_btree_iter_peek_slot(iter);
 }
 
index 9a7f8d0197eca7c9e2d9fb1d3838e3c3dfbb2d72..12c519ae2a6095fde4ad49817c74a83482d3f365 100644 (file)
@@ -174,7 +174,6 @@ struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
 
 struct bkey_s_c bch2_btree_iter_peek_cached(struct btree_iter *);
 
-void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
 void __bch2_btree_iter_set_pos(struct btree_iter *, struct bpos, bool);
 void bch2_btree_iter_set_pos(struct btree_iter *, struct bpos);
 
index 967e1e4d962042f321a5051403d2e1592db4023e..d09124fc46f295f2f8b70f2608684a3b228b4f49 100644 (file)
@@ -951,12 +951,8 @@ retry:
 
        trans_for_each_iter(trans, iter)
                if ((trans->iters_live & (1ULL << iter->idx)) &&
-                   (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT)) {
-                       if (trans->flags & BTREE_INSERT_NOUNLOCK)
-                               bch2_btree_iter_set_pos_same_leaf(iter, iter->pos_after_commit);
-                       else
-                               bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
-               }
+                   (iter->flags & BTREE_ITER_SET_POS_AFTER_COMMIT))
+                       bch2_btree_iter_set_pos(iter, iter->pos_after_commit);
 out:
        bch2_journal_preres_put(&trans->c->journal, &trans->journal_preres);
 
index af7f8791a879a232693b0be04d7117a5951da723..56cfb0d60c03266ad92ee3468e5d9c14174279b7 100644 (file)
@@ -2440,7 +2440,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
        struct address_space *mapping = inode->v.i_mapping;
        struct bkey_buf copy;
        struct btree_trans trans;
-       struct btree_iter *src, *dst;
+       struct btree_iter *src, *dst, *del;
        loff_t shift, new_size;
        u64 src_start;
        int ret;
@@ -2510,6 +2510,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
                        POS(inode->v.i_ino, src_start >> 9),
                        BTREE_ITER_INTENT);
        dst = bch2_trans_copy_iter(&trans, src);
+       del = bch2_trans_copy_iter(&trans, src);
 
        while (1) {
                struct disk_reservation disk_res =
@@ -2530,8 +2531,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
                if (!k.k || k.k->p.inode != inode->v.i_ino)
                        break;
 
-               BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));
-
                if (insert &&
                    bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
                        break;
@@ -2563,6 +2562,7 @@ reassemble:
                delete.k.p = copy.k->k.p;
                delete.k.size = copy.k->k.size;
                delete.k.p.offset -= shift >> 9;
+               bch2_btree_iter_set_pos(del, bkey_start_pos(&delete.k));
 
                next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
 
@@ -2583,9 +2583,7 @@ reassemble:
                        BUG_ON(ret);
                }
 
-               bch2_btree_iter_set_pos(src, bkey_start_pos(&delete.k));
-
-               ret =   bch2_trans_update(&trans, src, &delete, trigger_flags) ?:
+               ret =   bch2_trans_update(&trans, del, &delete, trigger_flags) ?:
                        bch2_trans_update(&trans, dst, copy.k, trigger_flags) ?:
                        bch2_trans_commit(&trans, &disk_res,
                                          &inode->ei_journal_seq,
index df0f00f10bd73cad36e9feb2443469df95a1b39d..c3f8396063d6f2b2f0b0791d578ec235f2ddeeac 100644 (file)
@@ -193,7 +193,7 @@ static int hash_redo_key(const struct bch_hash_desc desc,
        bch2_trans_update(trans, k_iter, &delete, 0);
 
        return bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
-                            tmp, BCH_HASH_SET_MUST_CREATE);
+                            tmp, 0);
 }
 
 static int fsck_hash_delete_at(struct btree_trans *trans,
@@ -1072,6 +1072,11 @@ static void inc_link(struct bch_fs *c, nlink_table *links,
        if (inum < range_start || inum >= *range_end)
                return;
 
+       if (inum - range_start >= SIZE_MAX / sizeof(struct nlink)) {
+               *range_end = inum;
+               return;
+       }
+
        link = genradix_ptr_alloc(links, inum - range_start, GFP_KERNEL);
        if (!link) {
                bch_verbose(c, "allocation failed during fsck - will need another pass");
@@ -1353,16 +1358,17 @@ peek_nlinks:    link = genradix_iter_peek(&nlinks_iter, links);
                        break;
 
                nlinks_pos = range_start + nlinks_iter.pos;
-               if (iter->pos.offset > nlinks_pos) {
+
+               if (link && nlinks_pos < iter->pos.offset) {
                        /* Should have been caught by dirents pass: */
-                       need_fsck_err_on(link && link->count, c,
+                       need_fsck_err_on(link->count, c,
                                "missing inode %llu (nlink %u)",
                                nlinks_pos, link->count);
                        genradix_iter_advance(&nlinks_iter, links);
                        goto peek_nlinks;
                }
 
-               if (iter->pos.offset < nlinks_pos || !link)
+               if (!link || nlinks_pos > iter->pos.offset)
                        link = &zero_links;
 
                if (k.k && k.k->type == KEY_TYPE_inode) {
index bf1c7319669c93342cc36ec1e8d7219addbbc4f9..746173f15ae3f95b09f1b3dd2d7a70d38c064b64 100644 (file)
@@ -479,7 +479,7 @@ int bch2_inode_create(struct btree_trans *trans,
        u64 min, max, start, *hint;
        int ret;
 
-       unsigned cpu = raw_smp_processor_id();
+       u64 cpu = raw_smp_processor_id();
        unsigned bits = (c->opts.inodes_32bit
                ? 31 : 63) - c->inode_shard_bits;
 
index c4cb4f05a66f0a58e46ddc1132a52d2d2047fd03..395021b5ac8e9e4c0e103415c1e858c1a4769931 100644 (file)
@@ -575,6 +575,8 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
 
        spin_lock(&j->lock);
 
+       BUG_ON(seq > journal_cur_seq(j));
+
        /* Recheck under lock: */
        if (j->err_seq && seq >= j->err_seq) {
                ret = -EIO;
@@ -640,9 +642,10 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
        u64 start_time = local_clock();
        int ret, ret2;
 
-       ret = wait_event_killable(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
+       ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
 
-       bch2_time_stats_update(j->flush_seq_time, start_time);
+       if (!ret)
+               bch2_time_stats_update(j->flush_seq_time, start_time);
 
        return ret ?: ret2 < 0 ? ret2 : 0;
 }
@@ -1158,6 +1161,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               "seq:\t\t\t%llu\n"
               "last_seq:\t\t%llu\n"
               "last_seq_ondisk:\t%llu\n"
+              "flushed_seq_ondisk:\t%llu\n"
               "prereserved:\t\t%u/%u\n"
               "nr flush writes:\t%llu\n"
               "nr noflush writes:\t%llu\n"
@@ -1170,6 +1174,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               journal_cur_seq(j),
               journal_last_seq(j),
               j->last_seq_ondisk,
+              j->flushed_seq_ondisk,
               j->prereserved.reserved,
               j->prereserved.remaining,
               j->nr_flush_writes,
index d835a85338c65b887c21661ccc9dd07887b5610f..c123c42630a6543a974dd64e385bfd067331546b 100644 (file)
@@ -222,6 +222,11 @@ enum opt_type {
          OPT_BOOL(),                                                   \
          NO_SB_OPT,                    false,                          \
          NULL,         "Allow mounting in degraded mode")              \
+       x(very_degraded,                u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Allow mounting in when data will be missing")  \
        x(discard,                      u8,                             \
          OPT_MOUNT|OPT_DEVICE,                                         \
          OPT_BOOL(),                                                   \
index 7ba098adcab9b20f2e70e8aeeaaf28a868828cb9..8560023b4c7ad007fae235ce0efc4f7a67069597 100644 (file)
@@ -1088,6 +1088,13 @@ int bch2_fs_recovery(struct bch_fs *c)
                bch_info(c, "recovering from clean shutdown, journal seq %llu",
                         le64_to_cpu(clean->journal_seq));
 
+       if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
+               bch_info(c, "alloc_v2 feature bit not set, fsck required");
+               c->opts.fsck = true;
+               c->opts.fix_errors = FSCK_OPT_YES;
+               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_alloc_v2;
+       }
+
        if (!c->replicas.entries ||
            c->opts.rebuild_replicas) {
                bch_info(c, "building replicas info");
index 0330204fb4bf3e96b2ff0bb391754c8089869586..be73b458e4f63c91751dbdfbcc1bcbfd4bbe012b 100644 (file)
@@ -958,94 +958,48 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
 
 /* Query replicas: */
 
-struct replicas_status __bch2_replicas_status(struct bch_fs *c,
-                                             struct bch_devs_mask online_devs)
+bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
+                          unsigned flags, bool print)
 {
-       struct bch_sb_field_members *mi;
        struct bch_replicas_entry *e;
-       unsigned i, nr_online, nr_offline;
-       struct replicas_status ret;
-
-       memset(&ret, 0, sizeof(ret));
-
-       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-               ret.replicas[i].redundancy = INT_MAX;
-
-       mi = bch2_sb_get_members(c->disk_sb.sb);
+       bool ret = true;
 
        percpu_down_read(&c->mark_lock);
-
        for_each_cpu_replicas_entry(&c->replicas, e) {
-               if (e->data_type >= ARRAY_SIZE(ret.replicas))
-                       panic("e %p data_type %u\n", e, e->data_type);
+               unsigned i, nr_online = 0, dflags = 0;
+               bool metadata = e->data_type < BCH_DATA_user;
 
-               nr_online = nr_offline = 0;
+               for (i = 0; i < e->nr_devs; i++)
+                       nr_online += test_bit(e->devs[i], devs.d);
 
-               for (i = 0; i < e->nr_devs; i++) {
-                       BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
-                                               e->devs[i]));
+               if (nr_online < e->nr_required)
+                       dflags |= metadata
+                               ? BCH_FORCE_IF_METADATA_LOST
+                               : BCH_FORCE_IF_DATA_LOST;
 
-                       if (test_bit(e->devs[i], online_devs.d))
-                               nr_online++;
-                       else
-                               nr_offline++;
-               }
+               if (nr_online < e->nr_devs)
+                       dflags |= metadata
+                               ? BCH_FORCE_IF_METADATA_DEGRADED
+                               : BCH_FORCE_IF_DATA_DEGRADED;
 
-               ret.replicas[e->data_type].redundancy =
-                       min(ret.replicas[e->data_type].redundancy,
-                           (int) nr_online - (int) e->nr_required);
+               if (dflags & ~flags) {
+                       if (print) {
+                               char buf[100];
 
-               ret.replicas[e->data_type].nr_offline =
-                       max(ret.replicas[e->data_type].nr_offline,
-                           nr_offline);
-       }
+                               bch2_replicas_entry_to_text(&PBUF(buf), e);
+                               bch_err(c, "insufficient devices online (%u) for replicas entry %s",
+                                       nr_online, buf);
+                       }
+                       ret = false;
+                       break;
+               }
 
+       }
        percpu_up_read(&c->mark_lock);
 
-       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-               if (ret.replicas[i].redundancy == INT_MAX)
-                       ret.replicas[i].redundancy = 0;
-
        return ret;
 }
 
-struct replicas_status bch2_replicas_status(struct bch_fs *c)
-{
-       return __bch2_replicas_status(c, bch2_online_devs(c));
-}
-
-static bool have_enough_devs(struct replicas_status s,
-                            enum bch_data_type type,
-                            bool force_if_degraded,
-                            bool force_if_lost)
-{
-       return (!s.replicas[type].nr_offline || force_if_degraded) &&
-               (s.replicas[type].redundancy >= 0 || force_if_lost);
-}
-
-bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
-{
-       return (have_enough_devs(s, BCH_DATA_journal,
-                                flags & BCH_FORCE_IF_METADATA_DEGRADED,
-                                flags & BCH_FORCE_IF_METADATA_LOST) &&
-               have_enough_devs(s, BCH_DATA_btree,
-                                flags & BCH_FORCE_IF_METADATA_DEGRADED,
-                                flags & BCH_FORCE_IF_METADATA_LOST) &&
-               have_enough_devs(s, BCH_DATA_user,
-                                flags & BCH_FORCE_IF_DATA_DEGRADED,
-                                flags & BCH_FORCE_IF_DATA_LOST));
-}
-
-int bch2_replicas_online(struct bch_fs *c, bool meta)
-{
-       struct replicas_status s = bch2_replicas_status(c);
-
-       return (meta
-               ? min(s.replicas[BCH_DATA_journal].redundancy,
-                     s.replicas[BCH_DATA_btree].redundancy)
-               : s.replicas[BCH_DATA_user].redundancy) + 1;
-}
-
 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
 {
        struct bch_replicas_entry *e;
index a16ef23bde8af4fdf7d9113601c504a04c5b3853..9c8fd3d9824767448f3dcdfe321ff4fd2be1f523 100644 (file)
@@ -39,19 +39,9 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
        e->devs[0]      = dev;
 }
 
-struct replicas_status {
-       struct {
-               int             redundancy;
-               unsigned        nr_offline;
-       }                       replicas[BCH_DATA_NR];
-};
-
-struct replicas_status __bch2_replicas_status(struct bch_fs *,
-                                             struct bch_devs_mask);
-struct replicas_status bch2_replicas_status(struct bch_fs *);
-bool bch2_have_enough_devs(struct replicas_status, unsigned);
-
-int bch2_replicas_online(struct bch_fs *, bool);
+bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
+                          unsigned, bool);
+
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 
 int bch2_replicas_gc_end(struct bch_fs *, int);
index a510a25e2edbf28363d7b414912926119eaab99a..47a0e20668e33f933ea21b05d07237b8123599ee 100644 (file)
@@ -767,15 +767,13 @@ int bch2_write_super(struct bch_fs *c)
        nr_wrote = dev_mask_nr(&sb_written);
 
        can_mount_with_written =
-               bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
-                                     BCH_FORCE_IF_DEGRADED);
+               bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
 
        for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
                sb_written.d[i] = ~sb_written.d[i];
 
        can_mount_without_written =
-               bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
-                                     BCH_FORCE_IF_DEGRADED);
+               bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
 
        /*
         * If we would be able to mount _without_ the devices we successfully
@@ -786,6 +784,7 @@ int bch2_write_super(struct bch_fs *c)
         * mount with the devices we did successfully write to:
         */
        if (bch2_fs_fatal_err_on(!nr_wrote ||
+                                !can_mount_with_written ||
                                 (can_mount_without_written &&
                                  !can_mount_with_written), c,
                "Unable to write superblock to sufficient devices"))
index a2c5be44c4baf84c2bdad91cdb37f77ca6cd71e4..a3c61a7480bebc602e5cf037a704d916e873f9b8 100644 (file)
@@ -1264,7 +1264,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
                            enum bch_member_state new_state, int flags)
 {
        struct bch_devs_mask new_online_devs;
-       struct replicas_status s;
        struct bch_dev *ca2;
        int i, nr_rw = 0, required;
 
@@ -1300,9 +1299,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
                new_online_devs = bch2_online_devs(c);
                __clear_bit(ca->dev_idx, new_online_devs.d);
 
-               s = __bch2_replicas_status(c, new_online_devs);
-
-               return bch2_have_enough_devs(s, flags);
+               return bch2_have_enough_devs(c, new_online_devs, flags, false);
        default:
                BUG();
        }
@@ -1310,14 +1307,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
 
 static bool bch2_fs_may_start(struct bch_fs *c)
 {
-       struct replicas_status s;
        struct bch_sb_field_members *mi;
        struct bch_dev *ca;
-       unsigned i, flags = c->opts.degraded
-               ? BCH_FORCE_IF_DEGRADED
-               : 0;
+       unsigned i, flags = 0;
+
+       if (c->opts.very_degraded)
+               flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
 
-       if (!c->opts.degraded) {
+       if (c->opts.degraded)
+               flags |= BCH_FORCE_IF_DEGRADED;
+
+       if (!c->opts.degraded &&
+           !c->opts.very_degraded) {
                mutex_lock(&c->sb_lock);
                mi = bch2_sb_get_members(c->disk_sb.sb);
 
@@ -1337,9 +1338,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
                mutex_unlock(&c->sb_lock);
        }
 
-       s = bch2_replicas_status(c);
-
-       return bch2_have_enough_devs(s, flags);
+       return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
 }
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
index f934f12bc677c7c6b9c92788bb9193bcc481e179..bc4c3a77ea62b7c6ed192bc4338241f0f1869d26 100644 (file)
@@ -199,9 +199,6 @@ read_attribute(new_stripes);
 
 rw_attribute(pd_controllers_update_seconds);
 
-read_attribute(meta_replicas_have);
-read_attribute(data_replicas_have);
-
 read_attribute(io_timers_read);
 read_attribute(io_timers_write);
 
@@ -347,9 +344,6 @@ SHOW(bch2_fs)
 
        sysfs_print(promote_whole_extents,      c->promote_whole_extents);
 
-       sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
-       sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
-
        /* Debugging: */
 
        if (attr == &sysfs_alloc_debug)
@@ -520,9 +514,6 @@ struct attribute *bch2_fs_files[] = {
        &sysfs_btree_node_size,
        &sysfs_btree_cache_size,
 
-       &sysfs_meta_replicas_have,
-       &sysfs_data_replicas_have,
-
        &sysfs_journal_write_delay_ms,
        &sysfs_journal_reclaim_delay_ms,