git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 0568ed4886 bcachefs: Fix copygc dying on startup
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 16 Oct 2020 02:53:27 +0000 (22:53 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
Fri, 16 Oct 2020 02:53:27 +0000 (22:53 -0400)
13 files changed:
.bcachefs_revision
libbcachefs/alloc_background.h
libbcachefs/bcachefs.h
libbcachefs/btree_cache.c
libbcachefs/btree_gc.c
libbcachefs/buckets.c
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/move.c
libbcachefs/super.c
libbcachefs/super.h

index 7f15ea6821e886f044dee69914e2778d279b23ab..d4dc4eade46ee979b6afc002c4966c4409f9d268 100644 (file)
@@ -1 +1 @@
-10ab39f2faede817eebfd04a4990e739d0cedcb8
+0568ed488651273d01891c3481613dd652677edb
index f6b9f27f071345e2d53d06a657eb27b463683e78..4f462696b747a88f9f80fd6dc31b74e291cee517 100644 (file)
@@ -61,8 +61,10 @@ static inline void bch2_wake_allocator(struct bch_dev *ca)
 
        rcu_read_lock();
        p = rcu_dereference(ca->alloc_thread);
-       if (p)
+       if (p) {
                wake_up_process(p);
+               ca->allocator_state = ALLOCATOR_RUNNING;
+       }
        rcu_read_unlock();
 }
 
index 3a5a00e53cbfb986e5f52014bec27dae0583d013..29f411635f29968e9fae422d9fbc5ad67318b265 100644 (file)
@@ -491,7 +491,6 @@ enum {
        BCH_FS_ERRORS_FIXED,
 
        /* misc: */
-       BCH_FS_BDEV_MOUNTED,
        BCH_FS_FIXED_GENS,
        BCH_FS_ALLOC_WRITTEN,
        BCH_FS_REBUILD_REPLICAS,
index 7366711128613d6d9ed614291645a2d0157c5a86..bb94fa2341eea839eca31128f1245b190d8244f8 100644 (file)
@@ -252,7 +252,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
        unsigned long can_free;
        unsigned long touched = 0;
        unsigned long freed = 0;
-       unsigned i;
+       unsigned i, flags;
 
        if (btree_shrinker_disabled(c))
                return SHRINK_STOP;
@@ -263,6 +263,8 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
        else if (!mutex_trylock(&bc->lock))
                return -1;
 
+       flags = memalloc_nofs_save();
+
        /*
         * It's _really_ critical that we don't free too many btree nodes - we
         * have to always leave ourselves a reserve. The reserve is how we
@@ -326,6 +328,7 @@ restart:
                        clear_btree_node_accessed(b);
        }
 
+       memalloc_nofs_restore(flags);
        mutex_unlock(&bc->lock);
 out:
        return (unsigned long) freed * btree_pages(c);
@@ -348,11 +351,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
-       unsigned i;
+       unsigned i, flags;
 
        if (bc->shrink.list.next)
                unregister_shrinker(&bc->shrink);
 
+       /* vfree() can allocate memory: */
+       flags = memalloc_nofs_save();
        mutex_lock(&bc->lock);
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -388,6 +393,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
        }
 
        mutex_unlock(&bc->lock);
+       memalloc_nofs_restore(flags);
 
        if (bc->table_init_done)
                rhashtable_destroy(&bc->table);
index 4f581130270cc0e2a798921d1398822a16dc7e85..2aa8140aec3297843e7775eb20c5f4d5f8ffa8e3 100644 (file)
@@ -8,6 +8,7 @@
 #include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
+#include "bkey_on_stack.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
@@ -888,40 +889,77 @@ out:
        return ret;
 }
 
+static bool gc_btree_gens_key(struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+
+       percpu_down_read(&c->mark_lock);
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bucket *g = PTR_BUCKET(ca, ptr, false);
+
+               if (gen_after(g->mark.gen, ptr->gen) > 16) {
+                       percpu_up_read(&c->mark_lock);
+                       return true;
+               }
+       }
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bucket *g = PTR_BUCKET(ca, ptr, false);
+
+               if (gen_after(g->gc_gen, ptr->gen))
+                       g->gc_gen = ptr->gen;
+       }
+       percpu_up_read(&c->mark_lock);
+
+       return false;
+}
+
 /*
  * For recalculating oldest gen, we only need to walk keys in leaf nodes; btree
  * node pointers currently never have cached pointers that can become stale:
  */
-static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id id)
+static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
-       int ret;
+       struct bkey_on_stack sk;
+       int ret = 0;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, id, POS_MIN, BTREE_ITER_PREFETCH, k, ret) {
-               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-               const struct bch_extent_ptr *ptr;
+       iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
+                                  BTREE_ITER_PREFETCH);
 
-               percpu_down_read(&c->mark_lock);
-               bkey_for_each_ptr(ptrs, ptr) {
-                       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-                       struct bucket *g = PTR_BUCKET(ca, ptr, false);
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k))) {
+               if (gc_btree_gens_key(c, k)) {
+                       bkey_on_stack_reassemble(&sk, c, k);
+                       bch2_extent_normalize(c, bkey_i_to_s(sk.k));
 
-                       if (gen_after(g->gc_gen, ptr->gen))
-                               g->gc_gen = ptr->gen;
+                       bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
 
-                       if (gen_after(g->mark.gen, ptr->gen) > 32) {
-                               /* rewrite btree node */
+                       bch2_trans_update(&trans, iter, sk.k, 0);
 
+                       ret = bch2_trans_commit(&trans, NULL, NULL,
+                                               BTREE_INSERT_NOFAIL);
+                       if (ret == -EINTR)
+                               continue;
+                       if (ret) {
+                               break;
                        }
                }
-               percpu_up_read(&c->mark_lock);
+
+               bch2_btree_iter_next(iter);
        }
 
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
+
        return ret;
 }
 
index 97a8af31ded1247e097753798c90e57b96387e7a..797114353aa22537dc6959dc65a814a6c4d6f925 100644 (file)
@@ -949,7 +949,7 @@ static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
                return -EIO;
        }
 
-       if (gen_cmp(bucket_gen, p.ptr.gen) >= 96U) {
+       if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) {
                bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
                        "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
                        "while marking %s",
index 55004998536da0efd55597fe25875d1b952b57ba..60684380f191156cf399b7171985e5970baf6ca3 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/migrate.h>
 #include <linux/mmu_context.h>
 #include <linux/pagevec.h>
+#include <linux/rmap.h>
 #include <linux/sched/signal.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/uio.h>
@@ -2190,6 +2191,12 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
        ret = bch2_get_page_disk_reservation(c, inode, page, false);
        BUG_ON(ret);
 
+       /*
+        * This removes any writeable userspace mappings; we need to force
+        * .page_mkwrite to be called again before any mmapped writes, to
+        * redirty the full page:
+        */
+       page_mkclean(page);
        __set_page_dirty_nobuffers(page);
 unlock:
        unlock_page(page);
index 121150b537bbcd813ea4c69f7dc874b5f6b902bf..5c80142e50ed940b93b533bdb6426631fe246b17 100644 (file)
@@ -38,7 +38,8 @@ static void bch2_vfs_inode_init(struct bch_fs *,
                                struct bch_inode_info *,
                                struct bch_inode_unpacked *);
 
-static void journal_seq_copy(struct bch_inode_info *dst,
+static void journal_seq_copy(struct bch_fs *c,
+                            struct bch_inode_info *dst,
                             u64 journal_seq)
 {
        u64 old, v = READ_ONCE(dst->ei_journal_seq);
@@ -49,6 +50,8 @@ static void journal_seq_copy(struct bch_inode_info *dst,
                if (old >= journal_seq)
                        break;
        } while ((v = cmpxchg(&dst->ei_journal_seq, old, journal_seq)) != old);
+
+       bch2_journal_set_has_inum(&c->journal, dst->v.i_ino, journal_seq);
 }
 
 static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
@@ -285,12 +288,12 @@ err_before_quota:
        if (!tmpfile) {
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
-               journal_seq_copy(dir, journal_seq);
+               journal_seq_copy(c, dir, journal_seq);
                mutex_unlock(&dir->ei_update_lock);
        }
 
        bch2_vfs_inode_init(c, inode, &inode_u);
-       journal_seq_copy(inode, journal_seq);
+       journal_seq_copy(c, inode, journal_seq);
 
        set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
        set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@@ -307,7 +310,7 @@ err_before_quota:
                 * We raced, another process pulled the new inode into cache
                 * before us:
                 */
-               journal_seq_copy(old, journal_seq);
+               journal_seq_copy(c, old, journal_seq);
                make_bad_inode(&inode->v);
                iput(&inode->v);
 
@@ -401,7 +404,7 @@ static int __bch2_link(struct bch_fs *c,
        if (likely(!ret)) {
                BUG_ON(inode_u.bi_inum != inode->v.i_ino);
 
-               journal_seq_copy(inode, dir->ei_journal_seq);
+               journal_seq_copy(c, inode, dir->ei_journal_seq);
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
                bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
@@ -458,7 +461,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
        if (likely(!ret)) {
                BUG_ON(inode_u.bi_inum != inode->v.i_ino);
 
-               journal_seq_copy(inode, dir->ei_journal_seq);
+               journal_seq_copy(c, inode, dir->ei_journal_seq);
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
                bch2_inode_update_after_write(c, inode, &inode_u,
@@ -493,7 +496,7 @@ static int bch2_symlink(struct inode *vdir, struct dentry *dentry,
        if (unlikely(ret))
                goto err;
 
-       journal_seq_copy(dir, inode->ei_journal_seq);
+       journal_seq_copy(c, dir, inode->ei_journal_seq);
 
        ret = __bch2_link(c, inode, dir, dentry);
        if (unlikely(ret))
@@ -591,22 +594,22 @@ retry:
 
        bch2_inode_update_after_write(c, src_dir, &src_dir_u,
                                      ATTR_MTIME|ATTR_CTIME);
-       journal_seq_copy(src_dir, journal_seq);
+       journal_seq_copy(c, src_dir, journal_seq);
 
        if (src_dir != dst_dir) {
                bch2_inode_update_after_write(c, dst_dir, &dst_dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
-               journal_seq_copy(dst_dir, journal_seq);
+               journal_seq_copy(c, dst_dir, journal_seq);
        }
 
        bch2_inode_update_after_write(c, src_inode, &src_inode_u,
                                      ATTR_CTIME);
-       journal_seq_copy(src_inode, journal_seq);
+       journal_seq_copy(c, src_inode, journal_seq);
 
        if (dst_inode) {
                bch2_inode_update_after_write(c, dst_inode, &dst_inode_u,
                                              ATTR_CTIME);
-               journal_seq_copy(dst_inode, journal_seq);
+               journal_seq_copy(c, dst_inode, journal_seq);
        }
 err:
        bch2_trans_exit(&trans);
@@ -1278,91 +1281,36 @@ static struct bch_fs *bch2_path_to_fs(const char *dev)
 
        c = bch2_bdev_to_fs(bdev);
        bdput(bdev);
-       return c ?: ERR_PTR(-ENOENT);
-}
-
-static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * const *devs,
-                                              unsigned nr_devs, struct bch_opts opts)
-{
-       struct bch_fs *c, *c1, *c2;
-       size_t i;
-
-       if (!nr_devs)
-               return ERR_PTR(-EINVAL);
-
-       c = bch2_fs_open(devs, nr_devs, opts);
-
-       if (IS_ERR(c) && PTR_ERR(c) == -EBUSY) {
-               /*
-                * Already open?
-                * Look up each block device, make sure they all belong to a
-                * filesystem and they all belong to the _same_ filesystem
-                */
-
-               c1 = bch2_path_to_fs(devs[0]);
-               if (IS_ERR(c1))
-                       return c;
-
-               for (i = 1; i < nr_devs; i++) {
-                       c2 = bch2_path_to_fs(devs[i]);
-                       if (!IS_ERR(c2))
-                               closure_put(&c2->cl);
-
-                       if (c1 != c2) {
-                               closure_put(&c1->cl);
-                               return c;
-                       }
-               }
-
-               c = c1;
-       }
-
-       if (IS_ERR(c))
-               return c;
-
-       down_write(&c->state_lock);
-
-       if (!test_bit(BCH_FS_STARTED, &c->flags)) {
-               up_write(&c->state_lock);
+       if (c)
                closure_put(&c->cl);
-               pr_err("err mounting %s: incomplete filesystem", dev_name);
-               return ERR_PTR(-EINVAL);
-       }
-
-       up_write(&c->state_lock);
-
-       set_bit(BCH_FS_BDEV_MOUNTED, &c->flags);
-       return c;
+       return c ?: ERR_PTR(-ENOENT);
 }
 
-static struct bch_fs *bch2_open_as_blockdevs(const char *_dev_name,
-                                            struct bch_opts opts)
+static char **split_devs(const char *_dev_name, unsigned *nr)
 {
        char *dev_name = NULL, **devs = NULL, *s;
-       struct bch_fs *c = ERR_PTR(-ENOMEM);
        size_t i, nr_devs = 0;
 
        dev_name = kstrdup(_dev_name, GFP_KERNEL);
        if (!dev_name)
-               goto err;
+               return NULL;
 
        for (s = dev_name; s; s = strchr(s + 1, ':'))
                nr_devs++;
 
-       devs = kcalloc(nr_devs, sizeof(const char *), GFP_KERNEL);
-       if (!devs)
-               goto err;
+       devs = kcalloc(nr_devs + 1, sizeof(const char *), GFP_KERNEL);
+       if (!devs) {
+               kfree(dev_name);
+               return NULL;
+       }
 
        for (i = 0, s = dev_name;
             s;
             (s = strchr(s, ':')) && (*s++ = '\0'))
                devs[i++] = s;
 
-       c = __bch2_open_as_blockdevs(_dev_name, devs, nr_devs, opts);
-err:
-       kfree(devs);
-       kfree(dev_name);
-       return c;
+       *nr = nr_devs;
+       return devs;
 }
 
 static int bch2_remount(struct super_block *sb, int *flags, char *data)
@@ -1406,6 +1354,24 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
        return ret;
 }
 
+static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
+{
+       struct bch_fs *c = root->d_sb->s_fs_info;
+       struct bch_dev *ca;
+       unsigned i;
+       bool first = true;
+
+       for_each_online_member(ca, c, i) {
+               if (!first)
+                       seq_putc(seq, ':');
+               first = false;
+               seq_puts(seq, "/dev/");
+               seq_puts(seq, ca->name);
+       }
+
+       return 0;
+}
+
 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
 {
        struct bch_fs *c = root->d_sb->s_fs_info;
@@ -1429,7 +1395,13 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
        }
 
        return 0;
+}
+
+static void bch2_put_super(struct super_block *sb)
+{
+       struct bch_fs *c = sb->s_fs_info;
 
+       __bch2_fs_stop(c);
 }
 
 static const struct super_operations bch_super_operations = {
@@ -1439,26 +1411,42 @@ static const struct super_operations bch_super_operations = {
        .evict_inode    = bch2_evict_inode,
        .sync_fs        = bch2_sync_fs,
        .statfs         = bch2_statfs,
+       .show_devname   = bch2_show_devname,
        .show_options   = bch2_show_options,
        .remount_fs     = bch2_remount,
-#if 0
        .put_super      = bch2_put_super,
+#if 0
        .freeze_fs      = bch2_freeze,
        .unfreeze_fs    = bch2_unfreeze,
 #endif
 };
 
-static int bch2_test_super(struct super_block *s, void *data)
-{
-       return s->s_fs_info == data;
-}
-
 static int bch2_set_super(struct super_block *s, void *data)
 {
        s->s_fs_info = data;
        return 0;
 }
 
+static int bch2_noset_super(struct super_block *s, void *data)
+{
+       return -EBUSY;
+}
+
+static int bch2_test_super(struct super_block *s, void *data)
+{
+       struct bch_fs *c = s->s_fs_info;
+       struct bch_fs **devs = data;
+       unsigned i;
+
+       if (!c)
+               return false;
+
+       for (i = 0; devs[i]; i++)
+               if (c != devs[i])
+                       return false;
+       return true;
+}
+
 static struct dentry *bch2_mount(struct file_system_type *fs_type,
                                 int flags, const char *dev_name, void *data)
 {
@@ -1467,7 +1455,9 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
        struct super_block *sb;
        struct inode *vinode;
        struct bch_opts opts = bch2_opts_empty();
-       unsigned i;
+       char **devs;
+       struct bch_fs **devs_to_fs = NULL;
+       unsigned i, nr_devs;
        int ret;
 
        opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
@@ -1476,21 +1466,41 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
        if (ret)
                return ERR_PTR(ret);
 
-       c = bch2_open_as_blockdevs(dev_name, opts);
-       if (IS_ERR(c))
-               return ERR_CAST(c);
+       devs = split_devs(dev_name, &nr_devs);
+       if (!devs)
+               return ERR_PTR(-ENOMEM);
 
-       sb = sget(fs_type, bch2_test_super, bch2_set_super, flags|SB_NOSEC, c);
-       if (IS_ERR(sb)) {
-               closure_put(&c->cl);
-               return ERR_CAST(sb);
+       devs_to_fs = kcalloc(nr_devs + 1, sizeof(void *), GFP_KERNEL);
+       if (!devs_to_fs) {
+               sb = ERR_PTR(-ENOMEM);
+               goto got_sb;
        }
 
-       BUG_ON(sb->s_fs_info != c);
+       for (i = 0; i < nr_devs; i++)
+               devs_to_fs[i] = bch2_path_to_fs(devs[i]);
 
-       if (sb->s_root) {
-               closure_put(&c->cl);
+       sb = sget(fs_type, bch2_test_super, bch2_noset_super,
+                 flags|SB_NOSEC, devs_to_fs);
+       if (!IS_ERR(sb))
+               goto got_sb;
+
+       c = bch2_fs_open(devs, nr_devs, opts);
 
+       if (!IS_ERR(c))
+               sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
+       else
+               sb = ERR_CAST(c);
+got_sb:
+       kfree(devs_to_fs);
+       kfree(devs[0]);
+       kfree(devs);
+
+       if (IS_ERR(sb))
+               return ERR_CAST(sb);
+
+       c = sb->s_fs_info;
+
+       if (sb->s_root) {
                if ((flags ^ sb->s_flags) & SB_RDONLY) {
                        ret = -EBUSY;
                        goto err_put_super;
@@ -1565,11 +1575,7 @@ static void bch2_kill_sb(struct super_block *sb)
        struct bch_fs *c = sb->s_fs_info;
 
        generic_shutdown_super(sb);
-
-       if (test_bit(BCH_FS_BDEV_MOUNTED, &c->flags))
-               bch2_fs_stop(c);
-       else
-               closure_put(&c->cl);
+       bch2_fs_free(c);
 }
 
 static struct file_system_type bcache_fs_type = {
index 210ad1b0c4695ca49972ae9491e834b8a26b35ee..b8b719902c637ffd739d7b43f1432beb8820f327 100644 (file)
@@ -18,6 +18,8 @@
 
 #include <trace/events/bcachefs.h>
 
+static inline struct journal_buf *journal_seq_to_buf(struct journal *, u64);
+
 static bool __journal_entry_is_open(union journal_res_state state)
 {
        return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
@@ -305,6 +307,19 @@ u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
        return seq;
 }
 
+void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq)
+{
+       size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
+       struct journal_buf *buf;
+
+       spin_lock(&j->lock);
+
+       if ((buf = journal_seq_to_buf(j, seq)))
+               set_bit(h, buf->has_inode);
+
+       spin_unlock(&j->lock);
+}
+
 static int __journal_res_get(struct journal *j, struct journal_res *res,
                             unsigned flags)
 {
index 56438840efd772513139e8305b744263a8c2aae7..f60bc964ee1f4cb99527b0c7eff6086fc63c70ee 100644 (file)
@@ -147,6 +147,7 @@ static inline u64 journal_cur_seq(struct journal *j)
 }
 
 u64 bch2_inode_journal_seq(struct journal *, u64);
+void bch2_journal_set_has_inum(struct journal *, u64, u64);
 
 static inline int journal_state_count(union journal_res_state s, int idx)
 {
index 2f3be487ef656cb730ff8662dd41e833a61be573..1ffb14a22f94d7a52199e16f7396485e02c4a1f9 100644 (file)
@@ -95,10 +95,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
                        goto nomatch;
 
-               if (m->data_cmd == DATA_REWRITE &&
-                   !bch2_bkey_has_device(k, m->data_opts.rewrite_dev))
-                       goto nomatch;
-
                bkey_reassemble(&_insert.k, k);
                insert = &_insert.k;
 
@@ -110,9 +106,19 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                bch2_cut_back(new->k.p,         insert);
                bch2_cut_back(insert->k.p,      &new->k_i);
 
-               if (m->data_cmd == DATA_REWRITE)
-                       bch2_bkey_drop_device(bkey_i_to_s(insert),
-                                             m->data_opts.rewrite_dev);
+               if (m->data_cmd == DATA_REWRITE) {
+                       struct bch_extent_ptr *new_ptr, *old_ptr = (void *)
+                               bch2_bkey_has_device(bkey_i_to_s_c(insert),
+                                                    m->data_opts.rewrite_dev);
+                       if (!old_ptr)
+                               goto nomatch;
+
+                       if (old_ptr->cached)
+                               extent_for_each_ptr(extent_i_to_s(new), new_ptr)
+                                       new_ptr->cached = true;
+
+                       bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+               }
 
                extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
                        if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
@@ -291,14 +297,14 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
                unsigned compressed_sectors = 0;
 
                bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-                       if (!p.ptr.cached &&
-                           crc_is_compressed(p.crc) &&
-                           bch2_dev_in_target(c, p.ptr.dev, data_opts.target))
+                       if (p.ptr.dev == data_opts.rewrite_dev &&
+                           !p.ptr.cached &&
+                           crc_is_compressed(p.crc))
                                compressed_sectors += p.crc.compressed_size;
 
                if (compressed_sectors) {
                        ret = bch2_disk_reservation_add(c, &m->op.res,
-                                       compressed_sectors,
+                                       k.k->size * m->op.nr_replicas,
                                        BCH_DISK_RESERVATION_NOFAIL);
                        if (ret)
                                return ret;
index 94288fc92ec4a4da9a5cb52d7948db6123bb9d9e..c873b67115f71733ed76ef80c4926d4bebd14e3c 100644 (file)
@@ -496,7 +496,7 @@ int bch2_fs_read_write_early(struct bch_fs *c)
 
 /* Filesystem startup/shutdown: */
 
-static void bch2_fs_free(struct bch_fs *c)
+static void __bch2_fs_free(struct bch_fs *c)
 {
        unsigned i;
 
@@ -552,10 +552,10 @@ static void bch2_fs_release(struct kobject *kobj)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
 
-       bch2_fs_free(c);
+       __bch2_fs_free(c);
 }
 
-void bch2_fs_stop(struct bch_fs *c)
+void __bch2_fs_stop(struct bch_fs *c)
 {
        struct bch_dev *ca;
        unsigned i;
@@ -586,13 +586,6 @@ void bch2_fs_stop(struct bch_fs *c)
        kobject_put(&c->opts_dir);
        kobject_put(&c->internal);
 
-       mutex_lock(&bch_fs_list_lock);
-       list_del(&c->list);
-       mutex_unlock(&bch_fs_list_lock);
-
-       closure_sync(&c->cl);
-       closure_debug_destroy(&c->cl);
-
        /* btree prefetch might have kicked off reads in the background: */
        bch2_btree_flush_all_reads(c);
 
@@ -603,6 +596,22 @@ void bch2_fs_stop(struct bch_fs *c)
        cancel_delayed_work_sync(&c->pd_controllers_update);
        cancel_work_sync(&c->read_only_work);
 
+       for (i = 0; i < c->sb.nr_devices; i++)
+               if (c->devs[i])
+                       bch2_free_super(&c->devs[i]->disk_sb);
+}
+
+void bch2_fs_free(struct bch_fs *c)
+{
+       unsigned i;
+
+       mutex_lock(&bch_fs_list_lock);
+       list_del(&c->list);
+       mutex_unlock(&bch_fs_list_lock);
+
+       closure_sync(&c->cl);
+       closure_debug_destroy(&c->cl);
+
        for (i = 0; i < c->sb.nr_devices; i++)
                if (c->devs[i])
                        bch2_dev_free(rcu_dereference_protected(c->devs[i], 1));
@@ -612,6 +621,12 @@ void bch2_fs_stop(struct bch_fs *c)
        kobject_put(&c->kobj);
 }
 
+void bch2_fs_stop(struct bch_fs *c)
+{
+       __bch2_fs_stop(c);
+       bch2_fs_free(c);
+}
+
 static const char *bch2_fs_online(struct bch_fs *c)
 {
        struct bch_dev *ca;
@@ -669,6 +684,14 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
        __module_get(THIS_MODULE);
 
+       closure_init(&c->cl, NULL);
+
+       c->kobj.kset = bcachefs_kset;
+       kobject_init(&c->kobj, &bch2_fs_ktype);
+       kobject_init(&c->internal, &bch2_fs_internal_ktype);
+       kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
+       kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
+
        c->minor                = -1;
        c->disk_sb.fs_sb        = true;
 
@@ -799,18 +822,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                    bch2_dev_alloc(c, i))
                        goto err;
 
-       /*
-        * Now that all allocations have succeeded, init various refcounty
-        * things that let us shutdown:
-        */
-       closure_init(&c->cl, NULL);
-
-       c->kobj.kset = bcachefs_kset;
-       kobject_init(&c->kobj, &bch2_fs_ktype);
-       kobject_init(&c->internal, &bch2_fs_internal_ktype);
-       kobject_init(&c->opts_dir, &bch2_fs_opts_dir_ktype);
-       kobject_init(&c->time_stats, &bch2_fs_time_stats_ktype);
-
        mutex_lock(&bch_fs_list_lock);
        err = bch2_fs_online(c);
        mutex_unlock(&bch_fs_list_lock);
@@ -906,6 +917,13 @@ int bch2_fs_start(struct bch_fs *c)
 
        set_bit(BCH_FS_STARTED, &c->flags);
 
+       /*
+        * Allocator threads don't start filling copygc reserve until after we
+        * set BCH_FS_STARTED - wake them now:
+        */
+       for_each_online_member(ca, c, i)
+               bch2_wake_allocator(ca);
+
        if (c->opts.read_only || c->opts.nochanges) {
                bch2_fs_read_only(c);
        } else {
@@ -1826,7 +1844,6 @@ err:
 /* return with ref on ca->ref: */
 struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *path)
 {
-
        struct block_device *bdev = lookup_bdev(path);
        struct bch_dev *ca;
        unsigned i;
@@ -1851,6 +1868,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
 {
        struct bch_sb_handle *sb = NULL;
        struct bch_fs *c = NULL;
+       struct bch_sb_field_members *mi;
        unsigned i, best_sb = 0;
        const char *err;
        int ret = -ENOMEM;
@@ -1886,10 +1904,24 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
                    le64_to_cpu(sb[best_sb].sb->seq))
                        best_sb = i;
 
-       for (i = 0; i < nr_devices; i++) {
+       mi = bch2_sb_get_members(sb[best_sb].sb);
+
+       i = 0;
+       while (i < nr_devices) {
+               if (i != best_sb &&
+                   !bch2_dev_exists(sb[best_sb].sb, mi, sb[i].sb->dev_idx)) {
+                       char buf[BDEVNAME_SIZE];
+                       pr_info("%s has been removed, skipping",
+                               bdevname(sb[i].bdev, buf));
+                       bch2_free_super(&sb[i]);
+                       array_remove_item(sb, nr_devices, i);
+                       continue;
+               }
+
                err = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb);
                if (err)
                        goto err_print;
+               i++;
        }
 
        ret = -ENOMEM;
index fffee96726ce4a34ab8cb8556604392efe9bec2a..048ffec622af86e45e232a6aa28c7c9bf6a4e57b 100644 (file)
@@ -231,6 +231,8 @@ static inline void bch2_fs_lazy_rw(struct bch_fs *c)
                bch2_fs_read_write_early(c);
 }
 
+void __bch2_fs_stop(struct bch_fs *);
+void bch2_fs_free(struct bch_fs *);
 void bch2_fs_stop(struct bch_fs *);
 
 int bch2_fs_start(struct bch_fs *);