git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 09a5465430 bcachefs: Don't need to walk inodes on clean...
author    Kent Overstreet <kent.overstreet@gmail.com>
          Sun, 10 Feb 2019 00:54:14 +0000 (19:54 -0500)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Sun, 10 Feb 2019 00:54:14 +0000 (19:54 -0500)
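
This pulls in the upstream work that persists filesystem usage across clean shutdowns so recovery no longer has to recompute it. A new BCH_COMPAT_FEAT_ALLOC_INFO compat bit records that allocation info was written out; summary usage (nr_inodes, key version, persistent reservations) and per-replicas-entry sector counts are written as new journal entry types (usage, data_usage) to both the journal and the clean-shutdown superblock section; and recovery replays those entries early via journal_replay_entry_early(). When the compat bit is set and fsck was not requested, the mark-and-sweep GC pass is skipped, and bch2_fsck() now returns immediately on a clean filesystem instead of walking all inodes.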
23 files changed:
.bcachefs_revision
libbcachefs/alloc_background.c
libbcachefs/bcachefs.h
libbcachefs/bcachefs_format.h
libbcachefs/btree_gc.c
libbcachefs/btree_gc.h
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/fsck.c
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_io.c
libbcachefs/journal_types.h
libbcachefs/recovery.c
libbcachefs/replicas.c
libbcachefs/replicas.h
libbcachefs/super-io.c
libbcachefs/super-io.h
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/util.h

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 8eca05930b099268f419ee266d226d8ce7c08777..39d11479be4ef89a31478bf8f534a4355e3a8415 100644
@@ -1 +1 @@
-99750eab4d583132cf61f071082c7cf21f5295c0
+09a546543006b60d44c4c51e7b40cd3ec7837a5e
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 2552d45799ca08ce2c96ecc4ab00288c6a169140..ce42202fdd14289dd9e73cd2f02e1520118a36a2 100644
@@ -1256,7 +1256,8 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
 
 void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
 {
-       closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
+       if (ca->alloc_thread)
+               closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
 }
 
 /* stop allocator thread: */
@@ -1534,6 +1535,8 @@ int bch2_fs_allocator_start(struct bch_fs *c)
                }
        }
 
+       set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
        return bch2_alloc_write(c, false, &wrote);
 }
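
The ca->alloc_thread check keeps bch2_dev_allocator_quiesce() from waiting on an allocator that was never started. The new BCH_FS_ALLOCATOR_RUNNING flag (declared in bcachefs.h below, set here and in bch2_fs_read_write(), cleared in __bch2_fs_read_only()) serves a similar purpose at the filesystem level: the read-only path skips its alloc-write loop entirely when the allocator is not running.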
 
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index f42b2f9052c636286d435f7a96957e8405ecfec8..245d832218d216f91a56613877a8dd6017b0ceb2 100644
@@ -474,6 +474,7 @@ enum {
        /* startup: */
        BCH_FS_ALLOC_READ_DONE,
        BCH_FS_ALLOCATOR_STARTED,
+       BCH_FS_ALLOCATOR_RUNNING,
        BCH_FS_INITIAL_GC_DONE,
        BCH_FS_FSCK_DONE,
        BCH_FS_STARTED,
@@ -541,6 +542,8 @@ struct bch_fs {
        struct bch_replicas_cpu replicas_gc;
        struct mutex            replicas_gc_lock;
 
+       struct journal_entry_res replicas_journal_res;
+
        struct bch_disk_groups_cpu __rcu *disk_groups;
 
        struct bch_opts         opts;
@@ -562,6 +565,7 @@ struct bch_fs {
                u32             time_base_hi;
                u32             time_precision;
                u64             features;
+               u64             compat;
        }                       sb;
 
        struct bch_sb_handle    disk_sb;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 9245465da1fd08bee32f774c8343bb9196356078..d020cf74e9e975f8ee88f04652e011aeb1df7c38 100644
@@ -1274,6 +1274,10 @@ enum bch_sb_features {
        BCH_FEATURE_NR,
 };
 
+enum bch_sb_compat {
+       BCH_COMPAT_FEAT_ALLOC_INFO      = 0,
+};
+
 /* options: */
 
 #define BCH_REPLICAS_MAX               4U
@@ -1354,7 +1358,9 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
        x(btree_root,           1)              \
        x(prio_ptrs,            2)              \
        x(blacklist,            3)              \
-       x(blacklist_v2,         4)
+       x(blacklist_v2,         4)              \
+       x(usage,                5)              \
+       x(data_usage,           6)
 
 enum {
 #define x(f, nr)       BCH_JSET_ENTRY_##f      = nr,
@@ -1384,6 +1390,24 @@ struct jset_entry_blacklist_v2 {
        __le64                  end;
 };
 
+enum {
+       FS_USAGE_RESERVED               = 0,
+       FS_USAGE_INODES                 = 1,
+       FS_USAGE_KEY_VERSION            = 2,
+       FS_USAGE_NR                     = 3
+};
+
+struct jset_entry_usage {
+       struct jset_entry       entry;
+       __le64                  v;
+} __attribute__((packed));
+
+struct jset_entry_data_usage {
+       struct jset_entry       entry;
+       __le64                  v;
+       struct bch_replicas_entry r;
+} __attribute__((packed));
+
 /*
  * On disk format for a journal entry:
  * seq is monotonically increasing; every journal entry has its own unique
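
A note on sizing these entries, inferred from the writer side in super-io.c further down: jset_entry::u64s counts only the payload u64s that follow the 8-byte entry header (jset_u64s() adds the header back in), so a jset_entry_usage carrying a single __le64 is written with u64s == 1:

    /* sketch: how bch2_journal_super_entries_add_common() sizes a usage entry */
    u->entry.u64s = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;  /* 16/8 - 1 == 1 */

The validators in journal_io.c check the inverse: that jset_u64s(entry->u64s) * sizeof(u64) covers at least sizeof(*u).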
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 433e8f22fd1904607dd168e515c2c866e3d4bf63..b1f5e8b1071e5e2f65e98f270c49d16573b56979 100644
@@ -573,7 +573,8 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 
        percpu_down_write(&c->mark_lock);
 
-       if (initial) {
+       if (initial &&
+           !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
                bch2_gc_done_nocheck(c);
                goto out;
        }
@@ -815,9 +816,6 @@ out:
        bch2_gc_free(c);
        up_write(&c->gc_lock);
 
-       if (!ret && initial)
-               set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
-
        trace_gc_end(c);
        bch2_time_stats_update(&c->times[BCH_TIME_btree_gc], start_time);
 
@@ -1245,19 +1243,3 @@ int bch2_gc_thread_start(struct bch_fs *c)
        wake_up_process(p);
        return 0;
 }
-
-/* Initial GC computes bucket marks during startup */
-
-int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
-{
-       int ret = bch2_gc(c, journal, true);
-
-       /*
-        * Skip past versions that might have possibly been used (as nonces),
-        * but hadn't had their pointers written:
-        */
-       if (c->sb.encryption_type)
-               atomic64_add(1 << 16, &c->key_version);
-
-       return ret;
-}
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
index 1905acfa028a535f82dbdc43e0e408323bacf96d..df51eb83ab49bb4d5cda65ad8ac994e12b252011 100644
@@ -7,7 +7,6 @@ void bch2_coalesce(struct bch_fs *);
 int bch2_gc(struct bch_fs *, struct list_head *, bool);
 void bch2_gc_thread_stop(struct bch_fs *);
 int bch2_gc_thread_start(struct bch_fs *);
-int bch2_initial_gc(struct bch_fs *, struct list_head *);
 void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
 
 /*
@@ -109,7 +108,7 @@ static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
 
        do {
                seq = read_seqcount_begin(&c->gc_pos_lock);
-               ret = gc_pos_cmp(pos, c->gc_pos) < 0;
+               ret = gc_pos_cmp(pos, c->gc_pos) <= 0;
        } while (read_seqcount_retry(&c->gc_pos_lock, seq));
 
        return ret;
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index dce4ed385e8e3fc8ecef2b3a8c25d45d0124fca8..18596dc8d7ba9b7f946566d8c935facbe29c8ac1 100644
@@ -474,6 +474,7 @@ struct btree_root {
        __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
        u8                      level;
        u8                      alive;
+       s8                      error;
 };
 
 /*
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 0f2fa6f72619d150b1b177586a9a9e1358d67141..33b5cf40a5f48377f2a5fe3a6357fc77cba02eba 100644
@@ -2117,7 +2117,6 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
        BUG_ON(btree_node_root(c, b));
 
        __bch2_btree_set_root_inmem(c, b);
-       bch2_btree_set_root_ondisk(c, b, READ);
 }
 
 void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index ea71acb5f8cfc6cb4b0d74b34ec9557542a51605..9f4872a9be18d67e72d5dece13b8d6bc94383ed7 100644
@@ -113,6 +113,36 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
        }
 }
 
+void bch2_fs_usage_initialize(struct bch_fs *c)
+{
+       struct bch_fs_usage *usage;
+       unsigned i, nr;
+
+       percpu_down_write(&c->mark_lock);
+       nr = sizeof(struct bch_fs_usage) / sizeof(u64) + c->replicas.nr;
+       usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++)
+               usage->s.reserved += usage->persistent_reserved[i];
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+
+               switch (e->data_type) {
+               case BCH_DATA_BTREE:
+               case BCH_DATA_USER:
+                       usage->s.data   += usage->data[i];
+                       break;
+               case BCH_DATA_CACHED:
+                       usage->s.cached += usage->data[i];
+                       break;
+               }
+       }
+
+       percpu_up_write(&c->mark_lock);
+}
+
 #define bch2_usage_read_raw(_stats)                                    \
 ({                                                                     \
        typeof(*this_cpu_ptr(_stats)) _acc;                             \
@@ -814,7 +844,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                ret = bch2_mark_stripe(c, k, inserting,
                                       fs_usage, journal_seq, flags, gc);
                break;
-       case KEY_TYPE_alloc:
+       case KEY_TYPE_inode:
                if (inserting)
                        fs_usage->s.nr_inodes++;
                else
@@ -994,10 +1024,7 @@ void bch2_mark_update(struct btree_insert *trans,
 
 static u64 bch2_recalc_sectors_available(struct bch_fs *c)
 {
-       int cpu;
-
-       for_each_possible_cpu(cpu)
-               per_cpu_ptr(c->pcpu, cpu)->sectors_available = 0;
+       percpu_u64_set(&c->pcpu->sectors_available, 0);
 
        return avail_factor(bch2_fs_sectors_free(c));
 }
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 6f3681728f0a3029016f9209dc64d2e40d199d99..19cf652570a2d093dbc82c9b2728442a71fc51b4 100644
@@ -247,6 +247,7 @@ static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
 /* key/bucket marking: */
 
 void bch2_bucket_seq_cleanup(struct bch_fs *);
+void bch2_fs_usage_initialize(struct bch_fs *);
 
 void bch2_invalidate_bucket(struct bch_fs *, struct bch_dev *,
                            size_t, struct bucket_mark *);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 955ab8bec904373780b8ed2491c80e9fdd28b7fa..42bd2f7a271f62274cd2504b46099f99d5162a90 100644
@@ -1186,6 +1186,11 @@ static int check_inode(struct bch_fs *c,
        }
 
        if (u.bi_flags & BCH_INODE_UNLINKED) {
+               fsck_err_on(c->sb.clean, c,
+                           "filesystem marked clean, "
+                           "but inode %llu unlinked",
+                           u.bi_inum);
+
                bch_verbose(c, "deleting inode %llu", u.bi_inum);
 
                ret = bch2_inode_rm(c, u.bi_inum);
@@ -1388,16 +1393,13 @@ static int check_inodes_fast(struct bch_fs *c)
                    (BCH_INODE_I_SIZE_DIRTY|
                     BCH_INODE_I_SECTORS_DIRTY|
                     BCH_INODE_UNLINKED)) {
-                       fsck_err_on(c->sb.clean, c,
-                               "filesystem marked clean but found inode %llu with flags %x",
-                               inode.k->p.inode, inode.v->bi_flags);
                        ret = check_inode(c, NULL, &iter, inode, NULL);
                        BUG_ON(ret == -EINTR);
                        if (ret)
                                break;
                }
        }
-fsck_err:
+
        return bch2_btree_iter_unlock(&iter) ?: ret;
 }
 
@@ -1459,9 +1461,10 @@ int bch2_fsck(struct bch_fs *c)
        if (c->opts.fsck)
                return bch2_fsck_full(c);
 
-       if (!c->sb.clean &&
-           !(c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)))
-               return bch2_fsck_inode_nlink(c);
+       if (c->sb.clean)
+               return 0;
 
-       return bch2_fsck_walk_inodes_only(c);
+       return c->sb.features & (1 << BCH_FEATURE_ATOMIC_NLINK)
+               ? bch2_fsck_walk_inodes_only(c)
+               : bch2_fsck_inode_nlink(c);
 }
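
This is the change the commit subject refers to: bch2_fsck() returns immediately when the superblock is marked clean, rather than walking every inode, and the clean-but-unlinked-inode check moves from check_inodes_fast() into check_inode() so it still fires on the paths that do run.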
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 261149adf16271a6c191d9681159adf9c654d969..8ff8cfa8bf7628b6d97dda7ee1367a67a8f68791 100644
@@ -64,11 +64,6 @@ static void bch2_journal_buf_init(struct journal *j)
        buf->data->u64s = 0;
 }
 
-static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf)
-{
-       return BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
-}
-
 static inline bool journal_entry_empty(struct jset *j)
 {
        struct jset_entry *i;
@@ -130,7 +125,7 @@ static enum {
 
        j->prev_buf_sectors =
                vstruct_blocks_plus(buf->data, c->block_bits,
-                                   journal_entry_u64s_reserve(buf)) *
+                                   buf->u64s_reserved) *
                c->opts.block_size;
        BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
 
@@ -225,6 +220,7 @@ static int journal_entry_open(struct journal *j)
                return sectors;
 
        buf->disk_sectors       = sectors;
+       buf->u64s_reserved      = j->entry_u64s_reserved;
 
        sectors = min_t(unsigned, sectors, buf->size >> 9);
        j->cur_buf_sectors      = sectors;
@@ -233,11 +229,7 @@ static int journal_entry_open(struct journal *j)
 
        /* Subtract the journal header */
        u64s -= sizeof(struct jset) / sizeof(u64);
-       /*
-        * Btree roots, prio pointers don't get added until right before we do
-        * the write:
-        */
-       u64s -= journal_entry_u64s_reserve(buf);
+       u64s -= buf->u64s_reserved;
        u64s  = max_t(ssize_t, 0L, u64s);
 
        BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
@@ -436,6 +428,45 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
        return ret;
 }
 
+/* journal_entry_res: */
+
+void bch2_journal_entry_res_resize(struct journal *j,
+                                  struct journal_entry_res *res,
+                                  unsigned new_u64s)
+{
+       union journal_res_state state;
+       int d = new_u64s - res->u64s;
+
+       spin_lock(&j->lock);
+
+       j->entry_u64s_reserved += d;
+       if (d <= 0)
+               goto out_unlock;
+
+       j->cur_entry_u64s -= d;
+       smp_mb();
+       state = READ_ONCE(j->reservations);
+
+       if (state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL &&
+           state.cur_entry_offset > j->cur_entry_u64s) {
+               j->cur_entry_u64s += d;
+               /*
+                * Not enough room in current journal entry, have to flush it:
+                */
+               __journal_entry_close(j);
+               goto out;
+       }
+
+       journal_cur_buf(j)->u64s_reserved += d;
+out_unlock:
+       spin_unlock(&j->lock);
+out:
+       res->u64s += d;
+       return;
+}
+
+/* journal flushing: */
+
 u64 bch2_journal_last_unwritten_seq(struct journal *j)
 {
        u64 seq;
@@ -1020,6 +1051,10 @@ int bch2_fs_journal_init(struct journal *j)
        j->write_delay_ms       = 1000;
        j->reclaim_delay_ms     = 100;
 
+       /* Btree roots: */
+       j->entry_u64s_reserved +=
+               BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
+
        atomic64_set(&j->reservations.counter,
                ((union journal_res_state)
                 { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
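
journal_entry_res generalizes the old hard-coded btree-root reservation: every journal entry now keeps entry_u64s_reserved u64s free for entries appended just before the write, and bch2_journal_entry_res_resize() adjusts one caller's share of that space, closing the currently open entry if it can no longer honor the larger reservation. The replicas code below grows its reservation like this (sketch, mirroring bch2_mark_replicas_slowpath()):

    /* grow the per-journal-entry reservation to cover the larger table */
    bch2_journal_entry_res_resize(&c->journal,
                                  &c->replicas_journal_res,
                                  reserve_journal_replicas(c, &new_r));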
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 3a083748110058fe775c3336470fd54d3c1e8beb..50d864a3cae3566bcb31da609ccd48fe6043db6c 100644
@@ -330,6 +330,10 @@ out:
        return 0;
 }
 
+void bch2_journal_entry_res_resize(struct journal *,
+                                  struct journal_entry_res *,
+                                  unsigned);
+
 u64 bch2_journal_last_unwritten_seq(struct journal *);
 int bch2_journal_open_seq_async(struct journal *, u64, struct closure *);
 
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 5cc0651c7449f761750a553ccd70d87b2e92da3c..0f1f8e1507c40b3515f98698de458042d1a976aa 100644
@@ -284,6 +284,7 @@ static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
        if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, c,
                "invalid journal seq blacklist entry: bad size")) {
                journal_entry_null_range(entry, vstruct_next(entry));
+               goto out;
        }
 
        bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
@@ -293,6 +294,49 @@ static int journal_entry_validate_blacklist_v2(struct bch_fs *c,
                "invalid journal seq blacklist entry: start > end")) {
                journal_entry_null_range(entry, vstruct_next(entry));
        }
+out:
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_usage(struct bch_fs *c,
+                                       struct jset *jset,
+                                       struct jset_entry *entry,
+                                       int write)
+{
+       struct jset_entry_usage *u =
+               container_of(entry, struct jset_entry_usage, entry);
+       unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+       int ret = 0;
+
+       if (journal_entry_err_on(bytes < sizeof(*u),
+                                c,
+                                "invalid journal entry usage: bad size")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               return ret;
+       }
+
+fsck_err:
+       return ret;
+}
+
+static int journal_entry_validate_data_usage(struct bch_fs *c,
+                                       struct jset *jset,
+                                       struct jset_entry *entry,
+                                       int write)
+{
+       struct jset_entry_data_usage *u =
+               container_of(entry, struct jset_entry_data_usage, entry);
+       unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+       int ret = 0;
+
+       if (journal_entry_err_on(bytes < sizeof(*u) ||
+                                bytes < sizeof(*u) + u->r.nr_devs,
+                                c,
+                                "invalid journal entry usage: bad size")) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               return ret;
+       }
 
 fsck_err:
        return ret;
@@ -315,18 +359,10 @@ static const struct jset_entry_ops bch2_jset_entry_ops[] = {
 static int journal_entry_validate(struct bch_fs *c, struct jset *jset,
                                  struct jset_entry *entry, int write)
 {
-       int ret = 0;
-
-       if (entry->type >= BCH_JSET_ENTRY_NR) {
-               journal_entry_err(c, "invalid journal entry type %u",
-                                 entry->type);
-               journal_entry_null_range(entry, vstruct_next(entry));
-               return 0;
-       }
-
-       ret = bch2_jset_entry_ops[entry->type].validate(c, jset, entry, write);
-fsck_err:
-       return ret;
+       return entry->type < BCH_JSET_ENTRY_NR
+               ? bch2_jset_entry_ops[entry->type].validate(c, jset,
+                                                           entry, write)
+               : 0;
 }
 
 static int jset_validate_entries(struct bch_fs *c, struct jset *jset,
@@ -848,19 +884,6 @@ err:
 
 /* journal write: */
 
-static void bch2_journal_add_btree_root(struct journal_buf *buf,
-                                      enum btree_id id, struct bkey_i *k,
-                                      unsigned level)
-{
-       struct jset_entry *entry;
-
-       entry = bch2_journal_add_entry_noreservation(buf, k->k.u64s);
-       entry->type     = BCH_JSET_ENTRY_btree_root;
-       entry->btree_id = id;
-       entry->level    = level;
-       memcpy_u64s(entry->_data, k, k->k.u64s);
-}
-
 static unsigned journal_dev_buckets_available(struct journal *j,
                                              struct journal_device *ja)
 {
@@ -1191,25 +1214,26 @@ void bch2_journal_write(struct closure *cl)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        struct journal_buf *w = journal_prev_buf(j);
+       struct jset_entry *start, *end;
        struct jset *jset;
        struct bio *bio;
        struct bch_extent_ptr *ptr;
        bool validate_before_checksum = false;
-       unsigned i, sectors, bytes;
+       unsigned i, sectors, bytes, u64s;
 
        journal_buf_realloc(j, w);
        jset = w->data;
 
        j->write_start_time = local_clock();
-       mutex_lock(&c->btree_root_lock);
-       for (i = 0; i < BTREE_ID_NR; i++) {
-               struct btree_root *r = &c->btree_roots[i];
 
-               if (r->alive)
-                       bch2_journal_add_btree_root(w, i, &r->key, r->level);
-       }
-       c->btree_roots_dirty = false;
-       mutex_unlock(&c->btree_root_lock);
+       start   = vstruct_last(w->data);
+       end     = bch2_journal_super_entries_add_common(c, start);
+       u64s    = (u64 *) end - (u64 *) start;
+       BUG_ON(u64s > j->entry_u64s_reserved);
+
+       le32_add_cpu(&w->data->u64s, u64s);
+       BUG_ON(vstruct_sectors(jset, c->block_bits) >
+              w->disk_sectors);
 
        journal_write_compact(jset);
 
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
index a593368921c6f3f9ef69191070c440c94dd8a6aa..a91662f6a61badc865336adf7bad48b665fd76ae 100644
@@ -23,6 +23,7 @@ struct journal_buf {
 
        unsigned                size;
        unsigned                disk_sectors;
+       unsigned                u64s_reserved;
        /* bloom filter: */
        unsigned long           has_inode[1024 / sizeof(unsigned long)];
 };
@@ -154,6 +155,9 @@ struct journal {
        u64                     seq_ondisk;
        u64                     last_seq_ondisk;
 
+       /* Reserved space in journal entry to be used just prior to write */
+       unsigned                entry_u64s_reserved;
+
        /*
         * FIFO of journal entries whose btree updates have not yet been
         * written out.
@@ -242,4 +246,11 @@ struct journal_device {
        struct closure          read;
 };
 
+/*
+ * journal_entry_res - reserve space in every journal entry:
+ */
+struct journal_entry_res {
+       unsigned                u64s;
+};
+
 #endif /* _BCACHEFS_JOURNAL_TYPES_H */
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index f5f3f94ea44af8d32d135b4b8280070d1545a8e8..7e50547cc51f6b0b31ced5c9bf6142eebb291440 100644
@@ -5,6 +5,7 @@
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
+#include "buckets.h"
 #include "dirent.h"
 #include "ec.h"
 #include "error.h"
 #include "journal_io.h"
 #include "quota.h"
 #include "recovery.h"
+#include "replicas.h"
 #include "super-io.h"
 
 #include <linux/stat.h>
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
-struct bkey_i *btree_root_find(struct bch_fs *c,
-                              struct bch_sb_field_clean *clean,
-                              struct jset *j,
-                              enum btree_id id, unsigned *level)
+static struct bkey_i *btree_root_find(struct bch_fs *c,
+                                     struct bch_sb_field_clean *clean,
+                                     struct jset *j,
+                                     enum btree_id id, unsigned *level)
 {
        struct bkey_i *k;
        struct jset_entry *entry, *start, *end;
@@ -49,6 +51,60 @@ found:
        return k;
 }
 
+static int journal_replay_entry_early(struct bch_fs *c,
+                                     struct jset_entry *entry)
+{
+       int ret = 0;
+
+       switch (entry->type) {
+       case BCH_JSET_ENTRY_btree_root: {
+               struct btree_root *r = &c->btree_roots[entry->btree_id];
+
+               if (entry->u64s) {
+                       r->level = entry->level;
+                       bkey_copy(&r->key, &entry->start[0]);
+                       r->error = 0;
+               } else {
+                       r->error = -EIO;
+               }
+               r->alive = true;
+               break;
+       }
+       case BCH_JSET_ENTRY_usage: {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               switch (entry->btree_id) {
+               case FS_USAGE_RESERVED:
+                       if (entry->level < BCH_REPLICAS_MAX)
+                               percpu_u64_set(&c->usage[0]->
+                                              persistent_reserved[entry->level],
+                                              le64_to_cpu(u->v));
+                       break;
+               case FS_USAGE_INODES:
+                       percpu_u64_set(&c->usage[0]->s.nr_inodes,
+                                      le64_to_cpu(u->v));
+                       break;
+               case FS_USAGE_KEY_VERSION:
+                       atomic64_set(&c->key_version,
+                                    le64_to_cpu(u->v));
+                       break;
+               }
+
+               break;
+       }
+       case BCH_JSET_ENTRY_data_usage: {
+               struct jset_entry_data_usage *u =
+                       container_of(entry, struct jset_entry_data_usage, entry);
+               ret = bch2_replicas_set_usage(c, &u->r,
+                                             le64_to_cpu(u->v));
+               break;
+       }
+       }
+
+       return ret;
+}
+
 static int verify_superblock_clean(struct bch_fs *c,
                                   struct bch_sb_field_clean *clean,
                                   struct jset *j)
@@ -107,7 +163,9 @@ static bool journal_empty(struct list_head *journal)
 
        list_for_each_entry(i, journal, list) {
                vstruct_for_each(&i->j, entry) {
-                       if (entry->type == BCH_JSET_ENTRY_btree_root)
+                       if (entry->type == BCH_JSET_ENTRY_btree_root ||
+                           entry->type == BCH_JSET_ENTRY_usage ||
+                           entry->type == BCH_JSET_ENTRY_data_usage)
                                continue;
 
                        if (entry->type == BCH_JSET_ENTRY_btree_keys &&
@@ -124,6 +182,7 @@ int bch2_fs_recovery(struct bch_fs *c)
 {
        const char *err = "cannot allocate memory";
        struct bch_sb_field_clean *clean = NULL, *sb_clean = NULL;
+       struct jset_entry *entry;
        LIST_HEAD(journal);
        struct jset *j = NULL;
        unsigned i;
@@ -176,28 +235,46 @@ int bch2_fs_recovery(struct bch_fs *c)
        fsck_err_on(clean && !journal_empty(&journal), c,
                    "filesystem marked clean but journal not empty");
 
+       err = "insufficient memory";
        if (clean) {
                c->bucket_clock[READ].hand = le16_to_cpu(clean->read_clock);
                c->bucket_clock[WRITE].hand = le16_to_cpu(clean->write_clock);
+
+               for (entry = clean->start;
+                    entry != vstruct_end(&clean->field);
+                    entry = vstruct_next(entry)) {
+                       ret = journal_replay_entry_early(c, entry);
+                       if (ret)
+                               goto err;
+               }
        } else {
+               struct journal_replay *i;
+
                c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
                c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);
+
+               list_for_each_entry(i, &journal, list)
+                       vstruct_for_each(&i->j, entry) {
+                               ret = journal_replay_entry_early(c, entry);
+                               if (ret)
+                                       goto err;
+                       }
        }
 
+       bch2_fs_usage_initialize(c);
+
        for (i = 0; i < BTREE_ID_NR; i++) {
-               unsigned level;
-               struct bkey_i *k;
+               struct btree_root *r = &c->btree_roots[i];
 
-               k = btree_root_find(c, clean, j, i, &level);
-               if (!k)
+               if (!r->alive)
                        continue;
 
                err = "invalid btree root pointer";
-               if (IS_ERR(k))
+               if (r->error)
                        goto err;
 
                err = "error reading btree root";
-               if (bch2_btree_root_read(c, i, k, level)) {
+               if (bch2_btree_root_read(c, i, &r->key, r->level)) {
                        if (i != BTREE_ID_ALLOC)
                                goto err;
 
@@ -214,21 +291,33 @@ int bch2_fs_recovery(struct bch_fs *c)
        if (ret)
                goto err;
 
+       bch_verbose(c, "starting stripes_read");
        ret = bch2_stripes_read(c, &journal);
        if (ret)
                goto err;
-       pr_info("stripes_read done");
+       bch_verbose(c, "stripes_read done");
 
        set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
-       bch_verbose(c, "starting mark and sweep:");
-       err = "error in recovery";
-       ret = bch2_initial_gc(c, &journal);
-       if (ret)
-               goto err;
-       bch_verbose(c, "mark and sweep done");
+       if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
+           c->opts.fsck) {
+               bch_verbose(c, "starting mark and sweep:");
+               err = "error in recovery";
+               ret = bch2_gc(c, &journal, true);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "mark and sweep done");
+       }
 
        clear_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+       set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+
+       /*
+        * Skip past versions that might have possibly been used (as nonces),
+        * but hadn't had their pointers written:
+        */
+       if (c->sb.encryption_type && !c->sb.clean)
+               atomic64_add(1 << 16, &c->key_version);
 
        if (c->opts.noreplay)
                goto out;
@@ -311,15 +400,22 @@ int bch2_fs_initialize(struct bch_fs *c)
 
        bch_notice(c, "initializing new filesystem");
 
+       mutex_lock(&c->sb_lock);
+       for_each_online_member(ca, c, i)
+               bch2_mark_dev_superblock(c, ca, 0);
+       mutex_unlock(&c->sb_lock);
+
        set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
        for (i = 0; i < BTREE_ID_NR; i++)
                bch2_btree_root_alloc(c, i);
 
-       ret = bch2_initial_gc(c, &journal);
+       ret = bch2_gc(c, &journal, true);
        if (ret)
                goto err;
 
+       set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+
        err = "unable to allocate journal buckets";
        for_each_online_member(ca, c, i)
                if (bch2_dev_journal_alloc(ca)) {
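
Btree roots are now parsed into c->btree_roots by journal_replay_entry_early() instead of being located by btree_root_find() at use time; the error field added to struct btree_root (btree_types.h above) replaces the old IS_ERR() convention, with an empty root entry recorded as r->error = -EIO. bch2_fs_initialize() also marks device superblocks itself now that bch2_dev_attach_bdev() (super.c below) only does so when the superblock buckets aren't already accounted for.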
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 230f807bdf107cd8ea84843bc2639d15cc6d7b42..4d0c9718c109360af32b318ef42ca81a87b933ef 100644
@@ -1,5 +1,6 @@
 
 #include "bcachefs.h"
+#include "journal.h"
 #include "replicas.h"
 #include "super-io.h"
 
@@ -28,11 +29,6 @@ static void replicas_entry_sort(struct bch_replicas_entry *e)
        bubble_sort(e->devs, e->nr_devs, u8_cmp);
 }
 
-#define for_each_cpu_replicas_entry(_r, _i)                            \
-       for (_i = (_r)->entries;                                        \
-            (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
-            _i = (void *) (_i) + (_r)->entry_size)
-
 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
 {
        eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
@@ -301,6 +297,32 @@ err:
        return ret;
 }
 
+static unsigned reserve_journal_replicas(struct bch_fs *c,
+                                    struct bch_replicas_cpu *r)
+{
+       struct bch_replicas_entry *e;
+       unsigned journal_res_u64s = 0;
+
+       /* nr_inodes: */
+       journal_res_u64s +=
+               DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+       /* key_version: */
+       journal_res_u64s +=
+               DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+       /* persistent_reserved: */
+       journal_res_u64s +=
+               DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
+               BCH_REPLICAS_MAX;
+
+       for_each_cpu_replicas_entry(r, e)
+               journal_res_u64s +=
+                       DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
+                                    e->nr_devs, sizeof(u64));
+       return journal_res_u64s;
+}
+
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
                                struct bch_replicas_entry *new_entry)
@@ -328,6 +350,10 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
                ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
                if (ret)
                        goto err;
+
+               bch2_journal_entry_res_resize(&c->journal,
+                               &c->replicas_journal_res,
+                               reserve_journal_replicas(c, &new_r));
        }
 
        if (!new_r.entries &&
@@ -425,14 +451,12 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
                struct bch_replicas_entry *e =
                        cpu_replicas_entry(&c->replicas, i);
                struct bch_replicas_cpu n;
-               u64 v = 0;
-               int cpu;
+               u64 v;
 
                if (__replicas_has_entry(&c->replicas_gc, e))
                        continue;
 
-               for_each_possible_cpu(cpu)
-                       v += *per_cpu_ptr(&c->usage[0]->data[i], cpu);
+               v = percpu_u64_get(&c->usage[0]->data[i]);
                if (!v)
                        continue;
 
@@ -510,6 +534,34 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
        return 0;
 }
 
+int bch2_replicas_set_usage(struct bch_fs *c,
+                           struct bch_replicas_entry *r,
+                           u64 sectors)
+{
+       int ret, idx = bch2_replicas_entry_idx(c, r);
+
+       if (idx < 0) {
+               struct bch_replicas_cpu n;
+
+               n = cpu_replicas_add_entry(&c->replicas, r);
+               if (!n.entries)
+                       return -ENOMEM;
+
+               ret = replicas_table_update(c, &n);
+               if (ret)
+                       return ret;
+
+               kfree(n.entries);
+
+               idx = bch2_replicas_entry_idx(c, r);
+               BUG_ON(ret < 0);
+       }
+
+       percpu_u64_set(&c->usage[0]->data[idx], sectors);
+
+       return 0;
+}
+
 /* Replicas tracking - superblock: */
 
 static int
@@ -596,6 +648,7 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
        bch2_cpu_replicas_sort(&new_r);
 
        percpu_down_write(&c->mark_lock);
+
        ret = replicas_table_update(c, &new_r);
        percpu_up_write(&c->mark_lock);
 
@@ -916,3 +969,10 @@ unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
 
        return ret;
 }
+
+int bch2_fs_replicas_init(struct bch_fs *c)
+{
+       c->journal.entry_u64s_reserved +=
+               reserve_journal_replicas(c, &c->replicas);
+       return 0;
+}
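
reserve_journal_replicas() sizes the worst case the write path can append: with sizeof(struct jset_entry_usage) == 16 that is 2 u64s each for the nr_inodes and key_version entries, 2 * BCH_REPLICAS_MAX == 8 u64s for persistent reservations, plus DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) + e->nr_devs, sizeof(u64)) u64s per replicas entry. Unlike the u64s field stored in the entries themselves, these counts include each entry's header.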
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 0ac2b8e082cdda0be8991c74be8d79f0ecca09ec..1607b7bdfd50df52ff4016b676b43543480750c6 100644
@@ -56,6 +56,15 @@ unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 int bch2_replicas_gc_end(struct bch_fs *, int);
 int bch2_replicas_gc_start(struct bch_fs *, unsigned);
 
+int bch2_replicas_set_usage(struct bch_fs *,
+                           struct bch_replicas_entry *,
+                           u64);
+
+#define for_each_cpu_replicas_entry(_r, _i)                            \
+       for (_i = (_r)->entries;                                        \
+            (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
+            _i = (void *) (_i) + (_r)->entry_size)
+
 /* iterate over superblock replicas - used by userspace tools: */
 
 #define replicas_entry_bytes(_i)                                       \
@@ -79,4 +88,6 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
 extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
 extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
 
+int bch2_fs_replicas_init(struct bch_fs *);
+
 #endif /* _BCACHEFS_REPLICAS_H */
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index c5eaf155c66593b8714bd6367648808fc5dd6b15..b88750ff1bb77ad81ba226ffc402df79d4f7d004 100644
@@ -181,6 +181,7 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
                }
        }
 
+       f = bch2_sb_field_get(sb->sb, type);
        f = __bch2_sb_field_resize(sb, f, u64s);
        if (f)
                f->type = cpu_to_le32(type);
@@ -362,6 +363,7 @@ static void bch2_sb_update(struct bch_fs *c)
        c->sb.time_base_hi      = le32_to_cpu(src->time_base_hi);
        c->sb.time_precision    = le32_to_cpu(src->time_precision);
        c->sb.features          = le64_to_cpu(src->features[0]);
+       c->sb.compat            = le64_to_cpu(src->compat[0]);
 
        for_each_member_device(ca, c, i)
                ca->mi = bch2_mi_to_cpu(mi->members + i);
@@ -881,29 +883,132 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
                bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
 }
 
+static void bch2_fs_mark_dirty(struct bch_fs *c)
+{
+       mutex_lock(&c->sb_lock);
+       if (BCH_SB_CLEAN(c->disk_sb.sb) ||
+           (c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
+               SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+               c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+               bch2_write_super(c);
+       }
+       mutex_unlock(&c->sb_lock);
+}
+
+struct jset_entry *
+bch2_journal_super_entries_add_common(struct bch_fs *c,
+                                     struct jset_entry *entry)
+{
+       struct btree_root *r;
+       unsigned i;
+
+       mutex_lock(&c->btree_root_lock);
+
+       for (r = c->btree_roots;
+            r < c->btree_roots + BTREE_ID_NR;
+            r++)
+               if (r->alive) {
+                       entry->u64s     = r->key.u64s;
+                       entry->btree_id = r - c->btree_roots;
+                       entry->level    = r->level;
+                       entry->type     = BCH_JSET_ENTRY_btree_root;
+                       bkey_copy(&entry->start[0], &r->key);
+
+                       entry = vstruct_next(entry);
+               }
+       c->btree_roots_dirty = false;
+
+       mutex_unlock(&c->btree_root_lock);
+
+       percpu_down_read_preempt_disable(&c->mark_lock);
+
+       {
+               u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               memset(u, 0, sizeof(*u));
+               u->entry.u64s   = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
+               u->entry.type   = BCH_JSET_ENTRY_usage;
+               u->entry.btree_id = FS_USAGE_INODES;
+               u->v            = cpu_to_le64(nr_inodes);
+
+               entry = vstruct_next(entry);
+       }
+
+       {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+
+               memset(u, 0, sizeof(*u));
+               u->entry.u64s   = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
+               u->entry.type   = BCH_JSET_ENTRY_usage;
+               u->entry.btree_id = FS_USAGE_KEY_VERSION;
+               u->v            = cpu_to_le64(atomic64_read(&c->key_version));
+
+               entry = vstruct_next(entry);
+       }
+
+       for (i = 0; i < BCH_REPLICAS_MAX; i++) {
+               struct jset_entry_usage *u =
+                       container_of(entry, struct jset_entry_usage, entry);
+               u64 sectors = percpu_u64_get(&c->usage[0]->persistent_reserved[i]);
+
+               if (!sectors)
+                       continue;
+
+               memset(u, 0, sizeof(*u));
+               u->entry.u64s   = DIV_ROUND_UP(sizeof(*u), sizeof(u64)) - 1;
+               u->entry.type   = BCH_JSET_ENTRY_usage;
+               u->entry.btree_id = FS_USAGE_RESERVED;
+               u->entry.level  = i;
+               u->v            = sectors;
+
+               entry = vstruct_next(entry);
+       }
+
+       for (i = 0; i < c->replicas.nr; i++) {
+               struct bch_replicas_entry *e =
+                       cpu_replicas_entry(&c->replicas, i);
+               u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
+               struct jset_entry_data_usage *u =
+                       container_of(entry, struct jset_entry_data_usage, entry);
+
+               memset(u, 0, sizeof(*u));
+               u->entry.u64s   = DIV_ROUND_UP(sizeof(*u) + e->nr_devs,
+                                              sizeof(u64)) - 1;
+               u->entry.type   = BCH_JSET_ENTRY_data_usage;
+               u->v            = cpu_to_le64(sectors);
+               memcpy(&u->r, e, replicas_entry_bytes(e));
+
+               entry = vstruct_next(entry);
+       }
+
+       percpu_up_read_preempt_enable(&c->mark_lock);
+
+       return entry;
+}
+
 void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 {
        struct bch_sb_field_clean *sb_clean;
-       unsigned u64s = sizeof(*sb_clean) / sizeof(u64);
        struct jset_entry *entry;
-       struct btree_root *r;
+       unsigned u64s;
+
+       if (!clean) {
+               bch2_fs_mark_dirty(c);
+               return;
+       }
 
        mutex_lock(&c->sb_lock);
-       if (clean == BCH_SB_CLEAN(c->disk_sb.sb))
+       if (BCH_SB_CLEAN(c->disk_sb.sb))
                goto out;
 
-       SET_BCH_SB_CLEAN(c->disk_sb.sb, clean);
+       SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
 
-       if (!clean)
-               goto write_super;
+       c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
 
-       mutex_lock(&c->btree_root_lock);
-
-       for (r = c->btree_roots;
-            r < c->btree_roots + BTREE_ID_NR;
-            r++)
-               if (r->alive)
-                       u64s += jset_u64s(r->key.u64s);
+       u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
 
        sb_clean = bch2_sb_resize_clean(&c->disk_sb, u64s);
        if (!sb_clean) {
@@ -917,30 +1022,16 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
        sb_clean->journal_seq   = journal_cur_seq(&c->journal) - 1;
 
        entry = sb_clean->start;
+       entry = bch2_journal_super_entries_add_common(c, entry);
+       BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
+
        memset(entry, 0,
               vstruct_end(&sb_clean->field) - (void *) entry);
 
-       for (r = c->btree_roots;
-            r < c->btree_roots + BTREE_ID_NR;
-            r++)
-               if (r->alive) {
-                       entry->u64s     = r->key.u64s;
-                       entry->btree_id = r - c->btree_roots;
-                       entry->level    = r->level;
-                       entry->type     = BCH_JSET_ENTRY_btree_root;
-                       bkey_copy(&entry->start[0], &r->key);
-                       entry = vstruct_next(entry);
-                       BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
-               }
-
-       BUG_ON(entry != vstruct_end(&sb_clean->field));
-
        if (le16_to_cpu(c->disk_sb.sb->version) <
            bcachefs_metadata_version_bkey_renumber)
                bch2_sb_clean_renumber(sb_clean, WRITE);
 
-       mutex_unlock(&c->btree_root_lock);
-write_super:
        bch2_write_super(c);
 out:
        mutex_unlock(&c->sb_lock);
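
bch2_journal_super_entries_add_common() is now the single writer for this state, shared between the clean-shutdown superblock section here and the journal write path in journal_io.c above. The caller pattern is the same in both places (sketch, as in bch2_journal_write()):

    start = vstruct_last(w->data);
    end   = bch2_journal_super_entries_add_common(c, start);
    le32_add_cpu(&w->data->u64s, (u64 *) end - (u64 *) start);

with the journal_entry_res reservation guaranteeing the appended entries fit.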
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index b493d628cef6fbd4d24a7a91f34c637a77b72b77..c48294c8253abe82e185525a33cdf7a43af41285 100644
@@ -134,6 +134,10 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
 
 /* BCH_SB_FIELD_clean: */
 
+struct jset_entry *
+bch2_journal_super_entries_add_common(struct bch_fs *,
+                                     struct jset_entry *);
+
 void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
 
 void bch2_fs_mark_clean(struct bch_fs *, bool);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 1835b5355c67df3a9d9e16d92ad1cb531bbc9b26..a8eb161585c1f162cd951f5e9747d769a930310e 100644
@@ -222,6 +222,9 @@ static void __bch2_fs_read_only(struct bch_fs *c)
         */
        bch2_journal_flush_all_pins(&c->journal);
 
+       if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
+               goto allocator_not_running;
+
        do {
                ret = bch2_alloc_write(c, false, &wrote);
                if (ret) {
@@ -250,10 +253,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
                closure_wait_event(&c->btree_interior_update_wait,
                                   !bch2_btree_interior_updates_nr_pending(c));
        } while (wrote);
-
+allocator_not_running:
        for_each_member_device(ca, c, i)
                bch2_dev_allocator_stop(ca);
 
+       clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
        bch2_fs_journal_stop(&c->journal);
 
        /* XXX: mark super that alloc info is persistent */
@@ -380,6 +385,8 @@ const char *bch2_fs_read_write(struct bch_fs *c)
                        goto err;
                }
 
+       set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
+
        err = "error starting btree GC thread";
        if (bch2_gc_thread_start(c))
                goto err;
@@ -683,6 +690,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            bch2_io_clock_init(&c->io_clock[READ]) ||
            bch2_io_clock_init(&c->io_clock[WRITE]) ||
            bch2_fs_journal_init(&c->journal) ||
+           bch2_fs_replicas_init(c) ||
            bch2_fs_btree_cache_init(c) ||
            bch2_fs_io_init(c) ||
            bch2_fs_encryption_init(c) ||
@@ -1101,9 +1109,12 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
        if (ret)
                return ret;
 
-       mutex_lock(&c->sb_lock);
-       bch2_mark_dev_superblock(ca->fs, ca, 0);
-       mutex_unlock(&c->sb_lock);
+       if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
+           !percpu_u64_get(&ca->usage[0]->buckets[BCH_DATA_SB])) {
+               mutex_lock(&c->sb_lock);
+               bch2_mark_dev_superblock(ca->fs, ca, 0);
+               mutex_unlock(&c->sb_lock);
+       }
 
        bch2_dev_sysfs_online(c, ca);
 
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 40384e7e5af8314ffa3cbb021ffa2555214214e2..7e3aebed2c18533dc54c257767124dc345413a2e 100644
@@ -132,6 +132,7 @@ do {                                                                        \
 write_attribute(trigger_journal_flush);
 write_attribute(trigger_btree_coalesce);
 write_attribute(trigger_gc);
+write_attribute(trigger_alloc_write);
 write_attribute(prune_cache);
 rw_attribute(btree_gc_periodic);
 
@@ -239,27 +240,29 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
        if (!fs_usage)
                return -ENOMEM;
 
-       pr_buf(&out, "capacity:\t\t%llu\n", c->capacity);
+       pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
+
+       pr_buf(&out, "hidden:\t\t\t\t%llu\n",
+              fs_usage->s.hidden);
+       pr_buf(&out, "data:\t\t\t\t%llu\n",
+              fs_usage->s.data);
+       pr_buf(&out, "cached:\t\t\t\t%llu\n",
+              fs_usage->s.cached);
+       pr_buf(&out, "reserved:\t\t\t%llu\n",
+              fs_usage->s.reserved);
+       pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
+              fs_usage->s.nr_inodes);
+       pr_buf(&out, "online reserved:\t\t%llu\n",
+              fs_usage->s.online_reserved);
 
        for (i = 0;
             i < ARRAY_SIZE(fs_usage->persistent_reserved);
             i++) {
                pr_buf(&out, "%u replicas:\n", i + 1);
-#if 0
-               for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
-                       pr_buf(&out, "\t%s:\t\t%llu\n",
-                              bch2_data_types[type],
-                              stats.replicas[replicas].data[type]);
-               pr_buf(&out, "\terasure coded:\t%llu\n",
-                      stats.replicas[replicas].ec_data);
-#endif
-               pr_buf(&out, "\treserved:\t%llu\n",
+               pr_buf(&out, "\treserved:\t\t%llu\n",
                       fs_usage->persistent_reserved[i]);
        }
 
-       pr_buf(&out, "online reserved:\t%llu\n",
-              fs_usage->s.online_reserved);
-
        for (i = 0; i < c->replicas.nr; i++) {
                struct bch_replicas_entry *e =
                        cpu_replicas_entry(&c->replicas, i);
@@ -492,6 +495,12 @@ STORE(__bch2_fs)
        if (attr == &sysfs_trigger_gc)
                bch2_gc(c, NULL, false);
 
+       if (attr == &sysfs_trigger_alloc_write) {
+               bool wrote;
+
+               bch2_alloc_write(c, false, &wrote);
+       }
+
        if (attr == &sysfs_prune_cache) {
                struct shrink_control sc;
 
@@ -584,6 +593,7 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_trigger_journal_flush,
        &sysfs_trigger_btree_coalesce,
        &sysfs_trigger_gc,
+       &sysfs_trigger_alloc_write,
        &sysfs_prune_cache,
 
        &sysfs_copy_gc_enabled,
@@ -882,20 +892,15 @@ static const char * const bch2_rw[] = {
 static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
 {
        struct printbuf out = _PBUF(buf, PAGE_SIZE);
-       int rw, i, cpu;
+       int rw, i;
 
        for (rw = 0; rw < 2; rw++) {
                pr_buf(&out, "%s:\n", bch2_rw[rw]);
 
-               for (i = 1; i < BCH_DATA_NR; i++) {
-                       u64 n = 0;
-
-                       for_each_possible_cpu(cpu)
-                               n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i];
-
+               for (i = 1; i < BCH_DATA_NR; i++)
                        pr_buf(&out, "%-12s:%12llu\n",
-                              bch2_data_types[i], n << 9);
-               }
+                              bch2_data_types[i],
+                              percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
        }
 
        return out.pos - buf;
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index fbfb2085801c94310c4e9767b776eb96904cbe35..7e1729a4b125a954c529b5735ce18e4c918ef997 100644
@@ -11,6 +11,7 @@
 #include <linux/llist.h>
 #include <linux/log2.h>
 #include <linux/percpu.h>
+#include <linux/preempt.h>
 #include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -698,6 +699,28 @@ do {                                                                       \
        }                                                               \
 } while (0)
 
+static inline u64 percpu_u64_get(u64 __percpu *src)
+{
+       u64 ret = 0;
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               ret += *per_cpu_ptr(src, cpu);
+       return ret;
+}
+
+static inline void percpu_u64_set(u64 __percpu *dst, u64 src)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(dst, cpu) = 0;
+
+       preempt_disable();
+       *this_cpu_ptr(dst) = src;
+       preempt_enable();
+}
+
 static inline void acc_u64s(u64 *acc, const u64 *src, unsigned nr)
 {
        unsigned i;
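
The percpu helpers centralize a pattern previously open-coded at the call sites converted above: percpu_u64_get() sums a counter over all possible CPUs, while percpu_u64_set() zeroes every CPU's slot and then stores the value on the local CPU, so an immediately following percpu_u64_get() returns exactly src. Typical usage, as in the converted callers (sketch):

    u64 v = percpu_u64_get(&c->usage[0]->data[i]);   /* read: sum across CPUs */
    percpu_u64_set(&c->pcpu->sectors_available, 0);  /* write: one CPU holds it */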