git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to b9bd69421f73 bcachefs: x-macro-ify inode flags enum
author: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 2 Nov 2023 18:39:01 +0000 (14:39 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 2 Nov 2023 18:39:01 +0000 (14:39 -0400)
38 files changed:
.bcachefs_revision
include/linux/generic-radix-tree.h
libbcachefs/alloc_background.c
libbcachefs/alloc_background.h
libbcachefs/alloc_foreground.c
libbcachefs/bcachefs_format.h
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_locking.c
libbcachefs/btree_trans_commit.c
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_interior.h
libbcachefs/darray.h
libbcachefs/fs-common.c
libbcachefs/fs-ioctl.c
libbcachefs/fs-ioctl.h
libbcachefs/fs.c
libbcachefs/fsck.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/io_write.c
libbcachefs/move.c
libbcachefs/move.h
libbcachefs/movinggc.c
libbcachefs/rebalance.c
libbcachefs/recovery.c
libbcachefs/reflink.c
libbcachefs/sb-errors.c
libbcachefs/sb-members.c
libbcachefs/sb-members.h
libbcachefs/six.c
libbcachefs/six.h
libbcachefs/super-io.c
libbcachefs/super.c
libbcachefs/util.c
libbcachefs/util.h
linux/closure.c

index 0476cc0ead1a5ab034a149c45e1379be5e6832bd..f778625352ece04d84b70b8c74c42a5d0a3de909 100644 (file)
@@ -1 +1 @@
-6628827a87075d3f807c974045ed293ac1e8965b
+b9bd69421f7364ca4ff11c827fd0e171a8b826ea
index c74b7376990d53301bfbd55acb414dbb491d8184..8474131647388b954812515930c81b0c83315f3a 100644 (file)
@@ -191,8 +191,8 @@ void *__genradix_iter_peek_prev(struct genradix_iter *, struct __genradix *,
                                size_t, size_t);
 
 /**
- * genradix_iter_peek - get first entry at or below iterator's current
- *                     position
+ * genradix_iter_peek_prev - get first entry at or below iterator's current
+ *                          position
  * @_iter:     a genradix_iter
  * @_radix:    genradix being iterated over
  *
index c342ec3b0385ecd474fd7a93fe240cc6c90694ec..bcfae91667af10e98d94c96f590c66c6ddc269ad 100644 (file)
@@ -2085,6 +2085,17 @@ void bch2_recalc_capacity(struct bch_fs *c)
        closure_wake_up(&c->freelist_wait);
 }
 
+u64 bch2_min_rw_member_capacity(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+       u64 ret = U64_MAX;
+
+       for_each_rw_member(ca, c, i)
+               ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
+       return ret;
+}
+
 static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
 {
        struct open_bucket *ob;
index e1ce38ef052eb3d2eef5483d2d80b25bb16c1a9b..73faf99a222aac3b33035432666e4d9b272c6fe9 100644 (file)
@@ -249,6 +249,7 @@ int bch2_dev_freespace_init(struct bch_fs *, struct bch_dev *, u64, u64);
 int bch2_fs_freespace_init(struct bch_fs *);
 
 void bch2_recalc_capacity(struct bch_fs *);
+u64 bch2_min_rw_member_capacity(struct bch_fs *);
 
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
index 3bc4abd3d7d5725e821e37b43282a3d92de149db..b85c7765272f6e4ae5e8aceb5a4bbaa89c535912 100644 (file)
@@ -399,12 +399,23 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
                        struct bucket_alloc_state *s,
                        struct closure *cl)
 {
-       struct btree_iter iter;
-       struct bkey_s_c k;
+       struct btree_iter iter, citer;
+       struct bkey_s_c k, ck;
        struct open_bucket *ob = NULL;
-       u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
-       u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor));
+       u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+       u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
+       u64 alloc_cursor = alloc_start;
        int ret;
+
+       /*
+        * Scan with an uncached iterator to avoid polluting the key cache. An
+        * uncached iter will return a cached key if one exists, but if not
+        * there is no other underlying protection for the associated key cache
+        * slot. To avoid racing bucket allocations, look up the cached key slot
+        * of any likely allocation candidate before attempting to proceed with
+        * the allocation. This provides proper exclusion on the associated
+        * bucket.
+        */
 again:
        for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
                           BTREE_ITER_SLOTS, k, ret) {
@@ -419,25 +430,38 @@ again:
                        continue;
 
                a = bch2_alloc_to_v4(k, &a_convert);
-
                if (a->data_type != BCH_DATA_free)
                        continue;
 
+               /* now check the cached key to serialize concurrent allocs of the bucket */
+               ck = bch2_bkey_get_iter(trans, &citer, BTREE_ID_alloc, k.k->p, BTREE_ITER_CACHED);
+               ret = bkey_err(ck);
+               if (ret)
+                       break;
+
+               a = bch2_alloc_to_v4(ck, &a_convert);
+               if (a->data_type != BCH_DATA_free)
+                       goto next;
+
                s->buckets_seen++;
 
                ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+next:
+               citer.path->preserve = false;
+               bch2_trans_iter_exit(trans, &citer);
                if (ob)
                        break;
        }
        bch2_trans_iter_exit(trans, &iter);
 
+       alloc_cursor = iter.pos.offset;
        ca->alloc_cursor = alloc_cursor;
 
        if (!ob && ret)
                ob = ERR_PTR(ret);
 
-       if (!ob && alloc_cursor > alloc_start) {
-               alloc_cursor = alloc_start;
+       if (!ob && alloc_start > first_bucket) {
+               alloc_cursor = alloc_start = first_bucket;
                goto again;
        }
 
index 29b000c6b7e1f49495e51b361ba0cdc66d81107e..7a1c244071f9254dfdf0c3eca51b79100955bd76 100644 (file)
@@ -824,34 +824,30 @@ enum inode_opt_id {
        Inode_opt_nr,
 };
 
-enum {
-       /*
-        * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
-        * flags)
-        */
-       __BCH_INODE_SYNC                = 0,
-       __BCH_INODE_IMMUTABLE           = 1,
-       __BCH_INODE_APPEND              = 2,
-       __BCH_INODE_NODUMP              = 3,
-       __BCH_INODE_NOATIME             = 4,
-
-       __BCH_INODE_I_SIZE_DIRTY        = 5, /* obsolete */
-       __BCH_INODE_I_SECTORS_DIRTY     = 6, /* obsolete */
-       __BCH_INODE_UNLINKED            = 7,
-       __BCH_INODE_BACKPTR_UNTRUSTED   = 8,
-
-       /* bits 20+ reserved for packed fields below: */
-};
-
-#define BCH_INODE_SYNC         (1 << __BCH_INODE_SYNC)
-#define BCH_INODE_IMMUTABLE    (1 << __BCH_INODE_IMMUTABLE)
-#define BCH_INODE_APPEND       (1 << __BCH_INODE_APPEND)
-#define BCH_INODE_NODUMP       (1 << __BCH_INODE_NODUMP)
-#define BCH_INODE_NOATIME      (1 << __BCH_INODE_NOATIME)
-#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY)
-#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
-#define BCH_INODE_UNLINKED     (1 << __BCH_INODE_UNLINKED)
-#define BCH_INODE_BACKPTR_UNTRUSTED (1 << __BCH_INODE_BACKPTR_UNTRUSTED)
+#define BCH_INODE_FLAGS()                      \
+       x(sync,                         0)      \
+       x(immutable,                    1)      \
+       x(append,                       2)      \
+       x(nodump,                       3)      \
+       x(noatime,                      4)      \
+       x(i_size_dirty,                 5)      \
+       x(i_sectors_dirty,              6)      \
+       x(unlinked,                     7)      \
+       x(backptr_untrusted,            8)
+
+/* bits 20+ reserved for packed fields below: */
+
+enum bch_inode_flags {
+#define x(t, n)        BCH_INODE_##t = 1U << n,
+       BCH_INODE_FLAGS()
+#undef x
+};
+
+enum __bch_inode_flags {
+#define x(t, n)        __BCH_INODE_##t = n,
+       BCH_INODE_FLAGS()
+#undef x
+};
 
 LE32_BITMASK(INODE_STR_HASH,   struct bch_inode, bi_flags, 20, 24);
 LE32_BITMASK(INODE_NR_FIELDS,  struct bch_inode, bi_flags, 24, 31);
@@ -1617,9 +1613,7 @@ struct journal_seq_blacklist_entry {
 
 struct bch_sb_field_journal_seq_blacklist {
        struct bch_sb_field     field;
-
-       struct journal_seq_blacklist_entry start[0];
-       __u64                   _data[];
+       struct journal_seq_blacklist_entry start[];
 };
 
 struct bch_sb_field_errors {
index feba9a315c8ce0657de9cd30322aa47f3f420af4..ba392eb02a57b53493cf6e91fab14b513b51083f 100644 (file)
@@ -1109,6 +1109,9 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
        if (unlikely(ret))
                goto out;
 
+       if (unlikely(!trans->srcu_held))
+               bch2_trans_srcu_lock(trans);
+
        /*
         * Ensure we obey path->should_be_locked: if it's set, we can't unlock
         * and re-traverse the path without a transaction restart:
@@ -2830,18 +2833,35 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        return p;
 }
 
-static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+static inline void check_srcu_held_too_long(struct btree_trans *trans)
 {
-       struct bch_fs *c = trans->c;
-       struct btree_path *path;
+       WARN(time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+            "btree trans held srcu lock (delaying memory reclaim) by more than 10 seconds");
+}
 
-       trans_for_each_path(trans, path)
-               if (path->cached && !btree_node_locked(path, 0))
-                       path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+void bch2_trans_srcu_unlock(struct btree_trans *trans)
+{
+       if (trans->srcu_held) {
+               struct bch_fs *c = trans->c;
+               struct btree_path *path;
 
-       srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
-       trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
-       trans->srcu_lock_time   = jiffies;
+               trans_for_each_path(trans, path)
+                       if (path->cached && !btree_node_locked(path, 0))
+                               path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+
+               check_srcu_held_too_long(trans);
+               srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+               trans->srcu_held = false;
+       }
+}
+
+void bch2_trans_srcu_lock(struct btree_trans *trans)
+{
+       if (!trans->srcu_held) {
+               trans->srcu_idx = srcu_read_lock(&trans->c->btree_trans_barrier);
+               trans->srcu_lock_time   = jiffies;
+               trans->srcu_held = true;
+       }
 }
 
 /**
@@ -2895,8 +2915,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
        }
        trans->last_begin_time = now;
 
-       if (unlikely(time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
-               bch2_trans_reset_srcu_lock(trans);
+       if (unlikely(trans->srcu_held &&
+                    time_after(jiffies, trans->srcu_lock_time + msecs_to_jiffies(10))))
+               bch2_trans_srcu_unlock(trans);
 
        trans->last_begin_ip = _RET_IP_;
        if (trans->restarted) {
@@ -2983,8 +3004,9 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
                trans->wb_updates_size = s->wb_updates_size;
        }
 
-       trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+       trans->srcu_idx         = srcu_read_lock(&c->btree_trans_barrier);
        trans->srcu_lock_time   = jiffies;
+       trans->srcu_held        = true;
 
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
                struct btree_trans *pos;
@@ -3061,7 +3083,10 @@ void bch2_trans_put(struct btree_trans *trans)
 
        check_btree_paths_leaked(trans);
 
-       srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+       if (trans->srcu_held) {
+               check_srcu_held_too_long(trans);
+               srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+       }
 
        bch2_journal_preres_put(&c->journal, &trans->journal_preres);
 
index 70759ee3e5c730092ccc7b12d13f7b73c85d136e..5e103f519e62ec280863c389cb765904a6becb91 100644 (file)
@@ -274,6 +274,7 @@ void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
 int bch2_trans_relock(struct btree_trans *);
 int bch2_trans_relock_notrace(struct btree_trans *);
 void bch2_trans_unlock(struct btree_trans *);
+void bch2_trans_unlock_long(struct btree_trans *);
 bool bch2_trans_locked(struct btree_trans *);
 
 static inline int trans_was_restarted(struct btree_trans *trans, u32 restart_count)
@@ -579,6 +580,9 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
        __bch2_bkey_get_val_typed(_trans, _btree_id, _pos, _flags,      \
                                  KEY_TYPE_##_type, sizeof(*_val), _val)
 
+void bch2_trans_srcu_unlock(struct btree_trans *);
+void bch2_trans_srcu_lock(struct btree_trans *);
+
 u32 bch2_trans_begin(struct btree_trans *);
 
 /*
index ba263302585d6ab62f71e48fe2d09eca8a910280..c4266835b8646f45adf9eb1a5f116bac12470fe1 100644 (file)
@@ -759,6 +759,12 @@ void bch2_trans_unlock(struct btree_trans *trans)
                bch2_assert_btree_nodes_not_locked();
 }
 
+void bch2_trans_unlock_long(struct btree_trans *trans)
+{
+       bch2_trans_unlock(trans);
+       bch2_trans_srcu_unlock(trans);
+}
+
 bool bch2_trans_locked(struct btree_trans *trans)
 {
        struct btree_path *path;
index 8140b6e6e9a65245615397dd60489feb57240da1..32693f7c6221043d0c28b07e57f1b396bd845582 100644 (file)
@@ -681,7 +681,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
                                                       BCH_JSET_ENTRY_overwrite,
                                                       i->btree_id, i->level,
                                                       i->old_k.u64s);
-                               bkey_reassemble(&entry->start[0],
+                               bkey_reassemble((struct bkey_i *) entry->start,
                                                (struct bkey_s_c) { &i->old_k, i->old_v });
                        }
 
@@ -689,7 +689,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
                                               BCH_JSET_ENTRY_btree_keys,
                                               i->btree_id, i->level,
                                               i->k->k.u64s);
-                       bkey_copy(&entry->start[0], i->k);
+                       bkey_copy((struct bkey_i *) entry->start, i->k);
                }
 
                trans_for_each_wb_update(trans, wb) {
@@ -697,7 +697,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
                                               BCH_JSET_ENTRY_btree_keys,
                                               wb->btree, 0,
                                               wb->k.k.u64s);
-                       bkey_copy(&entry->start[0], &wb->k);
+                       bkey_copy((struct bkey_i *) entry->start, &wb->k);
                }
 
                if (trans->journal_seq)
index cbcb04a45e8eed4ba27a51022e323e511436c704..4b9cc61a4a6098f122c4d04a7657699ec20959dd 100644 (file)
@@ -432,6 +432,7 @@ struct btree_trans {
        u8                      nr_updates;
        u8                      nr_wb_updates;
        u8                      wb_updates_size;
+       bool                    srcu_held:1;
        bool                    used_mempool:1;
        bool                    in_traverse_all:1;
        bool                    paths_sorted:1;
index d029e0348c918a292d596af7ce520588235ccabd..89ada89eafe7b79096f806da7541b94939fd2da1 100644 (file)
@@ -2411,7 +2411,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry
 
        r->level = entry->level;
        r->alive = true;
-       bkey_copy(&r->key, &entry->start[0]);
+       bkey_copy(&r->key, (struct bkey_i *) entry->start);
 
        mutex_unlock(&c->btree_root_lock);
 }
index 5e0a467fe9056acf25ce8c3d65a1d5ca6adfc216..d92b3cf5f5e0f26b823d32a8719bf0f879cf994b 100644 (file)
@@ -271,7 +271,7 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
        struct btree_node_entry *bne = max(write_block(b),
                        (void *) btree_bkey_last(b, bset_tree_last(b)));
        ssize_t remaining_space =
-               __bch_btree_u64s_remaining(c, b, &bne->keys.start[0]);
+               __bch_btree_u64s_remaining(c, b, bne->keys.start);
 
        if (unlikely(bset_written(b, bset(b, t)))) {
                if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
index 114f86b45fd52ffc0e3be365f7eb4109e0358fdc..87b4b2d1ec766f65e97c40cab918b256015ae3ed 100644 (file)
@@ -69,9 +69,15 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
        _ret;                                                           \
 })
 
+#define darray_remove_item(_d, _pos)                                   \
+       array_remove_item((_d)->data, (_d)->nr, (_pos) - (_d)->data)
+
 #define darray_for_each(_d, _i)                                                \
        for (_i = (_d).data; _i < (_d).data + (_d).nr; _i++)
 
+#define darray_for_each_reverse(_d, _i)                                        \
+       for (_i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i)
+
 #define darray_init(_d)                                                        \
 do {                                                                   \
        (_d)->data = NULL;                                              \
index bb5305441f275938d4f76535a9b9bf1aaccf7934..4496cf91a4c17bcde4e4a934eb0475007ff1311c 100644 (file)
@@ -51,7 +51,7 @@ int bch2_create_trans(struct btree_trans *trans,
                bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
 
                if (flags & BCH_CREATE_TMPFILE)
-                       new_inode->bi_flags |= BCH_INODE_UNLINKED;
+                       new_inode->bi_flags |= BCH_INODE_unlinked;
 
                ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
                if (ret)
index 6040bd3f077819b41accbc08ac37fa5928d0592d..5a39bcb597a33d42826a16a98da394de3fe23660 100644 (file)
@@ -45,13 +45,13 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
        unsigned newflags = s->flags;
        unsigned oldflags = bi->bi_flags & s->mask;
 
-       if (((newflags ^ oldflags) & (BCH_INODE_APPEND|BCH_INODE_IMMUTABLE)) &&
+       if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return -EPERM;
 
        if (!S_ISREG(bi->bi_mode) &&
            !S_ISDIR(bi->bi_mode) &&
-           (newflags & (BCH_INODE_NODUMP|BCH_INODE_NOATIME)) != newflags)
+           (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
                return -EINVAL;
 
        if (s->set_projinherit) {
index 54a9c21a3b832ba9ad5c0281abe7363ec6f9cd9a..d30f9bb056fd9790f97c4b08f839b480bf46397c 100644 (file)
@@ -6,28 +6,28 @@
 
 /* bcachefs inode flags -> vfs inode flags: */
 static const __maybe_unused unsigned bch_flags_to_vfs[] = {
-       [__BCH_INODE_SYNC]      = S_SYNC,
-       [__BCH_INODE_IMMUTABLE] = S_IMMUTABLE,
-       [__BCH_INODE_APPEND]    = S_APPEND,
-       [__BCH_INODE_NOATIME]   = S_NOATIME,
+       [__BCH_INODE_sync]      = S_SYNC,
+       [__BCH_INODE_immutable] = S_IMMUTABLE,
+       [__BCH_INODE_append]    = S_APPEND,
+       [__BCH_INODE_noatime]   = S_NOATIME,
 };
 
 /* bcachefs inode flags -> FS_IOC_GETFLAGS: */
 static const __maybe_unused unsigned bch_flags_to_uflags[] = {
-       [__BCH_INODE_SYNC]      = FS_SYNC_FL,
-       [__BCH_INODE_IMMUTABLE] = FS_IMMUTABLE_FL,
-       [__BCH_INODE_APPEND]    = FS_APPEND_FL,
-       [__BCH_INODE_NODUMP]    = FS_NODUMP_FL,
-       [__BCH_INODE_NOATIME]   = FS_NOATIME_FL,
+       [__BCH_INODE_sync]      = FS_SYNC_FL,
+       [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
+       [__BCH_INODE_append]    = FS_APPEND_FL,
+       [__BCH_INODE_nodump]    = FS_NODUMP_FL,
+       [__BCH_INODE_noatime]   = FS_NOATIME_FL,
 };
 
 /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
 static const __maybe_unused unsigned bch_flags_to_xflags[] = {
-       [__BCH_INODE_SYNC]      = FS_XFLAG_SYNC,
-       [__BCH_INODE_IMMUTABLE] = FS_XFLAG_IMMUTABLE,
-       [__BCH_INODE_APPEND]    = FS_XFLAG_APPEND,
-       [__BCH_INODE_NODUMP]    = FS_XFLAG_NODUMP,
-       [__BCH_INODE_NOATIME]   = FS_XFLAG_NOATIME,
+       [__BCH_INODE_sync]      = FS_XFLAG_SYNC,
+       [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE,
+       [__BCH_INODE_append]    = FS_XFLAG_APPEND,
+       [__BCH_INODE_nodump]    = FS_XFLAG_NODUMP,
+       [__BCH_INODE_noatime]   = FS_XFLAG_NOATIME,
        //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT;
 };
 
index 89759e6a74dbda39061f746e73b9d4f3f8de1997..693f34747841181b75f673780dd27f6d2a647ad0 100644 (file)
@@ -66,11 +66,11 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
        inode->v.i_mode = bi->bi_mode;
 
        if (fields & ATTR_ATIME)
-               inode->v.i_atime = bch2_time_to_timespec(c, bi->bi_atime);
+               inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
        if (fields & ATTR_MTIME)
-               inode->v.i_mtime = bch2_time_to_timespec(c, bi->bi_mtime);
+               inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
        if (fields & ATTR_CTIME)
-               inode->v.i_ctime = bch2_time_to_timespec(c, bi->bi_ctime);
+               inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
 
        inode->ei_inode         = *bi;
 
@@ -753,9 +753,9 @@ static int bch2_getattr(struct mnt_idmap *idmap,
        stat->gid       = inode->v.i_gid;
        stat->rdev      = inode->v.i_rdev;
        stat->size      = i_size_read(&inode->v);
-       stat->atime     = inode->v.i_atime;
-       stat->mtime     = inode->v.i_mtime;
-       stat->ctime     = inode->v.i_ctime;
+       stat->atime     = inode_get_atime(&inode->v);
+       stat->mtime     = inode_get_mtime(&inode->v);
+       stat->ctime     = inode_get_ctime(&inode->v);
        stat->blksize   = block_bytes(c);
        stat->blocks    = inode->v.i_blocks;
 
@@ -764,15 +764,15 @@ static int bch2_getattr(struct mnt_idmap *idmap,
                stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
        }
 
-       if (inode->ei_inode.bi_flags & BCH_INODE_IMMUTABLE)
+       if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
                stat->attributes |= STATX_ATTR_IMMUTABLE;
        stat->attributes_mask    |= STATX_ATTR_IMMUTABLE;
 
-       if (inode->ei_inode.bi_flags & BCH_INODE_APPEND)
+       if (inode->ei_inode.bi_flags & BCH_INODE_append)
                stat->attributes |= STATX_ATTR_APPEND;
        stat->attributes_mask    |= STATX_ATTR_APPEND;
 
-       if (inode->ei_inode.bi_flags & BCH_INODE_NODUMP)
+       if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
                stat->attributes |= STATX_ATTR_NODUMP;
        stat->attributes_mask    |= STATX_ATTR_NODUMP;
 
@@ -1418,9 +1418,9 @@ static int inode_update_times_fn(struct btree_trans *trans,
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
 
-       bi->bi_atime    = timespec_to_bch2_time(c, inode->v.i_atime);
-       bi->bi_mtime    = timespec_to_bch2_time(c, inode->v.i_mtime);
-       bi->bi_ctime    = timespec_to_bch2_time(c, inode->v.i_ctime);
+       bi->bi_atime    = timespec_to_bch2_time(c, inode_get_atime(&inode->v));
+       bi->bi_mtime    = timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
+       bi->bi_ctime    = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
 
        return 0;
 }
index 0e470ebd7f103944cb7ba2a744a4c806ff9f3478..9f3e9bd3d767a75fb1a0734c0413193a671f3206 100644 (file)
@@ -854,9 +854,9 @@ static int check_inode(struct btree_trans *trans,
        BUG_ON(bch2_inode_unpack(k, &u));
 
        if (!full &&
-           !(u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|
-                           BCH_INODE_I_SECTORS_DIRTY|
-                           BCH_INODE_UNLINKED)))
+           !(u.bi_flags & (BCH_INODE_i_size_dirty|
+                           BCH_INODE_i_sectors_dirty|
+                           BCH_INODE_unlinked)))
                return 0;
 
        if (prev->bi_inum != u.bi_inum)
@@ -870,7 +870,7 @@ static int check_inode(struct btree_trans *trans,
                return -EINVAL;
        }
 
-       if ((u.bi_flags & (BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED)) &&
+       if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) &&
            bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
                struct bpos new_min_pos;
 
@@ -878,7 +878,7 @@ static int check_inode(struct btree_trans *trans,
                if (ret)
                        goto err;
 
-               u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY|BCH_INODE_UNLINKED;
+               u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked;
 
                ret = __write_inode(trans, &u, iter->pos.snapshot);
                bch_err_msg(c, ret, "in fsck updating inode");
@@ -890,7 +890,7 @@ static int check_inode(struct btree_trans *trans,
                return 0;
        }
 
-       if (u.bi_flags & BCH_INODE_UNLINKED &&
+       if (u.bi_flags & BCH_INODE_unlinked &&
            (!c->sb.clean ||
             fsck_err(c, inode_unlinked_but_clean,
                      "filesystem marked clean, but inode %llu unlinked",
@@ -903,7 +903,7 @@ static int check_inode(struct btree_trans *trans,
                return ret;
        }
 
-       if (u.bi_flags & BCH_INODE_I_SIZE_DIRTY &&
+       if (u.bi_flags & BCH_INODE_i_size_dirty &&
            (!c->sb.clean ||
             fsck_err(c, inode_i_size_dirty_but_clean,
                      "filesystem marked clean, but inode %llu has i_size dirty",
@@ -930,13 +930,13 @@ static int check_inode(struct btree_trans *trans,
                 * We truncated without our normal sector accounting hook, just
                 * make sure we recalculate it:
                 */
-               u.bi_flags |= BCH_INODE_I_SECTORS_DIRTY;
+               u.bi_flags |= BCH_INODE_i_sectors_dirty;
 
-               u.bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+               u.bi_flags &= ~BCH_INODE_i_size_dirty;
                do_update = true;
        }
 
-       if (u.bi_flags & BCH_INODE_I_SECTORS_DIRTY &&
+       if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
            (!c->sb.clean ||
             fsck_err(c, inode_i_sectors_dirty_but_clean,
                      "filesystem marked clean, but inode %llu has i_sectors dirty",
@@ -953,14 +953,14 @@ static int check_inode(struct btree_trans *trans,
                }
 
                u.bi_sectors = sectors;
-               u.bi_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
+               u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
                do_update = true;
        }
 
-       if (u.bi_flags & BCH_INODE_BACKPTR_UNTRUSTED) {
+       if (u.bi_flags & BCH_INODE_backptr_untrusted) {
                u.bi_dir = 0;
                u.bi_dir_offset = 0;
-               u.bi_flags &= ~BCH_INODE_BACKPTR_UNTRUSTED;
+               u.bi_flags &= ~BCH_INODE_backptr_untrusted;
                do_update = true;
        }
 
@@ -1065,7 +1065,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
                        return -BCH_ERR_internal_fsck_err;
                }
 
-               if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY),
+               if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
                                c, inode_i_sectors_wrong,
                                "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
                                w->last_pos.inode, i->snapshot,
@@ -1405,7 +1405,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        continue;
 
                if (k.k->type != KEY_TYPE_whiteout) {
-                       if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+                       if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
                                        k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
                                        !bkey_extent_is_reservation(k),
                                        c, extent_past_end_of_inode,
@@ -1588,7 +1588,7 @@ static int check_dirent_target(struct btree_trans *trans,
                                "inode %llu type %s has multiple links but i_nlink 0",
                                target->bi_inum, bch2_d_types[d.v->d_type])) {
                        target->bi_nlink++;
-                       target->bi_flags &= ~BCH_INODE_UNLINKED;
+                       target->bi_flags &= ~BCH_INODE_unlinked;
 
                        ret = __write_inode(trans, target, target_snapshot);
                        if (ret)
@@ -2160,7 +2160,7 @@ int bch2_check_directory_structure(struct bch_fs *c)
                        break;
                }
 
-               if (u.bi_flags & BCH_INODE_UNLINKED)
+               if (u.bi_flags & BCH_INODE_unlinked)
                        continue;
 
                ret = check_path(trans, &path, &u, iter.pos.snapshot);
index 925d1b7f28877de06c1e424eb91ec2b691f75716..8b3c675eb2957183adeda0bdb9dcb629acca0028 100644 (file)
 
 #include <asm/unaligned.h>
 
-const char * const bch2_inode_opts[] = {
 #define x(name, ...)   #name,
+const char * const bch2_inode_opts[] = {
        BCH_INODE_OPTS()
-#undef  x
        NULL,
 };
 
+static const char * const bch2_inode_flag_strs[] = {
+       BCH_INODE_FLAGS()
+       NULL
+};
+#undef  x
+
 static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
 
 static int inode_decode_field(const u8 *in, const u8 *end,
@@ -425,7 +430,7 @@ static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct prin
                         inode_compression_type_invalid,
                         "invalid compression opt %u", unpacked.bi_compression - 1);
 
-       bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_UNLINKED) &&
+       bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) &&
                         unpacked.bi_nlink != 0, c, err,
                         inode_unlinked_but_nlink_nonzero,
                         "flagged as unlinked but bi_nlink != 0");
@@ -499,15 +504,20 @@ fsck_err:
 static void __bch2_inode_unpacked_to_text(struct printbuf *out,
                                          struct bch_inode_unpacked *inode)
 {
-       prt_printf(out, "mode %o flags %x journal_seq %llu bi_size %llu bi_sectors %llu bi_version %llu",
-              inode->bi_mode, inode->bi_flags,
+       prt_printf(out, "mode=%o ", inode->bi_mode);
+
+       prt_str(out, "flags=");
+       prt_bitflags(out, bch2_inode_flag_strs, inode->bi_flags & ((1U << 20) - 1));
+       prt_printf(out, " (%x)", inode->bi_flags);
+
+       prt_printf(out, " journal_seq=%llu bi_size=%llu bi_sectors=%llu bi_version=%llu",
               inode->bi_journal_seq,
               inode->bi_size,
               inode->bi_sectors,
               inode->bi_version);
 
 #define x(_name, _bits)                                                \
-       prt_printf(out, " "#_name " %llu", (u64) inode->_name);
+       prt_printf(out, " "#_name "=%llu", (u64) inode->_name);
        BCH_INODE_FIELDS_v3()
 #undef  x
 }
@@ -546,7 +556,7 @@ static inline u64 bkey_inode_flags(struct bkey_s_c k)
 
 static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
 {
-       return bkey_inode_flags(k) & BCH_INODE_UNLINKED;
+       return bkey_inode_flags(k) & BCH_INODE_unlinked;
 }
 
 int bch2_trans_mark_inode(struct btree_trans *trans,
@@ -927,8 +937,8 @@ int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum,
 
 int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
 {
-       if (bi->bi_flags & BCH_INODE_UNLINKED)
-               bi->bi_flags &= ~BCH_INODE_UNLINKED;
+       if (bi->bi_flags & BCH_INODE_unlinked)
+               bi->bi_flags &= ~BCH_INODE_unlinked;
        else {
                if (bi->bi_nlink == U32_MAX)
                        return -EINVAL;
@@ -941,13 +951,13 @@ int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi)
 
 void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *bi)
 {
-       if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_UNLINKED)) {
+       if (bi->bi_nlink && (bi->bi_flags & BCH_INODE_unlinked)) {
                bch2_trans_inconsistent(trans, "inode %llu unlinked but link count nonzero",
                                        bi->bi_inum);
                return;
        }
 
-       if (bi->bi_flags & BCH_INODE_UNLINKED) {
+       if (bi->bi_flags & BCH_INODE_unlinked) {
                bch2_trans_inconsistent(trans, "inode %llu link count underflow", bi->bi_inum);
                return;
        }
@@ -955,7 +965,7 @@ void bch2_inode_nlink_dec(struct btree_trans *trans, struct bch_inode_unpacked *
        if (bi->bi_nlink)
                bi->bi_nlink--;
        else
-               bi->bi_flags |= BCH_INODE_UNLINKED;
+               bi->bi_flags |= BCH_INODE_unlinked;
 }
 
 struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *inode)
@@ -1094,7 +1104,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos)
                        pos.offset, pos.snapshot))
                goto delete;
 
-       if (fsck_err_on(!(inode.bi_flags & BCH_INODE_UNLINKED), c,
+       if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), c,
                        deleted_inode_not_unlinked,
                        "non-deleted inode %llu:%u in deleted_inodes btree",
                        pos.offset, pos.snapshot))
index 74c62e6c16cc0669b82ffc74f6fd34a88843044c..5068ba9ca8ba2e549e1131649dd6e559397c4b3f 100644 (file)
@@ -177,7 +177,7 @@ static inline unsigned nlink_bias(umode_t mode)
 
 static inline unsigned bch2_inode_nlink_get(struct bch_inode_unpacked *bi)
 {
-       return bi->bi_flags & BCH_INODE_UNLINKED
+       return bi->bi_flags & BCH_INODE_unlinked
                  ? 0
                  : bi->bi_nlink + nlink_bias(bi->bi_mode);
 }
@@ -187,10 +187,10 @@ static inline void bch2_inode_nlink_set(struct bch_inode_unpacked *bi,
 {
        if (nlink) {
                bi->bi_nlink = nlink - nlink_bias(bi->bi_mode);
-               bi->bi_flags &= ~BCH_INODE_UNLINKED;
+               bi->bi_flags &= ~BCH_INODE_unlinked;
        } else {
                bi->bi_nlink = 0;
-               bi->bi_flags |= BCH_INODE_UNLINKED;
+               bi->bi_flags |= BCH_INODE_unlinked;
        }
 }
 
index 613f384366403816dd4a0ca5cf066e8b55b3d8f5..fbfc42ff08036a17efec5fe573fede3335cce24d 100644 (file)
@@ -223,7 +223,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
 
        inode = bkey_i_to_inode_v3(k);
 
-       if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_I_SIZE_DIRTY) &&
+       if (!(le64_to_cpu(inode->v.bi_flags) & BCH_INODE_i_size_dirty) &&
            new_i_size > le64_to_cpu(inode->v.bi_size)) {
                inode->v.bi_size = cpu_to_le64(new_i_size);
                inode_update_flags = 0;
index 1b15b010461ae19b7b52dfda1f7bcbcbad63bb93..ab749bf2fcbc551e68753857efdf008848d140b7 100644 (file)
@@ -147,9 +147,8 @@ void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
 {
        struct moving_io *io;
 
-       bch2_trans_unlock(ctxt->trans);
-
        while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
+               bch2_trans_unlock_long(ctxt->trans);
                list_del(&io->read_list);
                move_write(io);
        }
@@ -485,8 +484,8 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
        struct bch_fs *c = ctxt->trans->c;
        u64 delay;
 
-       if (ctxt->wait_on_copygc) {
-               bch2_trans_unlock(ctxt->trans);
+       if (ctxt->wait_on_copygc && !c->copygc_running) {
+               bch2_trans_unlock_long(ctxt->trans);
                wait_event_killable(c->copygc_running_wq,
                                    !c->copygc_running ||
                                    kthread_should_stop());
@@ -495,8 +494,12 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
        do {
                delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
 
+
                if (delay) {
-                       bch2_trans_unlock(ctxt->trans);
+                       if (delay > HZ / 10)
+                               bch2_trans_unlock_long(ctxt->trans);
+                       else
+                               bch2_trans_unlock(ctxt->trans);
                        set_current_state(TASK_INTERRUPTIBLE);
                }
 
index 1b1e8678bfaef452f3d8ccd456fc679bdc8b46c5..07cf9d42643b4fe537b6db513285efc1f65bd366 100644 (file)
@@ -45,6 +45,7 @@ do {                                                                  \
                                                                        \
        if (_cond)                                                      \
                break;                                                  \
+       bch2_trans_unlock_long((_ctxt)->trans);                         \
        __wait_event((_ctxt)->wait,                                     \
                     bch2_moving_ctxt_next_pending_write(_ctxt) ||      \
                     (cond_finished = (_cond)));                        \
index f73b9b7f4bf7ee111e586754d868e13c5fc1d0bb..0158c7aae6b066a75f0d08ebdc5152cd6b9246fb 100644 (file)
@@ -128,7 +128,7 @@ static void move_buckets_wait(struct moving_context *ctxt,
                kfree(i);
        }
 
-       bch2_trans_unlock(ctxt->trans);
+       bch2_trans_unlock_long(ctxt->trans);
 }
 
 static bool bucket_in_flight(struct buckets_in_flight *list,
@@ -188,7 +188,8 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
 
 noinline
 static int bch2_copygc(struct moving_context *ctxt,
-                      struct buckets_in_flight *buckets_in_flight)
+                      struct buckets_in_flight *buckets_in_flight,
+                      bool *did_work)
 {
        struct btree_trans *trans = ctxt->trans;
        struct bch_fs *c = trans->c;
@@ -224,6 +225,8 @@ static int bch2_copygc(struct moving_context *ctxt,
                                             f->bucket.k.gen, data_opts);
                if (ret)
                        goto err;
+
+               *did_work = true;
        }
 err:
        darray_exit(&buckets);
@@ -302,14 +305,16 @@ static int bch2_copygc_thread(void *arg)
        struct moving_context ctxt;
        struct bch_move_stats move_stats;
        struct io_clock *clock = &c->io_clock[WRITE];
-       struct buckets_in_flight buckets;
+       struct buckets_in_flight *buckets;
        u64 last, wait;
        int ret = 0;
 
-       memset(&buckets, 0, sizeof(buckets));
-
-       ret = rhashtable_init(&buckets.table, &bch_move_bucket_params);
+       buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL);
+       if (!buckets)
+               return -ENOMEM;
+       ret = rhashtable_init(&buckets->table, &bch_move_bucket_params);
        if (ret) {
+               kfree(buckets);
                bch_err_msg(c, ret, "allocating copygc buckets in flight");
                return ret;
        }
@@ -322,16 +327,18 @@ static int bch2_copygc_thread(void *arg)
                              false);
 
        while (!ret && !kthread_should_stop()) {
-               bch2_trans_unlock(ctxt.trans);
+               bool did_work = false;
+
+               bch2_trans_unlock_long(ctxt.trans);
                cond_resched();
 
                if (!c->copy_gc_enabled) {
-                       move_buckets_wait(&ctxt, &buckets, true);
+                       move_buckets_wait(&ctxt, buckets, true);
                        kthread_wait_freezable(c->copy_gc_enabled);
                }
 
                if (unlikely(freezing(current))) {
-                       move_buckets_wait(&ctxt, &buckets, true);
+                       move_buckets_wait(&ctxt, buckets, true);
                        __refrigerator(false);
                        continue;
                }
@@ -342,7 +349,7 @@ static int bch2_copygc_thread(void *arg)
                if (wait > clock->max_slop) {
                        c->copygc_wait_at = last;
                        c->copygc_wait = last + wait;
-                       move_buckets_wait(&ctxt, &buckets, true);
+                       move_buckets_wait(&ctxt, buckets, true);
                        trace_and_count(c, copygc_wait, c, wait, last + wait);
                        bch2_kthread_io_clock_wait(clock, last + wait,
                                        MAX_SCHEDULE_TIMEOUT);
@@ -352,14 +359,26 @@ static int bch2_copygc_thread(void *arg)
                c->copygc_wait = 0;
 
                c->copygc_running = true;
-               ret = bch2_copygc(&ctxt, &buckets);
+               ret = bch2_copygc(&ctxt, buckets, &did_work);
                c->copygc_running = false;
 
                wake_up(&c->copygc_running_wq);
+
+               if (!wait && !did_work) {
+                       u64 min_member_capacity = bch2_min_rw_member_capacity(c);
+
+                       if (min_member_capacity == U64_MAX)
+                               min_member_capacity = 128 * 2048;
+
+                       bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
+                                       MAX_SCHEDULE_TIMEOUT);
+               }
        }
 
-       move_buckets_wait(&ctxt, &buckets, true);
-       rhashtable_destroy(&buckets.table);
+       move_buckets_wait(&ctxt, buckets, true);
+
+       rhashtable_destroy(&buckets->table);
+       kfree(buckets);
        bch2_moving_ctxt_exit(&ctxt);
        bch2_move_stats_exit(&move_stats, c);
 
index 6ee4d2e02073851a07ad1ef28eb02150e2a430e1..3319190b8d9c330fde44ad959bc299aa00d2ba87 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "btree_iter.h"
 #include "btree_update.h"
@@ -282,15 +283,12 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
 static void rebalance_wait(struct bch_fs *c)
 {
        struct bch_fs_rebalance *r = &c->rebalance;
-       struct bch_dev *ca;
        struct io_clock *clock = &c->io_clock[WRITE];
        u64 now = atomic64_read(&clock->now);
-       u64 min_member_capacity = 128 * 2048;
-       unsigned i;
+       u64 min_member_capacity = bch2_min_rw_member_capacity(c);
 
-       for_each_rw_member(ca, c, i)
-               min_member_capacity = min(min_member_capacity,
-                                         ca->mi.nbuckets * ca->mi.bucket_size);
+       if (min_member_capacity == U64_MAX)
+               min_member_capacity = 128 * 2048;
 
        r->wait_iotime_end              = now + (min_member_capacity >> 6);
 
@@ -350,7 +348,7 @@ static int do_rebalance(struct moving_context *ctxt)
            !kthread_should_stop() &&
            !atomic64_read(&r->work_stats.sectors_seen) &&
            !atomic64_read(&r->scan_stats.sectors_seen)) {
-               bch2_trans_unlock(trans);
+               bch2_trans_unlock_long(trans);
                rebalance_wait(c);
        }
 
index f73338f37bf1a800325f11665d378d4c95bcc88b..9600b808317534b317e5836802f483c67299c46a 100644 (file)
@@ -226,7 +226,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
 
                if (entry->u64s) {
                        r->level = entry->level;
-                       bkey_copy(&r->key, &entry->start[0]);
+                       bkey_copy(&r->key, (struct bkey_i *) entry->start);
                        r->error = 0;
                } else {
                        r->error = -EIO;
index eb31df605c2e0c0f11fb71be8b399f0450adc360..6e1bfe9feb59e4abe96e1dc74b30196fa5766f48 100644 (file)
@@ -255,7 +255,7 @@ s64 bch2_remap_range(struct bch_fs *c,
        struct bpos dst_end = dst_start, src_end = src_start;
        struct bch_io_opts opts;
        struct bpos src_want;
-       u64 dst_done;
+       u64 dst_done = 0;
        u32 dst_snapshot, src_snapshot;
        int ret = 0, ret2 = 0;
 
index 3d66f15ae8f50ed055eff3d70fddb570d174c444..9215d414b5253c81aa9cd6ee48eb214afa354164 100644 (file)
@@ -61,7 +61,6 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
 {
        struct bch_sb_field_errors *e = field_to_type(f, errors);
        unsigned i, nr = bch2_sb_field_errors_nr_entries(e);
-       u64 now = ktime_get_real_seconds();
 
        if (out->nr_tabstops <= 1)
                printbuf_tabstop_push(out, 16);
@@ -71,9 +70,7 @@ static void bch2_sb_errors_to_text(struct printbuf *out, struct bch_sb *sb,
                prt_tab(out);
                prt_u64(out, BCH_SB_ERROR_ENTRY_NR(&e->entries[i]));
                prt_tab(out);
-               bch2_pr_time_units(out, (now - le64_to_cpu(e->entries[i].last_error_time)) *
-                                  NSEC_PER_SEC);
-               prt_str(out, " ago");
+               bch2_prt_date_seconds(out, le64_to_cpu(e->entries[i].last_error_time));
                prt_newline(out);
        }
 }
index ab5de12eca4acdf230a4e26fee884cff059c6ef8..6a7e20de971c4bdedaec5d5d672c9dce38df32a2 100644 (file)
@@ -21,19 +21,14 @@ char * const bch2_member_error_strs[] = {
 
 /* Code for bch_sb_field_members_v1: */
 
-static struct bch_member *members_v2_get_mut(struct bch_sb_field_members_v2 *mi, int i)
-{
-       return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
-}
-
 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
 {
-       return members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
+       return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
 }
 
 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
 {
-       struct bch_member ret, *p = members_v2_get_mut(mi, i);
+       struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
        memset(&ret, 0, sizeof(ret));
        memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
        return ret;
@@ -75,7 +70,7 @@ static int sb_members_v2_resize_entries(struct bch_fs *c)
 
                for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
                        void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
-                       memmove(dst, members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
+                       memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
                        memset(dst + le16_to_cpu(mi->member_bytes),
                               0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
                }
@@ -118,7 +113,7 @@ int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
        mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
 
        for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
-               memcpy(members_v1_get_mut(mi1, i), members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
+               memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
 
        return 0;
 }
@@ -235,7 +230,7 @@ static void member_to_text(struct printbuf *out,
        prt_printf(out, "Last mount:");
        prt_tab(out);
        if (m.last_mount)
-               pr_time(out, le64_to_cpu(m.last_mount));
+               bch2_prt_date_seconds(out, le64_to_cpu(m.last_mount));
        else
                prt_printf(out, "(never)");
        prt_newline(out);
@@ -332,7 +327,7 @@ static int bch2_sb_members_v2_validate(struct bch_sb *sb,
                                       struct printbuf *err)
 {
        struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
-       size_t mi_bytes = (void *) members_v2_get_mut(mi, sb->nr_devices) -
+       size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
                (void *) mi;
 
        if (mi_bytes > vstruct_bytes(&mi->field)) {
@@ -363,7 +358,7 @@ void bch2_sb_members_from_cpu(struct bch_fs *c)
 
        rcu_read_lock();
        for_each_member_device_rcu(ca, c, i, NULL) {
-               struct bch_member *m = members_v2_get_mut(mi, i);
+               struct bch_member *m = __bch2_members_v2_get_mut(mi, i);
 
                for (e = 0; e < BCH_MEMBER_ERROR_NR; e++)
                        m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
index 1583e80afcbf3387c858d9941b4e9036c682947d..03613e3eb8e3df5bcda99218fb0168cf1f8bf9f8 100644 (file)
@@ -4,6 +4,12 @@
 
 extern char * const bch2_member_error_strs[];
 
+static inline struct bch_member *
+__bch2_members_v2_get_mut(struct bch_sb_field_members_v2 *mi, unsigned i)
+{
+       return (void *) mi->_members + (i * le16_to_cpu(mi->member_bytes));
+}
+
 int bch2_sb_members_v2_init(struct bch_fs *c);
 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb);
 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i);
@@ -186,11 +192,10 @@ static inline bool bch2_member_exists(struct bch_member *m)
        return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
 }
 
-static inline bool bch2_dev_exists(struct bch_sb *sb,
-                                  unsigned dev)
+static inline bool bch2_dev_exists(struct bch_sb *sb, unsigned dev)
 {
        if (dev < sb->nr_devices) {
-       struct bch_member m = bch2_sb_member_get(sb, dev);
+               struct bch_member m = bch2_sb_member_get(sb, dev);
                return bch2_member_exists(&m);
        }
        return false;
index 458a1de0a6e39c89bb6840be729a4fc3f2a52ec5..d22826cabee07d022194c93d31fe5e19055faaef 100644 (file)
@@ -323,99 +323,55 @@ EXPORT_SYMBOL_GPL(six_relock_ip);
 
 #ifdef CONFIG_LOCK_SPIN_ON_OWNER
 
-static inline bool six_can_spin_on_owner(struct six_lock *lock)
+static inline bool six_owner_running(struct six_lock *lock)
 {
-       struct task_struct *owner;
-       bool ret;
-
-       if (need_resched())
-               return false;
-
+       /*
+        * When there's no owner, we might have preempted between the owner
+        * acquiring the lock and setting the owner field. If we're an RT task
+        * that will live-lock because we won't let the owner complete.
+        */
        rcu_read_lock();
-       owner = READ_ONCE(lock->owner);
-       ret = !owner || owner_on_cpu(owner);
+       struct task_struct *owner = READ_ONCE(lock->owner);
+       bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
        rcu_read_unlock();
 
        return ret;
 }
 
-static inline bool six_spin_on_owner(struct six_lock *lock,
-                                    struct task_struct *owner,
-                                    u64 end_time)
+static inline bool six_optimistic_spin(struct six_lock *lock,
+                                      struct six_lock_waiter *wait,
+                                      enum six_lock_type type)
 {
-       bool ret = true;
        unsigned loop = 0;
-
-       rcu_read_lock();
-       while (lock->owner == owner) {
-               /*
-                * Ensure we emit the owner->on_cpu, dereference _after_
-                * checking lock->owner still matches owner. If that fails,
-                * owner might point to freed memory. If it still matches,
-                * the rcu_read_lock() ensures the memory stays valid.
-                */
-               barrier();
-
-               if (!owner_on_cpu(owner) || need_resched()) {
-                       ret = false;
-                       break;
-               }
-
-               if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
-                       six_set_bitmask(lock, SIX_LOCK_NOSPIN);
-                       ret = false;
-                       break;
-               }
-
-               cpu_relax();
-       }
-       rcu_read_unlock();
-
-       return ret;
-}
-
-static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
-{
-       struct task_struct *task = current;
        u64 end_time;
 
        if (type == SIX_LOCK_write)
                return false;
 
-       preempt_disable();
-       if (!six_can_spin_on_owner(lock))
-               goto fail;
+       if (lock->wait_list.next != &wait->list)
+               return false;
 
-       if (!osq_lock(&lock->osq))
-               goto fail;
+       if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
+               return false;
 
+       preempt_disable();
        end_time = sched_clock() + 10 * NSEC_PER_USEC;
 
-       while (1) {
-               struct task_struct *owner;
-
+       while (!need_resched() && six_owner_running(lock)) {
                /*
-                * If there's an owner, wait for it to either
-                * release the lock or go to sleep.
+                * Ensures that writes to the waitlist entry happen after we see
+                * wait->lock_acquired: pairs with the smp_store_release in
+                * __six_lock_wakeup
                 */
-               owner = READ_ONCE(lock->owner);
-               if (owner && !six_spin_on_owner(lock, owner, end_time))
-                       break;
-
-               if (do_six_trylock(lock, type, false)) {
-                       osq_unlock(&lock->osq);
+               if (smp_load_acquire(&wait->lock_acquired)) {
                        preempt_enable();
                        return true;
                }
 
-               /*
-                * When there's no owner, we might have preempted between the
-                * owner acquiring the lock and setting the owner field. If
-                * we're an RT task that will live-lock because we won't let
-                * the owner complete.
-                */
-               if (!owner && (need_resched() || rt_task(task)))
+               if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
+                       six_set_bitmask(lock, SIX_LOCK_NOSPIN);
                        break;
+               }
 
                /*
                 * The cpu_relax() call is a compiler barrier which forces
@@ -426,24 +382,15 @@ static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type
                cpu_relax();
        }
 
-       osq_unlock(&lock->osq);
-fail:
        preempt_enable();
-
-       /*
-        * If we fell out of the spin path because of need_resched(),
-        * reschedule now, before we try-lock again. This avoids getting
-        * scheduled out right after we obtained the lock.
-        */
-       if (need_resched())
-               schedule();
-
        return false;
 }
 
 #else /* CONFIG_LOCK_SPIN_ON_OWNER */
 
-static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
+static inline bool six_optimistic_spin(struct six_lock *lock,
+                                      struct six_lock_waiter *wait,
+                                      enum six_lock_type type)
 {
        return false;
 }
@@ -467,9 +414,6 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
        trace_contention_begin(lock, 0);
        lock_contended(&lock->dep_map, ip);
 
-       if (six_optimistic_spin(lock, type))
-               goto out;
-
        wait->task              = current;
        wait->lock_want         = type;
        wait->lock_acquired     = false;
@@ -507,6 +451,9 @@ static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
                ret = 0;
        }
 
+       if (six_optimistic_spin(lock, wait, type))
+               goto out;
+
        while (1) {
                set_current_state(TASK_UNINTERRUPTIBLE);
 
index 394da423c28e511f4e0d733708941c9385104b80..a7104ac1d35c2ad0c7e61ce15a398f875e450407 100644 (file)
  */
 
 #include <linux/lockdep.h>
-#include <linux/osq_lock.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 
@@ -140,7 +139,6 @@ struct six_lock {
        unsigned                intent_lock_recurse;
        struct task_struct      *owner;
        unsigned __percpu       *readers;
-       struct optimistic_spin_queue osq;
        raw_spinlock_t          wait_lock;
        struct list_head        wait_list;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
index 83bdb4368289f44a4675578b524346d7132872dc..a93e53d0b37ef623582beb8731f8c0b86bc51b2c 100644 (file)
@@ -1183,7 +1183,7 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
        prt_printf(out, "Created:");
        prt_tab(out);
        if (sb->time_base_lo)
-               pr_time(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
+               bch2_prt_date_seconds(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
        else
                prt_printf(out, "(not set)");
        prt_newline(out);
index 1b5c2a1bd68a26356b921baea4474791b72caa26..24672bb31cbe9c479964dffe1d1b979dd66013c7 100644 (file)
@@ -1885,9 +1885,9 @@ found:
 struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
                            struct bch_opts opts)
 {
-       struct bch_sb_handle *sb = NULL;
+       DARRAY(struct bch_sb_handle) sbs = { 0 };
        struct bch_fs *c = NULL;
-       unsigned i, best_sb = 0;
+       struct bch_sb_handle *sb, *best = NULL;
        struct printbuf errbuf = PRINTBUF;
        int ret = 0;
 
@@ -1899,49 +1899,46 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
                goto err;
        }
 
-       sb = kcalloc(nr_devices, sizeof(*sb), GFP_KERNEL);
-       if (!sb) {
-               ret = -ENOMEM;
+       ret = darray_make_room(&sbs, nr_devices);
+       if (ret)
                goto err;
-       }
 
-       for (i = 0; i < nr_devices; i++) {
-               ret = bch2_read_super(devices[i], &opts, &sb[i]);
+       for (unsigned i = 0; i < nr_devices; i++) {
+               struct bch_sb_handle sb = { NULL };
+
+               ret = bch2_read_super(devices[i], &opts, &sb);
                if (ret)
                        goto err;
 
+               BUG_ON(darray_push(&sbs, sb));
        }
 
-       for (i = 1; i < nr_devices; i++)
-               if (le64_to_cpu(sb[i].sb->seq) >
-                   le64_to_cpu(sb[best_sb].sb->seq))
-                       best_sb = i;
-
-       i = 0;
-       while (i < nr_devices) {
-               if (i != best_sb &&
-                   !bch2_dev_exists(sb[best_sb].sb, sb[i].sb->dev_idx)) {
-                       pr_info("%pg has been removed, skipping", sb[i].bdev);
-                       bch2_free_super(&sb[i]);
-                       array_remove_item(sb, nr_devices, i);
+       darray_for_each(sbs, sb)
+               if (!best || le64_to_cpu(sb->sb->seq) > le64_to_cpu(best->sb->seq))
+                       best = sb;
+
+       darray_for_each_reverse(sbs, sb) {
+               if (sb != best && !bch2_dev_exists(best->sb, sb->sb->dev_idx)) {
+                       pr_info("%pg has been removed, skipping", sb->bdev);
+                       bch2_free_super(sb);
+                       darray_remove_item(&sbs, sb);
+                       best -= best > sb;
                        continue;
                }
 
-               ret = bch2_dev_in_fs(sb[best_sb].sb, sb[i].sb);
+               ret = bch2_dev_in_fs(best->sb, sb->sb);
                if (ret)
                        goto err_print;
-               i++;
        }
 
-       c = bch2_fs_alloc(sb[best_sb].sb, opts);
-       if (IS_ERR(c)) {
-               ret = PTR_ERR(c);
+       c = bch2_fs_alloc(best->sb, opts);
+       ret = PTR_ERR_OR_ZERO(c);
+       if (ret)
                goto err;
-       }
 
        down_write(&c->state_lock);
-       for (i = 0; i < nr_devices; i++) {
-               ret = bch2_dev_attach_bdev(c, &sb[i]);
+       darray_for_each(sbs, sb) {
+               ret = bch2_dev_attach_bdev(c, sb);
                if (ret) {
                        up_write(&c->state_lock);
                        goto err;
@@ -1960,7 +1957,9 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
                        goto err;
        }
 out:
-       kfree(sb);
+       darray_for_each(sbs, sb)
+               bch2_free_super(sb);
+       darray_exit(&sbs);
        printbuf_exit(&errbuf);
        module_put(THIS_MODULE);
        return c;
@@ -1970,9 +1969,6 @@ err_print:
 err:
        if (!IS_ERR_OR_NULL(c))
                bch2_fs_stop(c);
-       if (sb)
-               for (i = 0; i < nr_devices; i++)
-                       bch2_free_super(&sb[i]);
        c = ERR_PTR(ret);
        goto out;
 }
index adeec805dd0cdb3dce3caecc0bef9a59b7ded7b8..7ba5df4e828608cb8c4e00597bdb460d2b78517e 100644 (file)
@@ -467,6 +467,24 @@ static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
        prt_printf(out, "%s", u->name);
 }
 
+#ifndef __KERNEL__
+#include <time.h>
+void bch2_prt_date_seconds(struct printbuf *out, time64_t sec)
+{
+       time_t t = sec;
+       char buf[64];
+       ctime_r(&t, buf);
+       prt_str(out, buf);
+}
+#else
+void bch2_prt_date_seconds(struct printbuf *out, time64_t sec)
+{
+       char buf[64];
+       snprintf(buf, sizeof(buf), "%ptT", &sec);
+       prt_u64(out, sec);
+}
+#endif
+
 #define TABSTOP_SIZE 12
 
 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
index 67f1a1d2a02d31a22edb411c5f7057b6cc8326a5..0595605e3180c4303b5f51598708e751f288fbaa 100644 (file)
@@ -244,26 +244,7 @@ do {                                                                       \
 #define prt_bitflags(...)              bch2_prt_bitflags(__VA_ARGS__)
 
 void bch2_pr_time_units(struct printbuf *, u64);
-
-#ifdef __KERNEL__
-static inline void pr_time(struct printbuf *out, u64 time)
-{
-       prt_printf(out, "%llu", time);
-}
-#else
-#include <time.h>
-static inline void pr_time(struct printbuf *out, u64 _time)
-{
-       char time_str[64];
-       time_t time = _time;
-       struct tm *tm = localtime(&time);
-       size_t err = strftime(time_str, sizeof(time_str), "%c", tm);
-       if (!err)
-               prt_printf(out, "(formatting error)");
-       else
-               prt_printf(out, "%s", time_str);
-}
-#endif
+void bch2_prt_date_seconds(struct printbuf *, time64_t);
 
 #ifdef __KERNEL__
 static inline void uuid_unparse_lower(u8 *uuid, char *out)
index 1faa24d6400e6097c8a0640d34768e32a9cd68b8..f86c9eeafb35ad9da21ebddda8a182ea27970ff8 100644 (file)
@@ -17,9 +17,8 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
 {
        int r = flags & CLOSURE_REMAINING_MASK;
 
-       if ((flags & CLOSURE_GUARD_MASK) ||
-           (!r && (flags & ~CLOSURE_DESTRUCTOR)))
-               panic("closure_put_after_sub: bogus flags %x remaining %i", flags, r);
+       BUG_ON(flags & CLOSURE_GUARD_MASK);
+       BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
 
        if (!r) {
                smp_acquire__after_ctrl_dep();