git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to 47ffed9fad bcachefs: bch2_btree_delete_range_trans() now...
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 10 Oct 2022 03:27:41 +0000 (23:27 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 10 Oct 2022 04:10:45 +0000 (00:10 -0400)
24 files changed:
.bcachefs_revision
include/linux/mm.h [new file with mode: 0644]
include/linux/rwsem.h
include/linux/sched.h
libbcachefs/backpointers.c
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_locking.c
libbcachefs/btree_update.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_interior.h
libbcachefs/btree_update_leaf.c
libbcachefs/data_update.c
libbcachefs/data_update.h
libbcachefs/debug.c
libbcachefs/ec.c
libbcachefs/errcode.h
libbcachefs/fs-io.c
libbcachefs/move.c
libbcachefs/super.c
libbcachefs/util.c
libbcachefs/util.h
linux/kthread.c
linux/shrinker.c

index 83d5a7dbfc7f191dda5960c484964b478c115195..1c9c4ec1ee5f70357c01488963a904e7aaed14a8 100644 (file)
@@ -1 +1 @@
-cbccc6d8692fdd3af7d5db97a065af5a47bc733c
+47ffed9fad891300a610191602a10ecd1e857cce
diff --git a/include/linux/mm.h b/include/linux/mm.h
new file mode 100644 (file)
index 0000000..4bf80ba
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_MM_H
+#define _TOOLS_LINUX_MM_H
+
+#include <linux/types.h>
+
+struct sysinfo {
+       long uptime;            /* Seconds since boot */
+       unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
+       unsigned long totalram; /* Total usable main memory size */
+       unsigned long freeram;  /* Available memory size */
+       unsigned long sharedram;        /* Amount of shared memory */
+       unsigned long bufferram;        /* Memory used by buffers */
+       unsigned long totalswap;        /* Total swap space size */
+       unsigned long freeswap; /* swap space still available */
+       __u16 procs;                    /* Number of current processes */
+       __u16 pad;                      /* Explicit padding for m68k */
+       unsigned long totalhigh;        /* Total high memory size */
+       unsigned long freehigh; /* Available high memory size */
+       __u32 mem_unit;                 /* Memory unit size in bytes */
+};
+
+extern void si_meminfo(struct sysinfo * val);
+
+#endif /* _TOOLS_LINUX_MM_H */
index 9d70e6e226ffbee5719d142a8e22d1c765412007..f851d6a2f2b7306df3d9d28ae5d0c524089d42e0 100644 (file)
@@ -19,6 +19,7 @@ static inline void init_rwsem(struct rw_semaphore *lock)
 }
 
 #define down_read(l)           pthread_rwlock_rdlock(&(l)->lock)
+#define down_read_killable(l)  (pthread_rwlock_rdlock(&(l)->lock), 0)
 #define down_read_trylock(l)   (!pthread_rwlock_tryrdlock(&(l)->lock))
 #define up_read(l)             pthread_rwlock_unlock(&(l)->lock)
 
index 48d20e29a1f334606c433f5220c051dd27444a31..ac6d27bb6b3bb3079c399330d7ec965408482676 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/bug.h>
 #include <linux/completion.h>
 #include <linux/jiffies.h>
+#include <linux/rwsem.h>
 #include <linux/time64.h>
 
 #define TASK_RUNNING           0
@@ -88,6 +89,10 @@ struct task_struct {
        pid_t                   pid;
 
        struct bio_list         *bio_list;
+
+       struct signal_struct    {
+               struct rw_semaphore exec_update_lock;
+       }                       *signal, _signal;
 };
 
 extern __thread struct task_struct *current;
@@ -157,4 +162,11 @@ static inline void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 #define current_kernel_time64()        current_kernel_time()
 #define CURRENT_TIME           (current_kernel_time())
 
+static inline unsigned int stack_trace_save_tsk(struct task_struct *task,
+                                 unsigned long *store, unsigned int size,
+                                 unsigned int skipnr)
+{
+       return 0;
+}
+
 #endif /* __TOOLS_LINUX_SCHED_H */
index 7e8b13015133c5dceb9e01fa05db1df72a440f54..ee7e610f9282e112b55f53b85a8ee7a2ec206fbc 100644 (file)
@@ -6,6 +6,8 @@
 #include "btree_update.h"
 #include "error.h"
 
+#include <linux/mm.h>
+
 #define MAX_EXTENT_COMPRESS_RATIO_SHIFT                10
 
 /*
@@ -802,6 +804,103 @@ err:
        return ret;
 }
 
+struct bbpos {
+       enum btree_id           btree;
+       struct bpos             pos;
+};
+
+static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
+{
+       return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);
+}
+
+static inline struct bbpos bbpos_successor(struct bbpos pos)
+{
+       if (bpos_cmp(pos.pos, SPOS_MAX)) {
+               pos.pos = bpos_successor(pos.pos);
+               return pos;
+       }
+
+       if (pos.btree != BTREE_ID_NR) {
+               pos.btree++;
+               pos.pos = POS_MIN;
+               return pos;
+       }
+
+       BUG();
+}
+
+#if 0
+static void bbpos_to_text(struct printbuf *out, struct bbpos pos)
+{
+       prt_str(out, bch2_btree_ids[pos.btree]);
+       prt_char(out, ':');
+       bch2_bpos_to_text(out, pos.pos);
+}
+#endif
+
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+       return (struct bbpos) {
+               .btree  = bp.btree_id,
+               .pos    = bp.pos,
+       };
+}
+
+int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
+                                unsigned btree_leaf_mask,
+                                unsigned btree_interior_mask,
+                                struct bbpos start, struct bbpos *end)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct sysinfo i;
+       size_t btree_nodes;
+       enum btree_id btree;
+       int ret = 0;
+
+       si_meminfo(&i);
+
+       btree_nodes = (i.totalram >> 1) / btree_bytes(trans->c);
+
+       for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
+               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+
+               if (!((1U << btree) & btree_leaf_mask) &&
+                   !((1U << btree) & btree_interior_mask))
+                       continue;
+
+               bch2_trans_node_iter_init(trans, &iter, btree,
+                                         btree == start.btree ? start.pos : POS_MIN,
+                                         0, depth, 0);
+               /*
+                * for_each_btree_key_continue() doesn't check the return value
+                * from bch2_btree_iter_advance(), which is needed when
+                * iterating over interior nodes where we'll see keys at
+                * SPOS_MAX:
+                */
+               do {
+                       k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
+                       ret = bkey_err(k);
+                       if (!k.k || ret)
+                               break;
+
+                       --btree_nodes;
+                       if (!btree_nodes) {
+                               end->btree = btree;
+                               end->pos = k.k->p;
+                               bch2_trans_iter_exit(trans, &iter);
+                               return 0;
+                       }
+               } while (bch2_btree_iter_advance(&iter));
+               bch2_trans_iter_exit(trans, &iter);
+       }
+
+       end->btree      = BTREE_ID_NR;
+       end->pos        = POS_MIN;
+       return ret;
+}
+
 int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
        struct btree_trans trans;
@@ -845,19 +944,26 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 
 static int check_one_backpointer(struct btree_trans *trans,
                                 struct bpos bucket,
-                                u64 *bp_offset)
+                                u64 *bp_offset,
+                                struct bbpos start,
+                                struct bbpos end)
 {
        struct btree_iter iter;
        struct bch_backpointer bp;
+       struct bbpos pos;
        struct bkey_s_c k;
        struct printbuf buf = PRINTBUF;
        int ret;
 
-       ret = bch2_get_next_backpointer(trans, bucket, -1,
-                                       bp_offset, &bp);
+       ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp);
        if (ret || *bp_offset == U64_MAX)
                return ret;
 
+       pos = bp_to_bbpos(bp);
+       if (bbpos_cmp(pos, start) < 0 ||
+           bbpos_cmp(pos, end) > 0)
+               return 0;
+
        k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
        ret = bkey_err(k);
        if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
@@ -880,29 +986,52 @@ fsck_err:
        return ret;
 }
 
-int bch2_check_backpointers_to_extents(struct bch_fs *c)
+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
+                                                  struct bbpos start,
+                                                  struct bbpos end)
 {
-       struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_init(&trans, c, 0, 0);
-       for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+       for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
                           BTREE_ITER_PREFETCH, k, ret) {
                u64 bp_offset = 0;
 
-               while (!(ret = commit_do(&trans, NULL, NULL,
-                                              BTREE_INSERT_LAZY_RW|
-                                              BTREE_INSERT_NOFAIL,
-                               check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+               while (!(ret = commit_do(trans, NULL, NULL,
+                                        BTREE_INSERT_LAZY_RW|
+                                        BTREE_INSERT_NOFAIL,
+                               check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
                       bp_offset < U64_MAX)
                        bp_offset++;
 
                if (ret)
                        break;
        }
-       bch2_trans_iter_exit(&trans, &iter);
-       bch2_trans_exit(&trans);
+       bch2_trans_iter_exit(trans, &iter);
        return ret < 0 ? ret : 0;
 }
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+       struct btree_trans trans;
+       struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+       while (1) {
+               ret =   bch2_get_btree_in_memory_pos(&trans,
+                                                    (1U << BTREE_ID_extents)|
+                                                    (1U << BTREE_ID_reflink),
+                                                    ~0,
+                                                    start, &end) ?:
+                       bch2_check_backpointers_to_extents_pass(&trans, start, end);
+               if (ret || end.btree == BTREE_ID_NR)
+                       break;
+
+               start = bbpos_successor(end);
+       }
+       bch2_trans_exit(&trans);
+
+       return ret;
+}
index 13ce29750d288bf20c8979ec7d80c92d7ad1409d..dd6b536ced6a918ebd1152da352d05609c9658a6 100644 (file)
@@ -1913,6 +1913,8 @@ do_write:
        u64s = bch2_sort_keys(i->start, &sort_iter, false);
        le16_add_cpu(&i->u64s, u64s);
 
+       BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
+
        set_needs_whiteout(i, false);
 
        /* do we have data to write? */
@@ -1922,6 +1924,10 @@ do_write:
        bytes_to_write = vstruct_end(i) - data;
        sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
 
+       if (!b->written &&
+           b->key.k.type == KEY_TYPE_btree_ptr_v2)
+               BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
+
        memset(data + bytes_to_write, 0,
               (sectors_to_write << 9) - bytes_to_write);
 
@@ -2010,11 +2016,6 @@ do_write:
 
        b->written += sectors_to_write;
 
-       if (wbio->wbio.first_btree_write &&
-           b->key.k.type == KEY_TYPE_btree_ptr_v2)
-               bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-                       cpu_to_le16(b->written);
-
        if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
                bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
                        cpu_to_le16(b->written);
@@ -2027,10 +2028,6 @@ do_write:
        return;
 err:
        set_btree_node_noevict(b);
-       if (!b->written &&
-           b->key.k.type == KEY_TYPE_btree_ptr_v2)
-               bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
-                       cpu_to_le16(sectors_to_write);
        b->written += sectors_to_write;
 nowrite:
        btree_bounce_free(c, bytes, used_mempool, data);
index 925ffb318445af397b88e4842dd6b3a6fddcdb63..dffb0170850dc06b893eef103ed45b67fca69cd8 100644 (file)
@@ -1850,10 +1850,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
        struct bkey_s_c k, k2;
        int ret;
 
-       EBUG_ON(iter->path->cached || iter->path->level);
+       EBUG_ON(iter->path->cached);
        bch2_btree_iter_verify(iter);
 
        while (1) {
+               struct btree_path_level *l;
+
                iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
                                        iter->flags & BTREE_ITER_INTENT,
                                        btree_iter_ip_allocated(iter));
@@ -1866,9 +1868,18 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
                        goto out;
                }
 
+               l = path_l(iter->path);
+
+               if (unlikely(!l->b)) {
+                       /* No btree nodes at requested level: */
+                       bch2_btree_iter_set_pos(iter, SPOS_MAX);
+                       k = bkey_s_c_null;
+                       goto out;
+               }
+
                btree_path_set_should_be_locked(iter->path);
 
-               k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
+               k = btree_path_level_peek_all(trans->c, l, &iter->k);
 
                if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
                    k.k &&
@@ -1889,7 +1900,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
                        : NULL;
                if (next_update &&
                    bpos_cmp(next_update->k.p,
-                            k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+                            k.k ? k.k->p : l->b->key.k.p) <= 0) {
                        iter->k = next_update->k;
                        k = bkey_i_to_s_c(next_update);
                }
@@ -1910,9 +1921,9 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
 
                if (likely(k.k)) {
                        break;
-               } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
+               } else if (likely(bpos_cmp(l->b->key.k.p, SPOS_MAX))) {
                        /* Advance to next leaf node: */
-                       search_key = bpos_successor(iter->path->l[0].b->key.k.p);
+                       search_key = bpos_successor(l->b->key.k.p);
                } else {
                        /* End of btree: */
                        bch2_btree_iter_set_pos(iter, SPOS_MAX);
index f4340086c357879caf63841831036b1e9516eb56..9a525d34c7f46a28a8f59291a70435437fe6a50b 100644 (file)
@@ -96,25 +96,26 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
 
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
-       int ret;
-
        if (i == g->g) {
                trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
-               ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
+               return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
        } else {
                i->trans->lock_must_abort = true;
-               ret = 0;
-       }
-
-       for (i = g->g + 1; i < g->g + g->nr; i++)
                wake_up_process(i->trans->locking_wait.task);
-       return ret;
+               return 0;
+       }
 }
 
 static noinline int break_cycle(struct lock_graph *g)
 {
        struct trans_waiting_for_lock *i;
 
+       /*
+        * We'd like to prioritize aborting transactions that have done less
+        * work - but it appears breaking cycles by telling other transactions
+        * to abort may still be buggy:
+        */
+#if 0
        for (i = g->g; i < g->g + g->nr; i++) {
                if (i->trans->lock_may_not_fail ||
                    i->trans->locking_wait.lock_want == SIX_LOCK_write)
@@ -130,7 +131,7 @@ static noinline int break_cycle(struct lock_graph *g)
 
                return abort_lock(g, i);
        }
-
+#endif
        for (i = g->g; i < g->g + g->nr; i++) {
                if (i->trans->lock_may_not_fail)
                        continue;
@@ -138,7 +139,29 @@ static noinline int break_cycle(struct lock_graph *g)
                return abort_lock(g, i);
        }
 
-       BUG();
+       {
+               struct bch_fs *c = g->g->trans->c;
+               struct printbuf buf = PRINTBUF;
+
+               bch_err(c, "cycle of nofail locks");
+
+               for (i = g->g; i < g->g + g->nr; i++) {
+                       struct btree_trans *trans = i->trans;
+
+                       bch2_btree_trans_to_text(&buf, trans);
+
+                       prt_printf(&buf, "backtrace:");
+                       prt_newline(&buf);
+                       printbuf_indent_add(&buf, 2);
+                       bch2_prt_backtrace(&buf, trans->locking_wait.task);
+                       printbuf_indent_sub(&buf, 2);
+                       prt_newline(&buf);
+               }
+
+               bch2_print_string_as_lines(KERN_ERR, buf.buf);
+               printbuf_exit(&buf);
+               BUG();
+       }
 }
 
 static void lock_graph_pop(struct lock_graph *g)
index 89941fb8caa06f35f00339e597f5a0bccfe2721c..1c2e7b2b4ed5b01b00b9e61d6bdaf0802bc69c48 100644 (file)
@@ -8,8 +8,8 @@
 struct bch_fs;
 struct btree;
 
-void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_path *,
-                                    struct btree *);
+void bch2_btree_node_prep_for_write(struct btree_trans *,
+                                   struct btree_path *, struct btree *);
 bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
                                struct btree *, struct btree_node_iter *,
                                struct bkey_i *);
index 578ba747826e0f4f84d1caf81d9d0410361b7a02..b966140760ff5db351b0ca997c9ef33b9f48fe5b 100644 (file)
@@ -23,9 +23,9 @@
 #include <linux/random.h>
 #include <trace/events/bcachefs.h>
 
-static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
-                                  struct btree_path *, struct btree *,
-                                  struct keylist *, unsigned);
+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
+                                 struct btree_path *, struct btree *,
+                                 struct keylist *, unsigned);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
 static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
@@ -37,8 +37,8 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
 
        path = bch2_path_get(trans, btree_id, pos, level + 1, level,
                             BTREE_ITER_NOPRESERVE|
-                            BTREE_ITER_INTENT, _THIS_IP_);
-       path = bch2_btree_path_make_mut(trans, path, true, _THIS_IP_);
+                            BTREE_ITER_INTENT, _RET_IP_);
+       path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_);
        bch2_btree_path_downgrade(trans, path);
        __bch2_btree_path_unlock(trans, path);
        return path;
@@ -195,6 +195,43 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
                }
 }
 
+static void bch2_btree_node_free_never_used(struct btree_update *as,
+                                           struct btree_trans *trans,
+                                           struct btree *b)
+{
+       struct bch_fs *c = as->c;
+       struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
+       struct btree_path *path;
+       unsigned level = b->c.level;
+
+       BUG_ON(!list_empty(&b->write_blocked));
+       BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
+
+       b->will_make_reachable = 0;
+       closure_put(&as->cl);
+
+       clear_btree_node_will_make_reachable(b);
+       clear_btree_node_accessed(b);
+       clear_btree_node_dirty_acct(c, b);
+       clear_btree_node_need_write(b);
+
+       mutex_lock(&c->btree_cache.lock);
+       list_del_init(&b->list);
+       bch2_btree_node_hash_remove(&c->btree_cache, b);
+       mutex_unlock(&c->btree_cache.lock);
+
+       BUG_ON(p->nr >= ARRAY_SIZE(p->b));
+       p->b[p->nr++] = b;
+
+       six_unlock_intent(&b->c.lock);
+
+       trans_for_each_path(trans, path)
+               if (path->l[level].b == b) {
+                       btree_node_unlock(trans, path, level);
+                       path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+               }
+}
+
 static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
                                             struct disk_reservation *res,
                                             struct closure *cl,
@@ -392,8 +429,6 @@ static struct btree *__btree_root_alloc(struct btree_update *as,
 
        btree_node_set_format(b, b->data->format);
        bch2_btree_build_aux_trees(b);
-
-       bch2_btree_update_add_new_node(as, b);
        six_unlock_write(&b->c.lock);
 
        return b;
@@ -859,6 +894,14 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
        mutex_unlock(&c->btree_interior_update_lock);
 
        btree_update_add_key(as, &as->new_keys, b);
+
+       if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+               unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
+               unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
+
+               bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+                       cpu_to_le16(sectors);
+       }
 }
 
 /*
@@ -1026,23 +1069,23 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                nr_nodes[!!update_level] += 1 + split;
                update_level++;
 
-               if (!btree_path_node(path, update_level))
-                       break;
+               ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+               if (ret)
+                       return ERR_PTR(ret);
 
-               /*
-                * XXX: figure out how far we might need to split,
-                * instead of locking/reserving all the way to the root:
-                */
-               split = update_level + 1 < BTREE_MAX_DEPTH;
-       }
+               if (!btree_path_node(path, update_level)) {
+                       /* Allocating new root? */
+                       nr_nodes[1] += split;
+                       update_level = BTREE_MAX_DEPTH;
+                       break;
+               }
 
-       /* Might have to allocate a new root: */
-       if (update_level < BTREE_MAX_DEPTH)
-               nr_nodes[1] += 1;
+               if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
+                                       BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+                       break;
 
-       ret = bch2_btree_path_upgrade(trans, path, U8_MAX);
-       if (ret)
-               return ERR_PTR(ret);
+               split = true;
+       }
 
        if (flags & BTREE_INSERT_GC_LOCK_HELD)
                lockdep_assert_held(&c->gc_lock);
@@ -1064,6 +1107,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        as->mode        = BTREE_INTERIOR_NO_UPDATE;
        as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
        as->btree_id    = path->btree_id;
+       as->update_level = update_level;
        INIT_LIST_HEAD(&as->list);
        INIT_LIST_HEAD(&as->unwritten_list);
        INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1191,7 +1235,6 @@ static void bch2_btree_set_root(struct btree_update *as,
        struct btree *old;
 
        trace_and_count(c, btree_node_set_root, c, b);
-       BUG_ON(!b->written);
 
        old = btree_node_root(c, b);
 
@@ -1315,8 +1358,6 @@ static struct btree *__btree_split_node(struct btree_update *as,
        SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
        n2->key.k.p = n1->key.k.p;
 
-       bch2_btree_update_add_new_node(as, n2);
-
        set1 = btree_bset_first(n1);
        set2 = btree_bset_first(n2);
 
@@ -1458,18 +1499,19 @@ static void btree_split_insert_keys(struct btree_update *as,
        btree_node_interior_verify(as->c, b);
 }
 
-static void btree_split(struct btree_update *as, struct btree_trans *trans,
-                       struct btree_path *path, struct btree *b,
-                       struct keylist *keys, unsigned flags)
+static int btree_split(struct btree_update *as, struct btree_trans *trans,
+                      struct btree_path *path, struct btree *b,
+                      struct keylist *keys, unsigned flags)
 {
        struct bch_fs *c = as->c;
        struct btree *parent = btree_node_parent(path, b);
        struct btree *n1, *n2 = NULL, *n3 = NULL;
        struct btree_path *path1 = NULL, *path2 = NULL;
        u64 start_time = local_clock();
+       int ret = 0;
 
        BUG_ON(!parent && (b != btree_node_root(c, b)));
-       BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
+       BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1));
 
        bch2_btree_interior_update_will_free_node(as, b);
 
@@ -1499,9 +1541,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                bch2_btree_path_level_init(trans, path2, n2);
 
                bch2_btree_update_add_new_node(as, n1);
-
-               bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-               bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+               bch2_btree_update_add_new_node(as, n2);
 
                /*
                 * Note that on recursive parent_keys == keys, so we
@@ -1524,9 +1564,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                        n3->sib_u64s[0] = U16_MAX;
                        n3->sib_u64s[1] = U16_MAX;
 
-                       btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+                       bch2_btree_update_add_new_node(as, n3);
 
-                       bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+                       btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
                }
        } else {
                trace_and_count(c, btree_node_compact, c, b);
@@ -1541,8 +1581,6 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
                bch2_btree_update_add_new_node(as, n1);
 
-               bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-
                if (parent)
                        bch2_keylist_add(&as->parent_keys, &n1->key);
        }
@@ -1551,7 +1589,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
 
        if (parent) {
                /* Split a non root node */
-               bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+               ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+               if (ret)
+                       goto err;
        } else if (n3) {
                bch2_btree_set_root(as, trans, path, n3);
        } else {
@@ -1559,11 +1599,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                bch2_btree_set_root(as, trans, path, n1);
        }
 
-       bch2_btree_update_get_open_buckets(as, n1);
-       if (n2)
-               bch2_btree_update_get_open_buckets(as, n2);
-       if (n3)
+       if (n3) {
                bch2_btree_update_get_open_buckets(as, n3);
+               bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+       }
+       if (n2) {
+               bch2_btree_update_get_open_buckets(as, n2);
+               bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+       }
+       bch2_btree_update_get_open_buckets(as, n1);
+       bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
 
        /*
         * The old node must be freed (in memory) _before_ unlocking the new
@@ -1584,7 +1629,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
        if (n2)
                six_unlock_intent(&n2->c.lock);
        six_unlock_intent(&n1->c.lock);
-
+out:
        if (path2) {
                __bch2_btree_path_unlock(trans, path2);
                bch2_path_put(trans, path2, true);
@@ -1600,6 +1645,14 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
                               ? BCH_TIME_btree_node_split
                               : BCH_TIME_btree_node_compact],
                               start_time);
+       return ret;
+err:
+       if (n3)
+               bch2_btree_node_free_never_used(as, trans, n3);
+       if (n2)
+               bch2_btree_node_free_never_used(as, trans, n2);
+       bch2_btree_node_free_never_used(as, trans, n1);
+       goto out;
 }
 
 static void
@@ -1634,22 +1687,30 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
  * If a split occurred, this function will return early. This can only happen
  * for leaf nodes -- inserts into interior nodes have to be atomic.
  */
-static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
-                                  struct btree_path *path, struct btree *b,
-                                  struct keylist *keys, unsigned flags)
+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
+                                 struct btree_path *path, struct btree *b,
+                                 struct keylist *keys, unsigned flags)
 {
        struct bch_fs *c = as->c;
        int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
+       int ret;
 
        lockdep_assert_held(&c->gc_lock);
-       BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
+       BUG_ON(!btree_node_intent_locked(path, b->c.level));
        BUG_ON(!b->c.level);
        BUG_ON(!as || as->b);
        bch2_verify_keylist_sorted(keys);
 
-       bch2_btree_node_lock_for_insert(trans, path, b);
+       if (!(local_clock() & 63))
+               return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
+       ret = bch2_btree_node_lock_write(trans, path, &b->c);
+       if (ret)
+               return ret;
+
+       bch2_btree_node_prep_for_write(trans, path, b);
 
        if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
                bch2_btree_node_unlock_write(trans, path, b);
@@ -1675,9 +1736,16 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
        bch2_btree_node_unlock_write(trans, path, b);
 
        btree_node_interior_verify(c, b);
-       return;
+       return 0;
 split:
-       btree_split(as, trans, path, b, keys, flags);
+       /*
+        * We could attempt to avoid the transaction restart, by calling
+        * bch2_btree_path_upgrade() and allocating more nodes:
+        */
+       if (b->c.level >= as->update_level)
+               return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
+       return btree_split(as, trans, path, b, keys, flags);
 }
 
 int bch2_btree_split_leaf(struct btree_trans *trans,
@@ -1694,10 +1762,15 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
        if (IS_ERR(as))
                return PTR_ERR(as);
 
-       btree_split(as, trans, path, b, NULL, flags);
+       ret = btree_split(as, trans, path, b, NULL, flags);
+       if (ret) {
+               bch2_btree_update_free(as, trans);
+               return ret;
+       }
+
        bch2_btree_update_done(as, trans);
 
-       for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
+       for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++)
                ret = bch2_foreground_maybe_merge(trans, path, l, flags);
 
        return ret;
@@ -1823,8 +1896,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        btree_set_min(n, prev->data->min_key);
        btree_set_max(n, next->data->max_key);
 
-       bch2_btree_update_add_new_node(as, n);
-
        n->data->format  = new_f;
        btree_node_set_format(n, new_f);
 
@@ -1834,13 +1905,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        bch2_btree_build_aux_trees(n);
        six_unlock_write(&n->c.lock);
 
+       bch2_btree_update_add_new_node(as, n);
+
        new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
        six_lock_increment(&n->c.lock, SIX_LOCK_intent);
        mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
        bch2_btree_path_level_init(trans, new_path, n);
 
-       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
        bkey_init(&delete.k);
        delete.k.p = prev->key.k.p;
        bch2_keylist_add(&as->parent_keys, &delete);
@@ -1848,11 +1919,14 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
        bch2_trans_verify_paths(trans);
 
-       bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+       ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+       if (ret)
+               goto err_free_update;
 
        bch2_trans_verify_paths(trans);
 
        bch2_btree_update_get_open_buckets(as, n);
+       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
        bch2_btree_node_free_inmem(trans, path, b);
        bch2_btree_node_free_inmem(trans, sib_path, m);
@@ -1873,6 +1947,10 @@ err:
        bch2_path_put(trans, sib_path, true);
        bch2_trans_verify_locks(trans);
        return ret;
+err_free_update:
+       bch2_btree_node_free_never_used(as, trans, n);
+       bch2_btree_update_free(as, trans);
+       goto out;
 }
 
 /**
@@ -1913,17 +1991,18 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
        trace_and_count(c, btree_node_rewrite, c, b);
 
-       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
-               bch2_btree_insert_node(as, trans, iter->path, parent,
-                                      &as->parent_keys, flags);
+               ret = bch2_btree_insert_node(as, trans, iter->path, parent,
+                                            &as->parent_keys, flags);
+               if (ret)
+                       goto err;
        } else {
                bch2_btree_set_root(as, trans, iter->path, n);
        }
 
        bch2_btree_update_get_open_buckets(as, n);
+       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
        bch2_btree_node_free_inmem(trans, iter->path, b);
 
@@ -1931,10 +2010,15 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        six_unlock_intent(&n->c.lock);
 
        bch2_btree_update_done(as, trans);
-       bch2_path_put(trans, new_path, true);
 out:
+       if (new_path)
+               bch2_path_put(trans, new_path, true);
        bch2_btree_path_downgrade(trans, iter->path);
        return ret;
+err:
+       bch2_btree_node_free_never_used(as, trans, n);
+       bch2_btree_update_free(as, trans);
+       goto out;
 }
 
 struct async_btree_rewrite {
index 7af810df8348e0ae9107ec737ef55f15acea80f1..dabe815965445484d2a24c7ab801d7bf0e19049a 100644 (file)
@@ -52,6 +52,7 @@ struct btree_update {
        unsigned                        took_gc_lock:1;
 
        enum btree_id                   btree_id;
+       unsigned                        update_level;
 
        struct disk_reservation         disk_res;
        struct journal_preres           journal_preres;
index 08d7001f72176f2b21211bcfb2e341432deaa98b..af3fbfcc5fac38ed90355dfbaee2289450f8ecd4 100644 (file)
@@ -56,9 +56,9 @@ static inline bool same_leaf_as_next(struct btree_trans *trans,
                insert_l(&i[0])->b == insert_l(&i[1])->b;
 }
 
-static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
-                                                 struct btree_path *path,
-                                                 struct btree *b)
+inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
+                                          struct btree_path *path,
+                                          struct btree *b)
 {
        struct bch_fs *c = trans->c;
 
@@ -77,14 +77,6 @@ static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
                bch2_btree_init_next(trans, b);
 }
 
-void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
-                                    struct btree_path *path,
-                                    struct btree *b)
-{
-       bch2_btree_node_lock_write_nofail(trans, path, &b->c);
-       bch2_btree_node_prep_for_write(trans, path, b);
-}
-
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
@@ -1631,7 +1623,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
        int ret = 0;
 
        bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
-       while ((k = bch2_btree_iter_peek(&iter)).k) {
+       while ((k = bch2_btree_iter_peek_upto(&iter, bpos_predecessor(end))).k) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(trans->c, 0);
                struct bkey_i delete;
@@ -1640,9 +1632,6 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
                if (ret)
                        goto err;
 
-               if (bkey_cmp(iter.pos, end) >= 0)
-                       break;
-
                bkey_init(&delete.k);
 
                /*
index 3102166d38c22ab297ed1f085f8f2ed617a9e774..5ef35e3be7d64f0d91aac88c4629d94e6d6166a1 100644 (file)
@@ -328,8 +328,9 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 
        i = 0;
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-               if (p.ptr.cached)
-                       m->data_opts.rewrite_ptrs &= ~(1U << i);
+               if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+                   p.ptr.cached)
+                       BUG();
 
                if (!((1U << i) & m->data_opts.rewrite_ptrs))
                        bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
@@ -365,5 +366,23 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
 
        m->op.nr_replicas = m->op.nr_replicas_required =
                hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
+
+       BUG_ON(!m->op.nr_replicas);
        return 0;
 }
+
+void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const struct bch_extent_ptr *ptr;
+       unsigned i = 0;
+
+       bkey_for_each_ptr(ptrs, ptr) {
+               if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) {
+                       opts->kill_ptrs |= 1U << i;
+                       opts->rewrite_ptrs ^= 1U << i;
+               }
+
+               i++;
+       }
+}
index e64505453a550d7e20f6e61ba418ba71bb3109ca..6793aa577cd207f55af29fb3dba8f9f954748002 100644 (file)
@@ -10,6 +10,7 @@ struct moving_context;
 
 struct data_update_opts {
        unsigned        rewrite_ptrs;
+       unsigned        kill_ptrs;
        u16             target;
        u8              extra_replicas;
        unsigned        btree_insert_flags;
@@ -34,5 +35,6 @@ int bch2_data_update_init(struct bch_fs *, struct data_update *,
                          struct write_point_specifier,
                          struct bch_io_opts, struct data_update_opts,
                          enum btree_id, struct bkey_s_c);
+void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
 
 #endif /* _BCACHEFS_DATA_UPDATE_H */
index 1d2a1615507392957d1337ed88a65922affdcf24..d87131f58c80fa14990c5d14e2034e20b3ee5b0d 100644 (file)
@@ -501,26 +501,6 @@ static const struct file_operations cached_btree_nodes_ops = {
        .read           = bch2_cached_btree_nodes_read,
 };
 
-static int prt_backtrace(struct printbuf *out, struct task_struct *task)
-{
-       unsigned long entries[32];
-       unsigned i, nr_entries;
-       int ret;
-
-       ret = down_read_killable(&task->signal->exec_update_lock);
-       if (ret)
-               return ret;
-
-       nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
-       for (i = 0; i < nr_entries; i++) {
-               prt_printf(out, "[<0>] %pB", (void *)entries[i]);
-               prt_newline(out);
-       }
-
-       up_read(&task->signal->exec_update_lock);
-       return 0;
-}
-
 static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
                                            size_t size, loff_t *ppos)
 {
@@ -547,7 +527,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
                prt_printf(&i->buf, "backtrace:");
                prt_newline(&i->buf);
                printbuf_indent_add(&i->buf, 2);
-               prt_backtrace(&i->buf, trans->locking_wait.task);
+               bch2_prt_backtrace(&i->buf, trans->locking_wait.task);
                printbuf_indent_sub(&i->buf, 2);
                prt_newline(&i->buf);
 
index f902da01a817ff7e4398cb95ad8593f9917b409e..d3fa2d7ae1a2a860438563dfed81123300f2022c 100644 (file)
@@ -1403,10 +1403,8 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
        int ret;
 
        idx = get_existing_stripe(c, h);
-       if (idx < 0) {
-               bch_err(c, "failed to find an existing stripe");
+       if (idx < 0)
                return -BCH_ERR_ENOSPC_stripe_reuse;
-       }
 
        h->s->have_existing_stripe = true;
        ret = get_stripe_key(c, idx, &h->s->existing_stripe);
@@ -1444,21 +1442,9 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
 static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
                                                        struct ec_stripe_head *h)
 {
-       int ret;
-
-       ret = bch2_disk_reservation_get(c, &h->s->res,
-                       h->blocksize,
-                       h->s->nr_parity, 0);
-
-       if (ret) {
-               /*
-                * This means we need to wait for copygc to
-                * empty out buckets from existing stripes:
-                */
-               bch_err_ratelimited(c, "failed to reserve stripe: %s", bch2_err_str(ret));
-       }
-
-       return ret;
+       return bch2_disk_reservation_get(c, &h->s->res,
+                                        h->blocksize,
+                                        h->s->nr_parity, 0);
 }
 
 struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
@@ -1500,8 +1486,10 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                ret = __bch2_ec_stripe_head_reserve(c, h);
        if (ret && needs_stripe_new)
                ret = __bch2_ec_stripe_head_reuse(c, h);
-       if (ret)
+       if (ret) {
+               bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
                goto err;
+       }
 
        if (!h->s->allocated) {
                ret = new_stripe_alloc_buckets(c, h, cl);
index fc0bb5f8873aa4b184d952912e3d104d52562d7f..9f293040b25384d6823c4ccedc669ae63fbd4b71 100644 (file)
@@ -42,6 +42,7 @@
        x(BCH_ERR_transaction_restart,  transaction_restart_key_cache_raced)    \
        x(BCH_ERR_transaction_restart,  transaction_restart_key_cache_realloced)\
        x(BCH_ERR_transaction_restart,  transaction_restart_journal_preres_get) \
+       x(BCH_ERR_transaction_restart,  transaction_restart_split_race)         \
        x(BCH_ERR_transaction_restart,  transaction_restart_nested)             \
        x(0,                            no_btree_node)                          \
        x(BCH_ERR_no_btree_node,        no_btree_node_relock)                   \
index 7d45f4863469a97587b8c8f632e0548586e3c69e..fdd436865afee525cdf114283ba3eb8cb994edda 100644 (file)
@@ -2208,6 +2208,9 @@ err:
        /* inode->i_dio_count is our ref on inode and thus bch_fs */
        inode_dio_end(&inode->v);
 
+       if (ret < 0)
+               ret = bch2_err_class(ret);
+
        if (!sync) {
                req->ki_complete(req, ret);
                ret = -EIOCBQUEUED;
index e85c3143051c46d376d68befc2df2c2d4fe39fd7..4f4dfaa7bfb7cc30084a63fbfa5049e9f09229c3 100644 (file)
@@ -191,7 +191,52 @@ void bch_move_stats_init(struct bch_move_stats *stats, char *name)
        scnprintf(stats->name, sizeof(stats->name), "%s", name);
 }
 
+static int bch2_extent_drop_ptrs(struct btree_trans *trans,
+                                struct btree_iter *iter,
+                                struct bkey_s_c k,
+                                struct data_update_opts data_opts)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_i *n;
+       int ret;
+
+       n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+       ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
+
+       bkey_reassemble(n, k);
+
+       while (data_opts.kill_ptrs) {
+               unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+               struct bch_extent_ptr *ptr;
+
+               bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+               data_opts.kill_ptrs ^= 1U << drop;
+       }
+
+       /*
+        * If the new extent no longer has any pointers, bch2_extent_normalize()
+        * will do the appropriate thing with it (turning it into a
+        * KEY_TYPE_error key, or just a discard if it was a cached extent)
+        */
+       bch2_extent_normalize(c, bkey_i_to_s(n));
+
+       /*
+        * Since we're not inserting through an extent iterator
+        * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+        * we aren't using the extent overwrite path to delete, we're
+        * just using the normal key deletion path:
+        */
+       if (bkey_deleted(&n->k))
+               n->k.size = 0;
+
+       return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+               bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+}
+
 static int bch2_move_extent(struct btree_trans *trans,
+                           struct btree_iter *iter,
                            struct moving_context *ctxt,
                            struct bch_io_opts io_opts,
                            enum btree_id btree_id,
@@ -206,6 +251,15 @@ static int bch2_move_extent(struct btree_trans *trans,
        unsigned sectors = k.k->size, pages;
        int ret = -ENOMEM;
 
+       bch2_data_update_opts_normalize(k, &data_opts);
+
+       if (!data_opts.rewrite_ptrs &&
+           !data_opts.extra_replicas) {
+               if (data_opts.kill_ptrs)
+                       return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+               return 0;
+       }
+
        if (!percpu_ref_tryget_live(&c->writes))
                return -EROFS;
 
@@ -447,7 +501,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
                bch2_bkey_buf_reassemble(&sk, c, k);
                k = bkey_i_to_s_c(sk.k);
 
-               ret2 = bch2_move_extent(&trans, ctxt, io_opts,
+               ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts,
                                        btree_id, k, data_opts);
                if (ret2) {
                        if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
@@ -544,7 +598,7 @@ again:
                        prt_str(&buf, "failed to evacuate bucket ");
                        bch2_bkey_val_to_text(&buf, c, k);
 
-                       bch2_trans_inconsistent(trans, "%s", buf.buf);
+                       bch_err(c, "%s", buf.buf);
                        printbuf_exit(&buf);
                }
        }
@@ -599,11 +653,12 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 
                        bch2_bkey_buf_reassemble(&sk, c, k);
                        k = bkey_i_to_s_c(sk.k);
-                       bch2_trans_iter_exit(&trans, &iter);
 
                        ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
-                       if (ret)
+                       if (ret) {
+                               bch2_trans_iter_exit(&trans, &iter);
                                continue;
+                       }
 
                        data_opts = _data_opts;
                        data_opts.target        = io_opts.background_target;
@@ -615,8 +670,10 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
                                i++;
                        }
 
-                       ret = bch2_move_extent(&trans, ctxt, io_opts,
+                       ret = bch2_move_extent(&trans, &iter, ctxt, io_opts,
                                               bp.btree_id, k, data_opts);
+                       bch2_trans_iter_exit(&trans, &iter);
+
                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                                continue;
                        if (ret == -ENOMEM) {
index a824e16079d51ead3f48e60168cf7e10a5bd7b1b..9df08289a004069845468805bfddaa861001514a 100644 (file)
@@ -1325,19 +1325,11 @@ static bool bch2_fs_may_start(struct bch_fs *c)
 
 static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
 {
-       /*
-        * Device going read only means the copygc reserve get smaller, so we
-        * don't want that happening while copygc is in progress:
-        */
-       bch2_copygc_stop(c);
-
        /*
         * The allocator thread itself allocates btree nodes, so stop it first:
         */
        bch2_dev_allocator_remove(c, ca);
        bch2_dev_journal_stop(&c->journal, ca);
-
-       bch2_copygc_start(c);
 }
 
 static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
index 81befc433aeb7b6747f64e22dde164f7595ec910..d19193508c6dc3cdfdaded288a805c203357fd2e 100644 (file)
@@ -296,6 +296,26 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
        console_unlock();
 }
 
+int bch2_prt_backtrace(struct printbuf *out, struct task_struct *task)
+{
+       unsigned long entries[32];
+       unsigned i, nr_entries;
+       int ret;
+
+       ret = down_read_killable(&task->signal->exec_update_lock);
+       if (ret)
+               return ret;
+
+       nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
+       for (i = 0; i < nr_entries; i++) {
+               prt_printf(out, "[<0>] %pB", (void *)entries[i]);
+               prt_newline(out);
+       }
+
+       up_read(&task->signal->exec_update_lock);
+       return 0;
+}
+
 /* time stats: */
 
 static void bch2_time_stats_update_one(struct time_stats *stats,
index aa8b416a919a16af32c1a40569421aa3a3b8841d..a7f68e1755aa26d60fa3d98da2bc08d2d5a31ac4 100644 (file)
@@ -356,6 +356,7 @@ u64 bch2_read_flag_list(char *, const char * const[]);
 void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
 
 void bch2_print_string_as_lines(const char *prefix, const char *lines);
+int bch2_prt_backtrace(struct printbuf *, struct task_struct *);
 
 #define NR_QUANTILES   15
 #define QUANTILE_IDX(i)        inorder_to_eytzinger0(i, NR_QUANTILES)
index 41bfca2f8d522b111fb967f27d14cb1f24d2385f..3c7bdb81dff57981a44ad8b1f42347892ef1b8c1 100644 (file)
@@ -71,8 +71,10 @@ struct task_struct *kthread_create(int (*thread_fn)(void *data),
        p->thread_fn    = thread_fn;
        p->thread_data  = thread_data;
        p->state        = TASK_UNINTERRUPTIBLE;
+       p->signal       = &p->_signal;
        atomic_set(&p->usage, 1);
        init_completion(&p->exited);
+       init_rwsem(&p->_signal.exec_update_lock);
 
        pthread_attr_t attr;
        pthread_attr_init(&attr);
index 13f0c4b979c968e89337a1d84cf212c96ec2e77a..25cdfbb64a2c7d9031eec07ac1873b8c150f1860 100644 (file)
@@ -2,6 +2,7 @@
 #include <stdio.h>
 
 #include <linux/list.h>
+#include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/shrinker.h>
 
@@ -39,30 +40,29 @@ static u64 parse_meminfo_line(const char *line)
        return v << 10;
 }
 
-static struct meminfo read_meminfo(void)
+void si_meminfo(struct sysinfo *val)
 {
-       struct meminfo ret = { 0 };
        size_t len, n = 0;
        char *line = NULL;
        const char *v;
        FILE *f;
 
+       memset(val, 0, sizeof(*val));
+
        f = fopen("/proc/meminfo", "r");
        if (!f)
-               return ret;
+               return;
 
        while ((len = getline(&line, &n, f)) != -1) {
                if ((v = strcmp_prefix(line, "MemTotal:")))
-                       ret.total = parse_meminfo_line(v);
+                       val->totalram = parse_meminfo_line(v);
 
                if ((v = strcmp_prefix(line, "MemAvailable:")))
-                       ret.available = parse_meminfo_line(v);
+                       val->freeram = parse_meminfo_line(v);
        }
 
        fclose(f);
        free(line);
-
-       return ret;
 }
 
 static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
@@ -85,7 +85,7 @@ static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
 void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
 {
        struct shrinker *shrinker;
-       struct meminfo info;
+       struct sysinfo info;
        s64 want_shrink;
 
        /* Fast out if there are no shrinkers to run. */
@@ -97,10 +97,10 @@ void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
                return;
        }
 
-       info = read_meminfo();
+       si_meminfo(&info);
 
-       if (info.total && info.available) {
-               want_shrink = (info.total >> 2) - info.available;
+       if (info.totalram && info.freeram) {
+               want_shrink = (info.totalram >> 2) - info.freeram;
 
                if (want_shrink <= 0)
                        return;