-a27d7265e75f6d65c2b972ce4ac27abfc153c230
+e1f6739c4a9fee1db7d94a5087a253041542cb62
git rm -rf --ignore-unmatch libbcachefs
test -d libbcachefs || mkdir libbcachefs
cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
+ git add libbcachefs/*.[ch]
cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/
+ git add include/trace/events/bcachefs.h
+ cp $(LINUX_DIR)/kernel/locking/six.c linux/
+ git add linux/six.c
+ cp $(LINUX_DIR)/include/linux/six.h include/linux/
+ git add include/linux/six.h
$(RM) libbcachefs/*.mod.c
git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision
- git add libbcachefs/*.[ch] include/trace/events/bcachefs.h .bcachefs_revision
+ git add .bcachefs_revision
.PHONY: update-commit-bcachefs-sources
update-commit-bcachefs-sources: update-bcachefs-sources
struct task_struct;
# define lock_acquire(l, s, t, r, c, n, i) do { } while (0)
-# define lock_release(l, n, i) do { } while (0)
+# define lock_release(l, i) do { } while (0)
# define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
# define lockdep_set_current_reclaim_state(g) do { } while (0)
__entry->buckets_moved, __entry->buckets_not_moved)
);
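+
+/*
+ * transaction_restart_ip - logs the ip a btree transaction was started from
+ * (caller) and the ip that triggered the restart:
+ */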
+TRACE_EVENT(transaction_restart_ip,
+ TP_PROTO(unsigned long caller, unsigned long ip),
+ TP_ARGS(caller, ip),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, caller )
+ __field(unsigned long, ip )
+ ),
+
+ TP_fast_assign(
+ __entry->caller = caller;
+ __entry->ip = ip;
+ ),
+
+ TP_printk("%pF %pF", (void *) __entry->caller, (void *) __entry->ip)
+);
+
DECLARE_EVENT_CLASS(transaction_restart,
TP_PROTO(unsigned long ip),
TP_ARGS(ip),
bch2_trans_update(trans, iter, &a->k_i,
BTREE_TRIGGER_NORUN);
ret = bch2_trans_commit(trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|flags);
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ flags);
err:
if (ret == -EINTR)
goto retry;
set_current_state(TASK_INTERRUPTIBLE);
spin_lock(&c->freelist_lock);
- for (i = 0; i < RESERVE_NR; i++)
+ for (i = 0; i < RESERVE_NR; i++) {
+
+ /*
+ * Don't strand buckets on the copygc freelist until
+ * after recovery is finished:
+ */
+ if (!test_bit(BCH_FS_STARTED, &c->flags) &&
+ i == RESERVE_MOVINGGC)
+ continue;
+
if (fifo_push(&ca->free[i], bucket)) {
fifo_pop(&ca->free_inc, bucket);
spin_unlock(&c->freelist_lock);
goto out;
}
+ }
if (ca->allocator_state != ALLOCATOR_BLOCKED_FULL) {
ca->allocator_state = ALLOCATOR_BLOCKED_FULL;
#undef pr_fmt
#define pr_fmt(fmt) "bcachefs: %s() " fmt "\n", __func__
-#include <linux/stddef.h>
#include <linux/bug.h>
#include <linux/bio.h>
#include <linux/closure.h>
* inode number, 64 bit offset, 96 bit version field, etc.) for negligible cost.
*/
-#include <linux/stddef.h>
#include <asm/types.h>
#include <asm/byteorder.h>
#include <linux/kernel.h>
const struct bkey_ops *ops;
struct bkey uk;
struct bkey_s u;
-
- if (big_endian != CPU_BIG_ENDIAN)
- bch2_bkey_swab_key(f, k);
-
- if (version < bcachefs_metadata_version_bkey_renumber)
- bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
-
- if (version < bcachefs_metadata_version_inode_btree_change &&
- btree_id == BTREE_ID_INODES) {
+ int i;
+
+ /*
+ * Do these operations in reverse order in the write path:
+ */
+
+ for (i = 0; i < 4; i++)
+ switch (!write ? i : 3 - i) {
+ case 0:
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bkey_swab_key(f, k);
+ break;
+ case 1:
+ if (version < bcachefs_metadata_version_bkey_renumber)
+ bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+ break;
+ case 2:
+ if (version < bcachefs_metadata_version_inode_btree_change &&
+ btree_id == BTREE_ID_INODES) {
+ if (!bkey_packed(k)) {
+ struct bkey_i *u = packed_to_bkey(k);
+ swap(u->k.p.inode, u->k.p.offset);
+ } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+ f->bits_per_field[BKEY_FIELD_OFFSET]) {
+ struct bkey_format tmp = *f, *in = f, *out = &tmp;
+
+ swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+ tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+ swap(tmp.field_offset[BKEY_FIELD_INODE],
+ tmp.field_offset[BKEY_FIELD_OFFSET]);
+
+ if (!write)
+ swap(in, out);
+
+ uk = __bch2_bkey_unpack_key(in, k);
+ swap(uk.p.inode, uk.p.offset);
+ BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+ }
+ }
+ break;
+ case 3:
if (!bkey_packed(k)) {
- struct bkey_i *u = packed_to_bkey(k);
- swap(u->k.p.inode, u->k.p.offset);
- } else if (f->bits_per_field[BKEY_FIELD_INODE] &&
- f->bits_per_field[BKEY_FIELD_OFFSET]) {
- struct bkey_format tmp = *f, *in = f, *out = &tmp;
-
- swap(tmp.bits_per_field[BKEY_FIELD_INODE],
- tmp.bits_per_field[BKEY_FIELD_OFFSET]);
- swap(tmp.field_offset[BKEY_FIELD_INODE],
- tmp.field_offset[BKEY_FIELD_OFFSET]);
-
- if (!write)
- swap(in, out);
-
- uk = __bch2_bkey_unpack_key(in, k);
- swap(uk.p.inode, uk.p.offset);
- BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+ u = bkey_i_to_s(packed_to_bkey(k));
+ } else {
+ uk = __bch2_bkey_unpack_key(f, k);
+ u.k = &uk;
+ u.v = bkeyp_val(f, k);
}
- }
- if (!bkey_packed(k)) {
- u = bkey_i_to_s(packed_to_bkey(k));
- } else {
- uk = __bch2_bkey_unpack_key(f, k);
- u.k = &uk;
- u.v = bkeyp_val(f, k);
- }
+ if (big_endian != CPU_BIG_ENDIAN)
+ bch2_bkey_swab_val(u);
- if (big_endian != CPU_BIG_ENDIAN)
- bch2_bkey_swab_val(u);
+ ops = &bch2_bkey_ops[k->type];
- ops = &bch2_bkey_ops[k->type];
-
- if (ops->compat)
- ops->compat(btree_id, version, big_endian, write, u);
+ if (ops->compat)
+ ops->compat(btree_id, version, big_endian, write, u);
+ break;
+ default:
+ BUG();
+ }
}
return c->opts.btree_node_size >> c->block_bits;
}
-#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 3 / 4)
+#define BTREE_SPLIT_THRESHOLD(c) (btree_max_u64s(c) * 2 / 3)
#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
sizeof(u64), GFP_KERNEL);
- if (!c->usage_gc)
+ if (!c->usage_gc) {
+ bch_err(c, "error allocating c->usage_gc");
return -ENOMEM;
+ }
for_each_member_device(ca, c, i) {
BUG_ON(ca->buckets[1]);
GFP_KERNEL|__GFP_ZERO);
if (!ca->buckets[1]) {
percpu_ref_put(&ca->ref);
+ bch_err(c, "error allocating ca->buckets[gc]");
return -ENOMEM;
}
ca->usage[1] = alloc_percpu(struct bch_dev_usage);
if (!ca->usage[1]) {
+ bch_err(c, "error allocating ca->usage[gc]");
percpu_ref_put(&ca->ref);
return -ENOMEM;
}
}
ret = bch2_ec_mem_alloc(c, true);
- if (ret)
+ if (ret) {
+ bch_err(c, "error allocating ec gc mem");
return ret;
+ }
percpu_down_write(&c->mark_lock);
return;
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(iter->trans, iter->btree_id,
btree_update_reserve_required(c, parent) + nr_old_nodes,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
struct btree_node *bn =
container_of(i, struct btree_node, keys);
/* These indicate that we read the wrong btree node: */
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ struct bch_btree_ptr_v2 *bp =
+ &bkey_i_to_btree_ptr_v2(&b->key)->v;
+
+ /* XXX endianness */
+ btree_err_on(bp->seq != bn->keys.seq,
+ BTREE_ERR_MUST_RETRY, c, b, NULL,
+ "incorrect sequence number (wrong btree node)");
+ }
+
btree_err_on(BTREE_NODE_ID(bn) != b->btree_id,
BTREE_ERR_MUST_RETRY, c, b, i,
"incorrect btree id");
* reflect that those writes were done and the data flushed from the
* journal:
*
+ * Also on journal error, the pending write may have updates that were
+ * never journalled (interior nodes, see btree_update_nodes_written()) -
+ * it's critical that we don't do the write in that case otherwise we
+ * will have updates visible that weren't in the journal:
+ *
* Make sure to update b->written so bch2_btree_init_next() doesn't
* break:
*/
struct btree_iter *linked;
trans_for_each_iter(iter->trans, linked)
- if (linked != iter &&
- linked->l[level].b == b &&
+ if (linked->l[level].b == b &&
btree_node_locked_type(linked, level) >= want) {
six_lock_increment(&b->lock, want);
return true;
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
- BUG_ON((as->nr_new_nodes || as->nr_pending) &&
- !bch2_journal_error(&c->journal));;
+ BUG_ON(as->nr_new_nodes || as->nr_pending);
if (as->reserve)
bch2_btree_reserve_put(c, as->reserve);
+ list_del(&as->unwritten_list);
list_del(&as->list);
closure_debug_destroy(&as->cl);
mutex_unlock(&c->btree_interior_update_lock);
}
-static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
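+/* Take a six lock in intent then write state; drop intent again if write fails: */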
+static inline bool six_trylock_intentwrite(struct six_lock *lock)
{
- struct bch_fs *c = as->c;
-
- while (as->nr_new_nodes) {
- struct btree *b = as->new_nodes[--as->nr_new_nodes];
+ if (!six_trylock_intent(lock))
+ return false;
- BUG_ON(b->will_make_reachable != (unsigned long) as);
- b->will_make_reachable = 0;
-
- /*
- * b->will_make_reachable prevented it from being written, so
- * write it now if it needs to be written:
- */
- btree_node_lock_type(c, b, SIX_LOCK_read);
- bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
- six_unlock_read(&b->lock);
+ if (!six_trylock_write(lock)) {
+ six_unlock_intent(lock);
+ return false;
}
- while (as->nr_pending)
- bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending],
- seq);
+ return true;
}
static void btree_update_nodes_written(struct closure *cl)
{
struct btree_update *as = container_of(cl, struct btree_update, cl);
+ struct btree *nodes_need_write[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES + 1];
+ unsigned nr_nodes_need_write;
struct journal_res res = { 0 };
struct bch_fs *c = as->c;
+ struct btree_root *r;
struct btree *b;
- struct bset *i;
int ret;
/*
mutex_lock(&c->btree_interior_update_lock);
as->nodes_written = true;
again:
+ nr_nodes_need_write = 0;
as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
struct btree_update, unwritten_list);
if (!as || !as->nodes_written) {
}
b = as->b;
- if (b && !six_trylock_intent(&b->lock)) {
+ if (b && !six_trylock_intentwrite(&b->lock)) {
mutex_unlock(&c->btree_interior_update_lock);
+
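+		/*
+		 * Wait until we can take both the intent and write locks,
+		 * then retry - things may have changed while we weren't
+		 * holding btree_interior_update_lock:
+		 */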
btree_node_lock_type(c, b, SIX_LOCK_intent);
+ six_lock_write(&b->lock);
+
+ six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
+
mutex_lock(&c->btree_interior_update_lock);
goto again;
}
- list_del(&as->unwritten_list);
-
ret = bch2_journal_res_get(&c->journal, &res, as->journal_u64s,
+ JOURNAL_RES_GET_NONBLOCK|
JOURNAL_RES_GET_RESERVED);
- if (ret) {
- BUG_ON(!bch2_journal_error(&c->journal));
- /* can't unblock btree writes */
- goto free_update;
+ if (ret == -EAGAIN) {
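+		/*
+		 * Couldn't get a journal reservation without blocking: drop
+		 * our locks, wait for space in the journal, then retry from
+		 * the top:
+		 */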
+ unsigned u64s = as->journal_u64s;
+
+ if (b) {
+ six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
+ }
+
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ ret = bch2_journal_res_get(&c->journal, &res, u64s,
+ JOURNAL_RES_GET_CHECK|
+ JOURNAL_RES_GET_RESERVED);
+ if (!ret) {
+ mutex_lock(&c->btree_interior_update_lock);
+ goto again;
+ }
}
- {
+ if (!ret) {
struct journal_buf *buf = &c->journal.buf[res.idx];
struct jset_entry *entry = vstruct_idx(buf->data, res.offset);
res.offset += as->journal_u64s;
res.u64s -= as->journal_u64s;
memcpy_u64s(entry, as->journal_entries, as->journal_u64s);
+ } else {
+ /*
+ * On journal error we have to run most of the normal path so
+ * that shutdown works - unblocking btree node writes in
+ * particular and writing them if needed - except for
+ * journalling the update:
+ */
+
+ BUG_ON(!bch2_journal_error(&c->journal));
}
switch (as->mode) {
BUG();
case BTREE_INTERIOR_UPDATING_NODE:
/* @b is the node we did the final insert into: */
- BUG_ON(!res.ref);
- six_lock_write(&b->lock);
+ /*
+ * On failure to get a journal reservation, we still have to
+ * unblock the write and allow most of the write path to happen
+ * so that shutdown works, but the i->journal_seq mechanism
+ * won't work to prevent the btree write from being visible (we
+ * didn't get a journal sequence number) - instead
+ * __bch2_btree_node_write() doesn't do the actual write if
+ * we're in journal error state:
+ */
+
list_del(&as->write_blocked_list);
- i = btree_bset_last(b);
- i->journal_seq = cpu_to_le64(
- max(res.seq,
- le64_to_cpu(i->journal_seq)));
+ if (!ret) {
+ struct bset *i = btree_bset_last(b);
+
+ i->journal_seq = cpu_to_le64(
+ max(res.seq,
+ le64_to_cpu(i->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, res.seq);
+ }
+
+ nodes_need_write[nr_nodes_need_write++] = b;
- bch2_btree_add_journal_pin(c, b, res.seq);
six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
break;
case BTREE_INTERIOR_UPDATING_AS:
BUG_ON(b);
break;
- case BTREE_INTERIOR_UPDATING_ROOT: {
- struct btree_root *r = &c->btree_roots[as->btree_id];
+ case BTREE_INTERIOR_UPDATING_ROOT:
+ r = &c->btree_roots[as->btree_id];
BUG_ON(b);
mutex_unlock(&c->btree_root_lock);
break;
}
- }
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_res_put(&c->journal, &res);
bch2_journal_preres_put(&c->journal, &as->journal_preres);
-free_update:
- /* Do btree write after dropping journal res: */
- if (b) {
- /*
- * b->write_blocked prevented it from being written, so
- * write it now if it needs to be written:
- */
- btree_node_write_if_need(c, b, SIX_LOCK_intent);
- six_unlock_intent(&b->lock);
+
+ while (as->nr_new_nodes) {
+ b = as->new_nodes[--as->nr_new_nodes];
+
+ BUG_ON(b->will_make_reachable != (unsigned long) as);
+ b->will_make_reachable = 0;
+
+ nodes_need_write[nr_nodes_need_write++] = b;
}
- if (!ret)
- btree_update_nodes_reachable(as, res.seq);
+ while (as->nr_pending)
+ bch2_btree_node_free_ondisk(c,
+ &as->pending[--as->nr_pending], res.seq);
__bch2_btree_update_free(as);
/*
* nodes to be writeable:
*/
closure_wake_up(&c->btree_interior_update_wait);
+
+ /*
+ * Can't take btree node locks while holding btree_interior_update_lock:
+	 */
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ /* Do btree writes after dropping journal res/locks: */
+ while (nr_nodes_need_write) {
+ b = nodes_need_write[--nr_nodes_need_write];
+
+ btree_node_lock_type(c, b, SIX_LOCK_read);
+ bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
+ six_unlock_read(&b->lock);
+ }
+
+ mutex_lock(&c->btree_interior_update_lock);
goto again;
}
}
struct btree_update *
-bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
unsigned nr_nodes, unsigned flags,
struct closure *cl)
{
+ struct bch_fs *c = trans->c;
+ struct journal_preres journal_preres = { 0 };
struct btree_reserve *reserve;
struct btree_update *as;
int ret;
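+
+	/*
+	 * Try the journal prereservation without blocking first; if that
+	 * fails we have to drop btree locks before waiting for it:
+	 */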
+ ret = bch2_journal_preres_get(&c->journal, &journal_preres,
+ BTREE_UPDATE_JOURNAL_RES,
+ JOURNAL_RES_GET_NONBLOCK);
+ if (ret == -EAGAIN) {
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ return ERR_PTR(-EINTR);
+
+ bch2_trans_unlock(trans);
+
+ ret = bch2_journal_preres_get(&c->journal, &journal_preres,
+ BTREE_UPDATE_JOURNAL_RES, 0);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!bch2_trans_relock(trans)) {
+ bch2_journal_preres_put(&c->journal, &journal_preres);
+ return ERR_PTR(-EINTR);
+ }
+ }
+
reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl);
- if (IS_ERR(reserve))
+ if (IS_ERR(reserve)) {
+ bch2_journal_preres_put(&c->journal, &journal_preres);
return ERR_CAST(reserve);
+ }
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
memset(as, 0, sizeof(*as));
as->btree_id = id;
as->reserve = reserve;
INIT_LIST_HEAD(&as->write_blocked_list);
+ INIT_LIST_HEAD(&as->unwritten_list);
+ as->journal_preres = journal_preres;
bch2_keylist_init(&as->parent_keys, as->inline_keys);
- ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
- ARRAY_SIZE(as->journal_entries), 0);
- if (ret) {
- bch2_btree_reserve_put(c, reserve);
- closure_debug_destroy(&as->cl);
- mempool_free(as, &c->btree_interior_update_pool);
- return ERR_PTR(ret);
- }
-
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->list, &c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
/* Hack, because gc and splitting nodes doesn't mix yet: */
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
!down_read_trylock(&c->gc_lock)) {
- if (flags & BTREE_INSERT_NOUNLOCK)
+ if (flags & BTREE_INSERT_NOUNLOCK) {
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
return -EINTR;
+ }
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
goto out;
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(trans, iter->btree_id,
btree_update_reserve_required(c, b), flags,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (IS_ERR(as)) {
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_trans_unlock(trans);
ret = -EINTR;
+
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
}
goto out;
}
goto err_unlock;
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(trans, iter->btree_id,
btree_update_reserve_required(c, parent) + 1,
+ flags|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
struct btree *n, *parent = btree_node_parent(iter, b);
struct btree_update *as;
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(iter->trans, iter->btree_id,
(parent
? btree_update_reserve_required(c, parent)
: 0) + 1,
new_hash = bch2_btree_node_mem_alloc(c);
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(iter->trans, iter->btree_id,
parent ? btree_update_reserve_required(c, parent) : 0,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
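+
+/*
+ * Upper bound, in u64s, on the journal entries an interior node update writes
+ * - sizes the journal_entries buffer in struct btree_update and the journal
+ * prereservation it takes:
+ */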
+#define BTREE_UPDATE_JOURNAL_RES \
+ ((BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2)
+
/*
* Tracks an in progress split/rewrite of a btree node and the update to the
* parent node:
unsigned nr_new_nodes;
unsigned journal_u64s;
- u64 journal_entries[
- (BKEY_BTREE_PTR_U64s_MAX + 1) * (BTREE_MAX_DEPTH - 1) * 2];
+ u64 journal_entries[BTREE_UPDATE_JOURNAL_RES];
/* Only here to reduce stack usage on recursive splits: */
struct keylist parent_keys;
void bch2_btree_update_done(struct btree_update *);
struct btree_update *
-bch2_btree_update_start(struct bch_fs *, enum btree_id, unsigned,
+bch2_btree_update_start(struct btree_trans *, enum btree_id, unsigned,
unsigned, struct closure *);
void bch2_btree_interior_update_will_free_node(struct btree_update *,
#include <crypto/chacha.h>
#include <crypto/hash.h>
#include <crypto/poly1305.h>
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
static u64 bch2_checksum_init(unsigned type)
BUG();
}
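+
+/*
+ * Returns true if the bio's data is contiguous in memory, i.e. each bvec
+ * starts where the previous one ended:
+ */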
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ void *expected_start = NULL;
+
+ __bio_for_each_bvec(bv, bio, iter, start) {
+ if (expected_start &&
+ expected_start != page_address(bv.bv_page) + bv.bv_offset)
+ return false;
+
+ expected_start = page_address(bv.bv_page) +
+ bv.bv_offset + bv.bv_len;
+ }
+
+ return true;
+}
+
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
struct bvec_iter start, int rw)
{
unsigned nr_pages = 0;
struct page *stack_pages[16];
struct page **pages = NULL;
- bool first = true;
- unsigned prev_end = PAGE_SIZE;
void *data;
BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
-#ifndef CONFIG_HIGHMEM
- __bio_for_each_bvec(bv, bio, iter, start) {
- if (bv.bv_len == start.bi_size)
- return (struct bbuf) {
- .b = page_address(bv.bv_page) + bv.bv_offset,
- .type = BB_NONE, .rw = rw
- };
- }
-#endif
+ if (!IS_ENABLED(CONFIG_HIGHMEM) &&
+ bio_phys_contig(bio, start))
+ return (struct bbuf) {
+ .b = page_address(bio_iter_page(bio, start)) +
+ bio_iter_offset(bio, start),
+ .type = BB_NONE, .rw = rw
+ };
+
+ /* check if we can map the pages contiguously: */
__bio_for_each_segment(bv, bio, iter, start) {
- if ((!first && bv.bv_offset) ||
- prev_end != PAGE_SIZE)
+ if (iter.bi_size != start.bi_size &&
+ bv.bv_offset)
+ goto bounce;
+
+ if (bv.bv_len < iter.bi_size &&
+ bv.bv_offset + bv.bv_len < PAGE_SIZE)
goto bounce;
- prev_end = bv.bv_offset + bv.bv_len;
nr_pages++;
}
}
case BCH_COMPRESSION_TYPE_zstd: {
ZSTD_DCtx *ctx;
- size_t len;
+ size_t real_src_len = le32_to_cpup(src_data.b);
+
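+		/* the on disk format prefixes zstd data with a 4 byte length header: */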
+ if (real_src_len > src_len - 4)
+ goto err;
workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
- src_len = le32_to_cpup(src_data.b);
-
- len = ZSTD_decompressDCtx(ctx,
+ ret = ZSTD_decompressDCtx(ctx,
dst_data, dst_len,
- src_data.b + 4, src_len);
+ src_data.b + 4, real_src_len);
mempool_free(workspace, &c->decompress_workspace);
- if (len != dst_len)
+ if (ret != dst_len)
goto err;
break;
}
if (ret)
goto err;
- if (dst_data.type != BB_NONE)
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
bio_unmap_or_unbounce(c, dst_data);
memset(dst_data.b + *dst_len, 0, pad);
*dst_len += pad;
- if (dst_data.type != BB_NONE)
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
size_t max_extent = c->sb.encoded_extent_max << 9;
- size_t order = get_order(max_extent);
size_t decompress_workspace_size = 0;
bool decompress_workspace_needed;
ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
if (!mempool_initialized(&c->compression_bounce[READ])) {
ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
- 1, order);
+ 1, max_extent);
if (ret)
goto out;
}
if (!mempool_initialized(&c->compression_bounce[WRITE])) {
ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
- 1, order);
+ 1, max_extent);
if (ret)
goto out;
}
unsigned offset,
struct bpos *end,
unsigned *nr_iters,
- unsigned max_iters,
- bool overwrite)
+ unsigned max_iters)
{
- int ret = 0;
+ int ret = 0, ret2 = 0;
- /*
- * The extent update path requires an _additional_ iterator for each
- * extent we're inserting and overwriting:
- */
- *nr_iters += 1;
if (*nr_iters >= max_iters) {
*end = bpos_min(*end, k.k->p);
ret = 1;
for_each_btree_key(trans, iter,
BTREE_ID_REFLINK, POS(0, idx + offset),
- BTREE_ITER_SLOTS, r_k, ret) {
+ BTREE_ITER_SLOTS, r_k, ret2) {
if (bkey_cmp(bkey_start_pos(r_k.k),
POS(0, idx + sectors)) >= 0)
break;
+ /* extent_update_to_keys(), for the reflink_v update */
+ *nr_iters += 1;
+
*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
if (*nr_iters >= max_iters) {
struct bpos pos = bkey_start_pos(k.k);
- pos.offset += r_k.k->p.offset - idx;
+ pos.offset += min_t(u64, k.k->size,
+ r_k.k->p.offset - idx);
*end = bpos_min(*end, pos);
ret = 1;
}
}
- return ret;
+ return ret2 ?: ret;
}
#define EXTENT_ITERS_MAX (BTREE_ITER_MAX / 3)
*end = bpos_min(insert->k.p, b->key.k.p);
+ /* extent_update_to_keys(): */
+ nr_iters += 1;
+
ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
- &nr_iters, EXTENT_ITERS_MAX / 2, false);
+ &nr_iters, EXTENT_ITERS_MAX / 2);
if (ret < 0)
return ret;
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
+ /* extent_handle_overwrites(): */
+ switch (bch2_extent_overlap(&insert->k, k.k)) {
+ case BCH_EXTENT_OVERLAP_ALL:
+ case BCH_EXTENT_OVERLAP_FRONT:
+ nr_iters += 1;
+ break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ case BCH_EXTENT_OVERLAP_MIDDLE:
+ nr_iters += 2;
+ break;
+ }
+
ret = count_iters_for_insert(trans, k, offset, end,
- &nr_iters, EXTENT_ITERS_MAX, true);
+ &nr_iters, EXTENT_ITERS_MAX);
if (ret)
break;
return;
bch2_fs_inconsistent_on(!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked(c, k, false), c,
+ !bch2_bkey_replicas_marked_locked(c, k, false), c,
"btree key bad (replicas not marked in superblock):\n%s",
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
if (w->io &&
(w->io->op.res.nr_replicas != nr_replicas_this_write ||
bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
- w->io->op.wbio.bio.bi_iter.bi_size >= (256U << 20) ||
+ w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
+ (BIO_MAX_PAGES * PAGE_SIZE) ||
bio_end_sector(&w->io->op.wbio.bio) != sector))
bch2_writepage_do_io(w);
goto loop;
while (1) {
+ size_t extra = dio->iter.count -
+ min(BIO_MAX_PAGES * PAGE_SIZE, dio->iter.count);
+
if (kthread)
use_mm(dio->mm);
BUG_ON(current->faults_disabled_mapping);
current->faults_disabled_mapping = mapping;
+ /*
+ * Don't issue more than 2MB at once, the bcachefs io path in
+ * io.c can't bounce more than that:
+ */
+
+ dio->iter.count -= extra;
ret = bio_iov_iter_get_pages(bio, &dio->iter);
+ dio->iter.count += extra;
current->faults_disabled_mapping = NULL;
if (kthread)
__bch2_write_index(op);
- if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
+ if (!(op->flags & BCH_WRITE_DONE)) {
+ continue_at(cl, __bch2_write, index_update_wq(op));
+ } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
bch2_journal_flush_seq_async(&c->journal,
*op_journal_seq(op),
cl);
if (ret < 0)
goto err;
- if (ret)
+ if (ret) {
skip_put = false;
+ } else {
+ /*
+ * for the skip_put optimization this has to be set
+ * before we submit the bio:
+ */
+ op->flags |= BCH_WRITE_DONE;
+ }
bio->bi_end_io = bch2_write_endio;
bio->bi_private = &op->cl;
return;
err:
op->error = ret;
+ op->flags |= BCH_WRITE_DONE;
continue_at(cl, bch2_write_index, index_update_wq(op));
return;
flush_io:
+ /*
+ * If the write can't all be submitted at once, we generally want to
+ * block synchronously as that signals backpressure to the caller.
+ *
+ * However, if we're running out of a workqueue, we can't block here
+ * because we'll be blocking other work items from completing:
+ */
+ if (current->flags & PF_WQ_WORKER) {
+ continue_at(cl, bch2_write_index, index_update_wq(op));
+ return;
+ }
+
closure_sync(cl);
if (!bch2_keylist_empty(&op->insert_keys)) {
__bch2_write_index(op);
if (op->error) {
+ op->flags |= BCH_WRITE_DONE;
continue_at_nobarrier(cl, bch2_write_done, NULL);
return;
}
bch2_keylist_push(&op->insert_keys);
op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+ op->flags |= BCH_WRITE_DONE;
+
continue_at_nobarrier(cl, bch2_write_index, NULL);
return;
err:
/* Internal: */
BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 10),
BCH_WRITE_SKIP_CLOSURE_PUT = (1 << 11),
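+	/*
+	 * Set when the write path is done submitting data; if it's not set,
+	 * bch2_write_index loops back into __bch2_write:
+	 */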
+ BCH_WRITE_DONE = (1 << 12),
};
static inline u64 *op_journal_seq(struct bch_write_op *op)
if (!res->ref)
return;
- lock_release(&j->res_map, 0, _THIS_IP_);
+ lock_release(&j->res_map, _THIS_IP_);
while (res->u64s)
bch2_journal_add_entry(j, res,
return ret;
out:
if (!(flags & JOURNAL_RES_GET_CHECK)) {
- lock_acquire_shared(&j->res_map, 0, 0, NULL, _THIS_IP_);
+ lock_acquire_shared(&j->res_map, 0,
+ (flags & JOURNAL_RES_GET_NONBLOCK) != 0,
+ NULL, _THIS_IP_);
EBUG_ON(!res->ref);
}
return 0;
spin_unlock(&j->lock);
}
-void __bch2_journal_pin_add(struct journal *j, u64 seq,
+static void bch2_journal_pin_add_locked(struct journal *j, u64 seq,
struct journal_entry_pin *pin,
journal_pin_flush_fn flush_fn)
{
struct journal_entry_pin_list *pin_list = journal_seq_pin(j, seq);
- spin_lock(&j->lock);
-
__journal_pin_drop(j, pin);
BUG_ON(!atomic_read(&pin_list->count));
pin->flush = flush_fn;
list_add(&pin->list, flush_fn ? &pin_list->list : &pin_list->flushed);
+}
+void __bch2_journal_pin_add(struct journal *j, u64 seq,
+ struct journal_entry_pin *pin,
+ journal_pin_flush_fn flush_fn)
+{
+ spin_lock(&j->lock);
+ bch2_journal_pin_add_locked(j, seq, pin, flush_fn);
spin_unlock(&j->lock);
/*
struct journal_entry_pin *src,
journal_pin_flush_fn flush_fn)
{
+ spin_lock(&j->lock);
+
if (journal_pin_active(src) &&
(!journal_pin_active(dst) || src->seq < dst->seq))
- __bch2_journal_pin_add(j, src->seq, dst, flush_fn);
+ bch2_journal_pin_add_locked(j, src->seq, dst, flush_fn);
+
+ spin_unlock(&j->lock);
}
/**
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (1) {
- struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+ struct bkey_s_c k;
struct bkey_i *insert;
- struct bkey_i_extent *new =
- bkey_i_to_extent(bch2_keylist_front(keys));
+ struct bkey_i_extent *new;
BKEY_PADDED(k) _new, _insert;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bool did_work = false;
int nr;
+ bch2_trans_reset(&trans, 0);
+
+ k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
- if (ret)
+ if (ret) {
+ if (ret == -EINTR)
+ continue;
break;
+ }
+
+ new = bkey_i_to_extent(bch2_keylist_front(keys));
if (bversion_cmp(k.k->version, new->k.version) ||
!bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
return ret;
}
-static int bch2_set_quota(struct super_block *sb, struct kqid qid,
- struct qc_dqblk *qdq)
+static int bch2_set_quota_trans(struct btree_trans *trans,
+ struct bkey_i_quota *new_quota,
+ struct qc_dqblk *qdq)
{
- struct bch_fs *c = sb->s_fs_info;
- struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_i_quota new_quota;
int ret;
- if (sb->s_flags & SB_RDONLY)
- return -EROFS;
-
- bkey_quota_init(&new_quota.k_i);
- new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
-
- bch2_trans_init(&trans, c, 0, 0);
-
- iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
+ iter = bch2_trans_get_iter(trans, BTREE_ID_QUOTAS, new_quota->k.p,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
k = bch2_btree_iter_peek_slot(iter);
if (unlikely(ret))
return ret;
- switch (k.k->type) {
- case KEY_TYPE_quota:
- new_quota.v = *bkey_s_c_to_quota(k).v;
- break;
- }
+ if (k.k->type == KEY_TYPE_quota)
+ new_quota->v = *bkey_s_c_to_quota(k).v;
if (qdq->d_fieldmask & QC_SPC_SOFT)
- new_quota.v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
+ new_quota->v.c[Q_SPC].softlimit = cpu_to_le64(qdq->d_spc_softlimit >> 9);
if (qdq->d_fieldmask & QC_SPC_HARD)
- new_quota.v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
+ new_quota->v.c[Q_SPC].hardlimit = cpu_to_le64(qdq->d_spc_hardlimit >> 9);
if (qdq->d_fieldmask & QC_INO_SOFT)
- new_quota.v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
+ new_quota->v.c[Q_INO].softlimit = cpu_to_le64(qdq->d_ino_softlimit);
if (qdq->d_fieldmask & QC_INO_HARD)
- new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+ new_quota->v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
+
+ return bch2_trans_update(trans, iter, &new_quota->k_i, 0);
+}
- bch2_trans_update(&trans, iter, &new_quota.k_i, 0);
+static int bch2_set_quota(struct super_block *sb, struct kqid qid,
+ struct qc_dqblk *qdq)
+{
+ struct bch_fs *c = sb->s_fs_info;
+ struct btree_trans trans;
+ struct bkey_i_quota new_quota;
+ int ret;
- ret = bch2_trans_commit(&trans, NULL, NULL, 0);
+ if (sb->s_flags & SB_RDONLY)
+ return -EROFS;
- bch2_trans_exit(&trans);
+ bkey_quota_init(&new_quota.k_i);
+ new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
- if (ret)
- return ret;
+ bch2_trans_init(&trans, c, 0, 0);
- ret = __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
+ bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
+ __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
+
+ bch2_trans_exit(&trans);
return ret;
}
prev_run_time;
if (w.dev_most_full_percent < 20 && throttle > 0) {
- r->state = REBALANCE_THROTTLED;
r->throttled_until_iotime = io_start +
div_u64(w.dev_most_full_capacity *
(20 - w.dev_most_full_percent),
50);
- r->throttled_until_cputime = start + throttle;
- bch2_kthread_io_clock_wait(clock,
- r->throttled_until_iotime,
- throttle);
- continue;
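+			/*
+			 * Don't sleep unless the wakeup time is more than the
+			 * io clock's max slop in the future:
+			 */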
+ if (atomic_long_read(&clock->now) + clock->max_slop <
+ r->throttled_until_iotime) {
+ r->throttled_until_cputime = start + throttle;
+ r->state = REBALANCE_THROTTLED;
+
+ bch2_kthread_io_clock_wait(clock,
+ r->throttled_until_iotime,
+ throttle);
+ continue;
+ }
}
/* minimum 1 mb/sec: */
GFP_NOIO)) ||
!(new_scratch = kmalloc(bytes, GFP_NOIO)) ||
(c->usage_gc &&
- !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))))
+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) {
+ bch_err(c, "error updating replicas table: memory allocation failure");
goto err;
+ }
if (c->usage_base)
__replicas_table_update(new_base, new_r,
struct bch_replicas_entry *new_entry)
{
struct bch_replicas_cpu new_r, new_gc;
- int ret = -ENOMEM;
+ int ret = 0;
verify_replicas_entry(new_entry);
swap(new_gc, c->replicas_gc);
percpu_up_write(&c->mark_lock);
out:
- ret = 0;
-err:
mutex_unlock(&c->sb_lock);
kfree(new_r.entries);
kfree(new_gc.entries);
return ret;
+err:
+ bch_err(c, "error adding replicas entry: memory allocation failure");
+ ret = -ENOMEM;
+ goto out;
}
int bch2_mark_replicas(struct bch_fs *c,
GFP_NOIO);
if (!c->replicas_gc.entries) {
mutex_unlock(&c->sb_lock);
+ bch_err(c, "error allocating c->replicas_gc");
return -ENOMEM;
}
nr = READ_ONCE(c->replicas.nr);
new.entry_size = READ_ONCE(c->replicas.entry_size);
new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
- if (!new.entries)
+ if (!new.entries) {
+ bch_err(c, "error allocating c->replicas_gc");
return -ENOMEM;
+ }
mutex_lock(&c->sb_lock);
percpu_down_write(&c->mark_lock);
* https://131002.net/siphash/
*/
-#include <linux/stddef.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
#include <linux/bitops.h>
if (bch2_fs_init_fault("fs_start"))
goto err;
+ set_bit(BCH_FS_STARTED, &c->flags);
+
if (c->opts.read_only || c->opts.nochanges) {
bch2_fs_read_only(c);
} else {
goto err;
}
- set_bit(BCH_FS_STARTED, &c->flags);
print_mount_opts(c);
ret = 0;
out:
#endif
#define six_acquire(l, t) lock_acquire(l, 0, t, 0, 0, NULL, _RET_IP_)
-#define six_release(l) lock_release(l, 0, _RET_IP_)
+#define six_release(l) lock_release(l, _RET_IP_)
struct six_lock_vals {
/* Value we add to the lock in order to take the lock: */