git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/alloc_foreground.c
Update bcachefs sources to 2272c5f5b7 bcachefs: Mark stripe buckets with correct...
[bcachefs-tools-debian] / libbcachefs / alloc_foreground.c
index f1cfb90b6d54c9cd6f5088cdbf9b11471ba1b06b..af1c1e2b5e97e75c74e5c3a6500139a81e4f4e00 100644 (file)
@@ -58,6 +58,17 @@ const char * const bch2_alloc_reserves[] = {
  * reference _after_ doing the index update that makes its allocation reachable.
  */
 
+void bch2_reset_alloc_cursors(struct bch_fs *c)
+{
+       struct bch_dev *ca;
+       unsigned i;
+
+       rcu_read_lock();
+       for_each_member_device_rcu(ca, c, i, NULL)
+               ca->alloc_cursor = 0;
+       rcu_read_unlock();
+}
+
 static void bch2_open_bucket_hash_add(struct bch_fs *c, struct open_bucket *ob)
 {
        open_bucket_idx_t idx = ob - c->open_buckets;
@@ -143,26 +154,17 @@ static void open_bucket_free_unused(struct bch_fs *c,
                                    struct write_point *wp,
                                    struct open_bucket *ob)
 {
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-       bool may_realloc = wp->data_type == BCH_DATA_user;
-
-       BUG_ON(ca->open_buckets_partial_nr >
-              ARRAY_SIZE(ca->open_buckets_partial));
-
-       if (ca->open_buckets_partial_nr <
-           ARRAY_SIZE(ca->open_buckets_partial) &&
-           may_realloc) {
-               spin_lock(&c->freelist_lock);
-               ob->on_partial_list = true;
-               ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
-                       ob - c->open_buckets;
-               spin_unlock(&c->freelist_lock);
+       BUG_ON(c->open_buckets_partial_nr >=
+              ARRAY_SIZE(c->open_buckets_partial));
 
-               closure_wake_up(&c->open_buckets_wait);
-               closure_wake_up(&c->freelist_wait);
-       } else {
-               bch2_open_bucket_put(c, ob);
-       }
+       spin_lock(&c->freelist_lock);
+       ob->on_partial_list = true;
+       c->open_buckets_partial[c->open_buckets_partial_nr++] =
+               ob - c->open_buckets;
+       spin_unlock(&c->freelist_lock);
+
+       closure_wake_up(&c->open_buckets_wait);
+       closure_wake_up(&c->freelist_wait);
 }
 
 /* _only_ for allocating the journal on a new device: */
@@ -248,7 +250,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 
        ob->valid       = true;
        ob->sectors_free = ca->mi.bucket_size;
-       ob->alloc_reserve = reserve;
        ob->dev         = ca->dev_idx;
        ob->gen         = a->gen;
        ob->bucket      = bucket;
@@ -272,7 +273,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
        }
 
        spin_unlock(&c->freelist_lock);
-
        return ob;
 }
 
@@ -376,32 +376,6 @@ err:
        return ob;
 }
 
-static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
-                                                   enum alloc_reserve reserve)
-{
-       struct open_bucket *ob;
-       int i;
-
-       spin_lock(&c->freelist_lock);
-
-       for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
-               ob = c->open_buckets + ca->open_buckets_partial[i];
-
-               if (reserve <= ob->alloc_reserve) {
-                       array_remove_item(ca->open_buckets_partial,
-                                         ca->open_buckets_partial_nr,
-                                         i);
-                       ob->on_partial_list = false;
-                       ob->alloc_reserve = reserve;
-                       spin_unlock(&c->freelist_lock);
-                       return ob;
-               }
-       }
-
-       spin_unlock(&c->freelist_lock);
-       return NULL;
-}
-
 /*
  * This path is for before the freespace btree is initialized:
  *
@@ -418,12 +392,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
        struct btree_iter iter;
        struct bkey_s_c k;
        struct open_bucket *ob = NULL;
+       u64 alloc_start = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+       u64 alloc_cursor = max(alloc_start, READ_ONCE(ca->alloc_cursor));
        int ret;
-
-       s->cur_bucket = max_t(u64, s->cur_bucket, ca->mi.first_bucket);
-       s->cur_bucket = max_t(u64, s->cur_bucket, ca->new_fs_bucket_idx);
-
-       for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, s->cur_bucket),
+again:
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
                           BTREE_ITER_SLOTS, k, ret) {
                struct bch_alloc_v4 a_convert;
                const struct bch_alloc_v4 *a;
@@ -448,9 +421,17 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
        }
        bch2_trans_iter_exit(trans, &iter);
 
-       s->cur_bucket = iter.pos.offset;
+       ca->alloc_cursor = alloc_cursor;
+
+       if (!ob && ret)
+               ob = ERR_PTR(ret);
+
+       if (!ob && alloc_cursor > alloc_start) {
+               alloc_cursor = alloc_start;
+               goto again;
+       }
 
-       return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
+       return ob;
 }
 
 static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
@@ -462,33 +443,34 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
        struct btree_iter iter;
        struct bkey_s_c k;
        struct open_bucket *ob = NULL;
+       u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor));
+       u64 alloc_cursor = alloc_start;
        int ret;
 
        BUG_ON(ca->new_fs_bucket_idx);
-
-       /*
-        * XXX:
-        * On transaction restart, we'd like to restart from the bucket we were
-        * at previously
-        */
+again:
        for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
-                                    POS(ca->dev_idx, s->cur_bucket), 0, k, ret) {
+                                    POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
                if (k.k->p.inode != ca->dev_idx)
                        break;
 
-               for (s->cur_bucket = max(s->cur_bucket, bkey_start_offset(k.k));
-                    s->cur_bucket < k.k->p.offset;
-                    s->cur_bucket++) {
+               for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
+                    alloc_cursor < k.k->p.offset;
+                    alloc_cursor++) {
                        ret = btree_trans_too_many_iters(trans);
-                       if (ret)
+                       if (ret) {
+                               ob = ERR_PTR(ret);
                                break;
+                       }
 
                        s->buckets_seen++;
 
                        ob = try_alloc_bucket(trans, ca, reserve,
-                                             s->cur_bucket, s, k, cl);
-                       if (ob)
+                                             alloc_cursor, s, k, cl);
+                       if (ob) {
+                               iter.path->preserve = false;
                                break;
+                       }
                }
 
                if (ob || ret)
@@ -496,7 +478,17 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
        }
        bch2_trans_iter_exit(trans, &iter);
 
-       return ob ?: ERR_PTR(ret);
+       ca->alloc_cursor = alloc_cursor;
+
+       if (!ob && ret)
+               ob = ERR_PTR(ret);
+
+       if (!ob && alloc_start > ca->mi.first_bucket) {
+               alloc_cursor = alloc_start = ca->mi.first_bucket;
+               goto again;
+       }
+
+       return ob;
 }
 
 /**
@@ -507,16 +499,14 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
 static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
                                      struct bch_dev *ca,
                                      enum alloc_reserve reserve,
-                                     bool may_alloc_partial,
                                      struct closure *cl,
                                      struct bch_dev_usage *usage)
 {
        struct bch_fs *c = trans->c;
        struct open_bucket *ob = NULL;
        bool freespace = READ_ONCE(ca->mi.freespace_initialized);
-       u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor;
        u64 avail;
-       struct bucket_alloc_state s = { .cur_bucket = start };
+       struct bucket_alloc_state s = { 0 };
        bool waiting = false;
 again:
        bch2_dev_usage_read_fast(ca, usage);
@@ -547,12 +537,6 @@ again:
 
        if (waiting)
                closure_wake_up(&c->freelist_wait);
-
-       if (may_alloc_partial) {
-               ob = try_alloc_partial_bucket(c, ca, reserve);
-               if (ob)
-                       return ob;
-       }
 alloc:
        ob = likely(freespace)
                ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
@@ -561,28 +545,29 @@ alloc:
        if (s.skipped_need_journal_commit * 2 > avail)
                bch2_journal_flush_async(&c->journal, NULL);
 
-       if (!ob && !freespace && start) {
-               start = s.cur_bucket = 0;
-               goto alloc;
-       }
-
        if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
                freespace = false;
                goto alloc;
        }
-
-       if (!freespace)
-               ca->bucket_alloc_trans_early_cursor = s.cur_bucket;
 err:
        if (!ob)
                ob = ERR_PTR(-BCH_ERR_no_buckets_found);
 
        if (!IS_ERR(ob))
-               trace_and_count(c, bucket_alloc, ca, bch2_alloc_reserves[reserve],
-                               may_alloc_partial, ob->bucket);
+               trace_and_count(c, bucket_alloc, ca,
+                               bch2_alloc_reserves[reserve],
+                               ob->bucket,
+                               usage->d[BCH_DATA_free].buckets,
+                               avail,
+                               bch2_copygc_wait_amount(c),
+                               c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
+                               &s,
+                               cl == NULL,
+                               "");
        else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart))
-               trace_and_count(c, bucket_alloc_fail,
-                               ca, bch2_alloc_reserves[reserve],
+               trace_and_count(c, bucket_alloc_fail, ca,
+                               bch2_alloc_reserves[reserve],
+                               0,
                                usage->d[BCH_DATA_free].buckets,
                                avail,
                                bch2_copygc_wait_amount(c),
@@ -596,7 +581,6 @@ err:
 
 struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
                                      enum alloc_reserve reserve,
-                                     bool may_alloc_partial,
                                      struct closure *cl)
 {
        struct bch_dev_usage usage;
@@ -604,7 +588,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 
        bch2_trans_do(c, NULL, NULL, 0,
                      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
-                                                       may_alloc_partial, cl, &usage)));
+                                                       cl, &usage)));
        return ob;
 }
 
@@ -661,12 +645,10 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
        bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 }
 
-#define BUCKET_MAY_ALLOC_PARTIAL       (1 << 0)
-#define BUCKET_ALLOC_USE_DURABILITY    (1 << 1)
-
-static void add_new_bucket(struct bch_fs *c,
+static int add_new_bucket(struct bch_fs *c,
                           struct open_buckets *ptrs,
                           struct bch_devs_mask *devs_may_alloc,
+                          unsigned nr_replicas,
                           unsigned *nr_effective,
                           bool *have_cache,
                           unsigned flags,
@@ -675,23 +657,30 @@ static void add_new_bucket(struct bch_fs *c,
        unsigned durability =
                bch_dev_bkey_exists(c, ob->dev)->mi.durability;
 
+       BUG_ON(*nr_effective >= nr_replicas);
+
        __clear_bit(ob->dev, devs_may_alloc->d);
-       *nr_effective   += (flags & BUCKET_ALLOC_USE_DURABILITY)
-               ? durability : 1;
+       *nr_effective   += durability;
        *have_cache     |= !durability;
 
        ob_push(c, ptrs, ob);
+
+       if (*nr_effective >= nr_replicas)
+               return 1;
+       if (ob->ec)
+               return 1;
+       return 0;
 }
 
-static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
+int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
                      struct open_buckets *ptrs,
                      struct dev_stripe_state *stripe,
                      struct bch_devs_mask *devs_may_alloc,
                      unsigned nr_replicas,
                      unsigned *nr_effective,
                      bool *have_cache,
+                     enum bch_data_type data_type,
                      enum alloc_reserve reserve,
-                     unsigned flags,
                      struct closure *cl)
 {
        struct bch_fs *c = trans->c;
@@ -724,8 +713,7 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
                        continue;
                }
 
-               ob = bch2_bucket_alloc_trans(trans, ca, reserve,
-                               flags & BUCKET_MAY_ALLOC_PARTIAL, cl, &usage);
+               ob = bch2_bucket_alloc_trans(trans, ca, reserve, cl, &usage);
                if (!IS_ERR(ob))
                        bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
                percpu_ref_put(&ca->ref);
@@ -737,10 +725,11 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
                        continue;
                }
 
-               add_new_bucket(c, ptrs, devs_may_alloc,
-                              nr_effective, have_cache, flags, ob);
+               ob->data_type = data_type;
 
-               if (*nr_effective >= nr_replicas) {
+               if (add_new_bucket(c, ptrs, devs_may_alloc,
+                                  nr_replicas, nr_effective,
+                                  have_cache, 0, ob)) {
                        ret = 0;
                        break;
                }
@@ -749,24 +738,6 @@ static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_bucket_alloc_set(struct bch_fs *c,
-                     struct open_buckets *ptrs,
-                     struct dev_stripe_state *stripe,
-                     struct bch_devs_mask *devs_may_alloc,
-                     unsigned nr_replicas,
-                     unsigned *nr_effective,
-                     bool *have_cache,
-                     enum alloc_reserve reserve,
-                     unsigned flags,
-                     struct closure *cl)
-{
-       return bch2_trans_do(c, NULL, NULL, 0,
-                     bch2_bucket_alloc_set_trans(&trans, ptrs, stripe,
-                                             devs_may_alloc, nr_replicas,
-                                             nr_effective, have_cache, reserve,
-                                             flags, cl));
-}
-
 /* Allocate from stripes: */
 
 /*
@@ -775,26 +746,24 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
  * it's to a device we don't want:
  */
 
-static int bucket_alloc_from_stripe(struct bch_fs *c,
+static int bucket_alloc_from_stripe(struct btree_trans *trans,
                         struct open_buckets *ptrs,
                         struct write_point *wp,
                         struct bch_devs_mask *devs_may_alloc,
                         u16 target,
-                        unsigned erasure_code,
                         unsigned nr_replicas,
                         unsigned *nr_effective,
                         bool *have_cache,
                         unsigned flags,
                         struct closure *cl)
 {
+       struct bch_fs *c = trans->c;
        struct dev_alloc_list devs_sorted;
        struct ec_stripe_head *h;
        struct open_bucket *ob;
        struct bch_dev *ca;
        unsigned i, ec_idx;
-
-       if (!erasure_code)
-               return 0;
+       int ret = 0;
 
        if (nr_replicas < 2)
                return 0;
@@ -802,11 +771,11 @@ static int bucket_alloc_from_stripe(struct bch_fs *c,
        if (ec_open_bucket(c, ptrs))
                return 0;
 
-       h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1,
+       h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1,
                                    wp == &c->copygc_write_point,
                                    cl);
        if (IS_ERR(h))
-               return -PTR_ERR(h);
+               return PTR_ERR(h);
        if (!h)
                return 0;
 
@@ -829,54 +798,122 @@ got_bucket:
        ob->ec_idx      = ec_idx;
        ob->ec          = h->s;
 
-       add_new_bucket(c, ptrs, devs_may_alloc,
-                      nr_effective, have_cache, flags, ob);
+       ret = add_new_bucket(c, ptrs, devs_may_alloc,
+                            nr_replicas, nr_effective,
+                            have_cache, flags, ob);
        atomic_inc(&h->s->pin);
 out_put_head:
        bch2_ec_stripe_head_put(c, h);
-       return 0;
+       return ret;
 }
 
 /* Sector allocator */
 
-static void get_buckets_from_writepoint(struct bch_fs *c,
-                                       struct open_buckets *ptrs,
-                                       struct write_point *wp,
-                                       struct bch_devs_mask *devs_may_alloc,
-                                       unsigned nr_replicas,
-                                       unsigned *nr_effective,
-                                       bool *have_cache,
-                                       unsigned flags,
-                                       bool need_ec)
+static bool want_bucket(struct bch_fs *c,
+                       struct write_point *wp,
+                       struct bch_devs_mask *devs_may_alloc,
+                       bool *have_cache, bool ec,
+                       struct open_bucket *ob)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+
+       if (!test_bit(ob->dev, devs_may_alloc->d))
+               return false;
+
+       if (ob->data_type != wp->data_type)
+               return false;
+
+       if (!ca->mi.durability &&
+           (wp->data_type != BCH_DATA_user || !*have_cache))
+               return false;
+
+       if (ec != (ob->ec != NULL))
+               return false;
+
+       return true;
+}
+
+static int bucket_alloc_set_writepoint(struct bch_fs *c,
+                                      struct open_buckets *ptrs,
+                                      struct write_point *wp,
+                                      struct bch_devs_mask *devs_may_alloc,
+                                      unsigned nr_replicas,
+                                      unsigned *nr_effective,
+                                      bool *have_cache,
+                                      bool ec, unsigned flags)
 {
        struct open_buckets ptrs_skip = { .nr = 0 };
        struct open_bucket *ob;
        unsigned i;
+       int ret = 0;
 
        open_bucket_for_each(c, &wp->ptrs, ob, i) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
-
-               if (*nr_effective < nr_replicas &&
-                   test_bit(ob->dev, devs_may_alloc->d) &&
-                   (ca->mi.durability ||
-                    (wp->data_type == BCH_DATA_user && !*have_cache)) &&
-                   (ob->ec || !need_ec)) {
-                       add_new_bucket(c, ptrs, devs_may_alloc,
-                                      nr_effective, have_cache,
-                                      flags, ob);
-               } else {
+               if (!ret && want_bucket(c, wp, devs_may_alloc,
+                                       have_cache, ec, ob))
+                       ret = add_new_bucket(c, ptrs, devs_may_alloc,
+                                      nr_replicas, nr_effective,
+                                      have_cache, flags, ob);
+               else
                        ob_push(c, &ptrs_skip, ob);
-               }
        }
        wp->ptrs = ptrs_skip;
+
+       return ret;
 }
 
-static int open_bucket_add_buckets(struct btree_trans *trans,
+static int bucket_alloc_set_partial(struct bch_fs *c,
+                                   struct open_buckets *ptrs,
+                                   struct write_point *wp,
+                                   struct bch_devs_mask *devs_may_alloc,
+                                   unsigned nr_replicas,
+                                   unsigned *nr_effective,
+                                   bool *have_cache, bool ec,
+                                   enum alloc_reserve reserve,
+                                   unsigned flags)
+{
+       int i, ret = 0;
+
+       if (!c->open_buckets_partial_nr)
+               return 0;
+
+       spin_lock(&c->freelist_lock);
+
+       for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
+               struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
+
+               if (want_bucket(c, wp, devs_may_alloc, have_cache, ec, ob)) {
+                       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
+                       struct bch_dev_usage usage;
+                       u64 avail;
+
+                       bch2_dev_usage_read_fast(ca, &usage);
+                       avail = dev_buckets_free(ca, usage, reserve);
+                       if (!avail)
+                               continue;
+
+                       array_remove_item(c->open_buckets_partial,
+                                         c->open_buckets_partial_nr,
+                                         i);
+                       ob->on_partial_list = false;
+
+                       ret = add_new_bucket(c, ptrs, devs_may_alloc,
+                                            nr_replicas, nr_effective,
+                                            have_cache, flags, ob);
+                       if (ret)
+                               break;
+               }
+       }
+
+       spin_unlock(&c->freelist_lock);
+       return ret;
+}
+
+static int __open_bucket_add_buckets(struct btree_trans *trans,
                        struct open_buckets *ptrs,
                        struct write_point *wp,
                        struct bch_devs_list *devs_have,
                        u16 target,
-                       unsigned erasure_code,
+                       bool erasure_code,
                        unsigned nr_replicas,
                        unsigned *nr_effective,
                        bool *have_cache,
@@ -888,8 +925,8 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
        struct bch_devs_mask devs;
        struct open_bucket *ob;
        struct closure *cl = NULL;
-       int ret;
        unsigned i;
+       int ret;
 
        rcu_read_lock();
        devs = target_rw_devs(c, wp->data_type, target);
@@ -902,52 +939,82 @@ static int open_bucket_add_buckets(struct btree_trans *trans,
        open_bucket_for_each(c, ptrs, ob, i)
                __clear_bit(ob->dev, devs.d);
 
+       if (erasure_code && ec_open_bucket(c, ptrs))
+               return 0;
+
+       ret = bucket_alloc_set_writepoint(c, ptrs, wp, &devs,
+                                nr_replicas, nr_effective,
+                                have_cache, erasure_code, flags);
+       if (ret)
+               return ret;
+
+       ret = bucket_alloc_set_partial(c, ptrs, wp, &devs,
+                                nr_replicas, nr_effective,
+                                have_cache, erasure_code, reserve, flags);
+       if (ret)
+               return ret;
+
        if (erasure_code) {
-               if (!ec_open_bucket(c, ptrs)) {
-                       get_buckets_from_writepoint(c, ptrs, wp, &devs,
-                                                   nr_replicas, nr_effective,
-                                                   have_cache, flags, true);
-                       if (*nr_effective >= nr_replicas)
-                               return 0;
+               ret = bucket_alloc_from_stripe(trans, ptrs, wp, &devs,
+                                        target,
+                                        nr_replicas, nr_effective,
+                                        have_cache, flags, _cl);
+       } else {
+retry_blocking:
+               /*
+                * Try nonblocking first, so that if one device is full we'll try from
+                * other devices:
+                */
+               ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
+                                       nr_replicas, nr_effective, have_cache,
+                                       wp->data_type, reserve, cl);
+               if (ret &&
+                   !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+                   !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+                   !cl && _cl) {
+                       cl = _cl;
+                       goto retry_blocking;
                }
 
-               if (!ec_open_bucket(c, ptrs)) {
-                       ret = bucket_alloc_from_stripe(c, ptrs, wp, &devs,
-                                                target, erasure_code,
-                                                nr_replicas, nr_effective,
-                                                have_cache, flags, _cl);
-                       if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-                           bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
-                           bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
-                               return ret;
-                       if (*nr_effective >= nr_replicas)
-                               return 0;
-               }
        }
 
-       get_buckets_from_writepoint(c, ptrs, wp, &devs,
-                                   nr_replicas, nr_effective,
-                                   have_cache, flags, false);
-       if (*nr_effective >= nr_replicas)
-               return 0;
+       return ret;
+}
 
-retry_blocking:
-       /*
-        * Try nonblocking first, so that if one device is full we'll try from
-        * other devices:
-        */
-       ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
+static int open_bucket_add_buckets(struct btree_trans *trans,
+                       struct open_buckets *ptrs,
+                       struct write_point *wp,
+                       struct bch_devs_list *devs_have,
+                       u16 target,
+                       unsigned erasure_code,
+                       unsigned nr_replicas,
+                       unsigned *nr_effective,
+                       bool *have_cache,
+                       enum alloc_reserve reserve,
+                       unsigned flags,
+                       struct closure *cl)
+{
+       int ret;
+
+       if (erasure_code) {
+               ret = __open_bucket_add_buckets(trans, ptrs, wp,
+                               devs_have, target, erasure_code,
                                nr_replicas, nr_effective, have_cache,
                                reserve, flags, cl);
-       if (ret &&
-           !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
-           !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
-           !cl && _cl) {
-               cl = _cl;
-               goto retry_blocking;
+               if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+                   bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
+                   bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+                   bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+                       return ret;
+               if (*nr_effective >= nr_replicas)
+                       return 0;
        }
 
-       return ret;
+       ret = __open_bucket_add_buckets(trans, ptrs, wp,
+                       devs_have, target, false,
+                       nr_replicas, nr_effective, have_cache,
+                       reserve, flags, cl);
+       return ret < 0 ? ret : 0;
 }
 
 void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
@@ -1060,7 +1127,7 @@ static bool try_decrease_writepoints(struct bch_fs *c,
        return true;
 }
 
-static void bch2_trans_mutex_lock(struct btree_trans *trans,
+static void bch2_trans_mutex_lock_norelock(struct btree_trans *trans,
                                  struct mutex *lock)
 {
        if (!mutex_trylock(lock)) {
@@ -1078,7 +1145,7 @@ static struct write_point *writepoint_find(struct btree_trans *trans,
 
        if (!(write_point & 1UL)) {
                wp = (struct write_point *) write_point;
-               bch2_trans_mutex_lock(trans, &wp->lock);
+               bch2_trans_mutex_lock_norelock(trans, &wp->lock);
                return wp;
        }
 
@@ -1087,7 +1154,7 @@ restart_find:
        wp = __writepoint_find(head, write_point);
        if (wp) {
 lock_wp:
-               bch2_trans_mutex_lock(trans, &wp->lock);
+               bch2_trans_mutex_lock_norelock(trans, &wp->lock);
                if (wp->write_point == write_point)
                        goto out;
                mutex_unlock(&wp->lock);
@@ -1100,8 +1167,8 @@ restart_find_oldest:
                if (!oldest || time_before64(wp->last_used, oldest->last_used))
                        oldest = wp;
 
-       bch2_trans_mutex_lock(trans, &oldest->lock);
-       bch2_trans_mutex_lock(trans, &c->write_points_hash_lock);
+       bch2_trans_mutex_lock_norelock(trans, &oldest->lock);
+       bch2_trans_mutex_lock_norelock(trans, &c->write_points_hash_lock);
        if (oldest >= c->write_points + c->write_points_nr ||
            try_increase_writepoints(c)) {
                mutex_unlock(&c->write_points_hash_lock);
@@ -1130,30 +1197,26 @@ out:
  * Get us an open_bucket we can allocate from, return with it locked:
  */
 int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
-                                  unsigned target,
-                                  unsigned erasure_code,
-                                  struct write_point_specifier write_point,
-                                  struct bch_devs_list *devs_have,
-                                  unsigned nr_replicas,
-                                  unsigned nr_replicas_required,
-                                  enum alloc_reserve reserve,
-                                  unsigned flags,
-                                  struct closure *cl,
-                                  struct write_point **wp_ret)
+                            unsigned target,
+                            unsigned erasure_code,
+                            struct write_point_specifier write_point,
+                            struct bch_devs_list *devs_have,
+                            unsigned nr_replicas,
+                            unsigned nr_replicas_required,
+                            enum alloc_reserve reserve,
+                            unsigned flags,
+                            struct closure *cl,
+                            struct write_point **wp_ret)
 {
        struct bch_fs *c = trans->c;
        struct write_point *wp;
        struct open_bucket *ob;
        struct open_buckets ptrs;
        unsigned nr_effective, write_points_nr;
-       unsigned ob_flags = 0;
        bool have_cache;
        int ret;
        int i;
 
-       if (!(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS))
-               ob_flags |= BUCKET_ALLOC_USE_DURABILITY;
-
        BUG_ON(!nr_replicas || !nr_replicas_required);
 retry:
        ptrs.nr         = 0;
@@ -1163,9 +1226,6 @@ retry:
 
        *wp_ret = wp = writepoint_find(trans, write_point.v);
 
-       if (wp->data_type == BCH_DATA_user)
-               ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
-
        /* metadata may not allocate on cache devices: */
        if (wp->data_type != BCH_DATA_user)
                have_cache = true;
@@ -1175,13 +1235,13 @@ retry:
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, reserve,
-                                             ob_flags, cl);
+                                             flags, cl);
        } else {
                ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
                                              target, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, reserve,
-                                             ob_flags, NULL);
+                                             flags, NULL);
                if (!ret ||
                    bch2_err_matches(ret, BCH_ERR_transaction_restart))
                        goto alloc_done;
@@ -1190,7 +1250,7 @@ retry:
                                              0, erasure_code,
                                              nr_replicas, &nr_effective,
                                              &have_cache, reserve,
-                                             ob_flags, cl);
+                                             flags, cl);
        }
 alloc_done:
        BUG_ON(!ret && nr_effective < nr_replicas);
@@ -1318,6 +1378,20 @@ void bch2_fs_allocator_foreground_init(struct bch_fs *c)
        }
 }
 
+static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
+{
+       unsigned data_type = ob->data_type;
+       barrier(); /* READ_ONCE() doesn't work on bitfields, so the value is copied to a local first */
+       /* One line per bucket: "<index> ref <pin> <data type>[ ec][ partial] <dev>:<bucket> gen <gen>" */
+       prt_printf(out, "%zu ref %u %s%s%s %u:%llu gen %u\n",
+                  ob - c->open_buckets, /* index of ob within c->open_buckets[] */
+                  atomic_read(&ob->pin),
+                  data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
+                  ob->ec ? " ec" : "",
+                  ob->on_partial_list ? " partial" : "",
+                  ob->dev, ob->bucket, ob->gen);
+}
+
 void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
 {
        struct open_bucket *ob;
@@ -1326,13 +1400,49 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
             ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
             ob++) {
                spin_lock(&ob->lock);
-               if (ob->valid && !ob->on_partial_list) {
-                       prt_printf(out, "%zu ref %u type %s %u:%llu:%u\n",
-                              ob - c->open_buckets,
-                              atomic_read(&ob->pin),
-                              bch2_data_types[ob->data_type],
-                              ob->dev, ob->bucket, ob->gen);
-               }
+               if (ob->valid && !ob->on_partial_list)
+                       bch2_open_bucket_to_text(out, c, ob);
                spin_unlock(&ob->lock);
        }
 }
+
+void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       unsigned i;
+       /* Print every open bucket listed in c->open_buckets_partial[] to out, holding freelist_lock */
+       spin_lock(&c->freelist_lock);
+       for (i = 0; i < c->open_buckets_partial_nr; i++)
+               bch2_open_bucket_to_text(out, c,
+                               c->open_buckets + c->open_buckets_partial[i]); /* entry is an index into c->open_buckets[] */
+       spin_unlock(&c->freelist_lock);
+}
+
+static const char * const bch2_write_point_states[] = { /* each x(n) emitted by WRITE_POINT_STATES() becomes the string "n" */
+#define x(n)   #n,
+       WRITE_POINT_STATES()
+#undef x
+       NULL /* terminating entry */
+};
+
+void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct write_point *wp;
+       unsigned i;
+       /* One output line per slot of c->write_points[] (whole array, via ARRAY_SIZE); no lock is taken in this function */
+       for (wp = c->write_points;
+            wp < c->write_points + ARRAY_SIZE(c->write_points);
+            wp++) {
+               prt_printf(out, "%lu: ", wp->write_point);
+               prt_human_readable_u64(out, wp->sectors_allocated);
+
+               prt_printf(out, " last wrote: ");
+               bch2_pr_time_units(out, sched_clock() - wp->last_used); /* elapsed since wp->last_used */
+
+               for (i = 0; i < WRITE_POINT_STATE_NR; i++) { /* NOTE(review): assumes the names table has >= WRITE_POINT_STATE_NR entries - confirm */
+                       prt_printf(out, " %s: ", bch2_write_point_states[i]);
+                       bch2_pr_time_units(out, wp->time[i]); /* presumably time accounted to state i - confirm */
+               }
+
+               prt_newline(out);
+       }
+}