+ prt_printf(&buf, "non free bucket in freespace btree\n"
+ " freespace key ");
+ bch2_bkey_val_to_text(&buf, c, freespace_k);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
+ test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+ prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
+ " freespace key ",
+ genbits, alloc_freespace_genbits(*a) >> 56);
+ bch2_bkey_val_to_text(&buf, c, freespace_k);
+ prt_printf(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_trans_inconsistent(trans, "%s", buf.buf);
+ ob = ERR_PTR(-EIO);
+ goto err;
+
+ }
+
+ if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+ struct bch_backpointer bp;
+ struct bpos bp_pos = POS_MIN;
+
+ ret = bch2_get_next_backpointer(trans, POS(ca->dev_idx, b), -1,
+ &bp_pos, &bp,
+ BTREE_ITER_NOPRESERVE);
+ if (ret) {
+ ob = ERR_PTR(ret);
+ goto err;
+ }
+
+ if (!bkey_eq(bp_pos, POS_MAX)) {
+ /*
+ * Bucket may have data in it - we don't call
+ * bch2_trans_inconsistent() because fsck hasn't
+ * finished yet
+ */
+ ob = NULL;
+ goto err;
+ }
+ }
+
+ ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
+ if (!ob)
+ iter.path->preserve = false;
+err:
+ if (iter.trans && iter.path)
+ set_btree_iter_dontneed(&iter);
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ob;
+}
+
+/*
+ * bch2_bucket_alloc_early - allocate a bucket by scanning the alloc btree
+ *
+ * This path is for before the freespace btree is initialized:
+ *
+ * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
+ * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
+ *
+ * The scan walks BTREE_ID_alloc slots for this device starting at
+ * ca->alloc_cursor (clamped to the first usable bucket), skipping superblock
+ * buckets while new_fs_bucket_idx is set and any bucket whose data_type is not
+ * BCH_DATA_free. Every free candidate is counted in s->buckets_seen and handed
+ * to __try_alloc_bucket().
+ *
+ * The position reached by the scan is written back to ca->alloc_cursor, so
+ * that the next call resumes where this one stopped instead of rescanning from
+ * the same bucket each time. If nothing was found and the scan did not start
+ * at the first usable bucket, it wraps around once and rescans from there.
+ *
+ * Returns the first non-NULL result of __try_alloc_bucket(), ERR_PTR(ret) if
+ * btree iteration failed without a bucket being found, or NULL if no bucket
+ * was found.
+ */
+static noinline struct open_bucket *
+bch2_bucket_alloc_early(struct btree_trans *trans,
+			struct bch_dev *ca,
+			enum bch_watermark watermark,
+			struct bucket_alloc_state *s,
+			struct closure *cl)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct open_bucket *ob = NULL;
+	u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+	u64 alloc_start = max(first_bucket, READ_ONCE(ca->alloc_cursor));
+	u64 alloc_cursor = alloc_start;
+	int ret;
+again:
+	for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, alloc_cursor),
+			   BTREE_ITER_SLOTS, k, ret) {
+		struct bch_alloc_v4 a_convert;
+		const struct bch_alloc_v4 *a;
+
+		if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
+			break;
+
+		/*
+		 * Track how far we got; only updated for in-range slots so the
+		 * saved cursor always names a bucket on this device.
+		 */
+		alloc_cursor = k.k->p.offset;
+
+		if (ca->new_fs_bucket_idx &&
+		    is_superblock_bucket(ca, k.k->p.offset))
+			continue;
+
+		a = bch2_alloc_to_v4(k, &a_convert);
+
+		if (a->data_type != BCH_DATA_free)
+			continue;
+
+		s->buckets_seen++;
+
+		ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+		if (ob)
+			break;
+	}
+	bch2_trans_iter_exit(trans, &iter);
+
+	ca->alloc_cursor = alloc_cursor;
+
+	if (!ob && ret)
+		ob = ERR_PTR(ret);
+
+	/*
+	 * Wrap around at most once: after the reset alloc_start equals
+	 * first_bucket, so this condition cannot be true a second time.
+	 */
+	if (!ob && alloc_start > first_bucket) {
+		alloc_cursor = alloc_start = first_bucket;
+		goto again;
+	}
+
+	return ob;
+}
+
+/*
+ * bch2_bucket_alloc_freelist - allocate a bucket using the freespace btree
+ *
+ * Walks BTREE_ID_freespace keys for this device, beginning at the larger of
+ * ca->mi.first_bucket and ca->alloc_cursor. For each key, every bucket offset
+ * it covers (from the current cursor up to, but not including, the key's end
+ * offset) is counted in s->buckets_seen and offered to try_alloc_bucket().
+ * The walk stops at the first non-NULL result, or when
+ * btree_trans_too_many_iters() or the iterator reports an error.
+ *
+ * The final cursor is stored in ca->alloc_cursor. If nothing was found and
+ * the walk started past ca->mi.first_bucket, it is restarted once from
+ * ca->mi.first_bucket.
+ *
+ * Must not be called while ca->new_fs_bucket_idx is nonzero (BUG_ON).
+ *
+ * Returns the first non-NULL result of try_alloc_bucket(), ERR_PTR(ret) on
+ * error, or NULL if no bucket was found.
+ */
+static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
+						       struct bch_dev *ca,
+						       enum bch_watermark watermark,
+						       struct bucket_alloc_state *s,
+						       struct closure *cl)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	struct open_bucket *found = NULL;
+	u64 scan_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(ca->alloc_cursor));
+	u64 cursor = scan_start;
+	int ret;
+
+	BUG_ON(ca->new_fs_bucket_idx);
+
+	for (;;) {
+		for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
+					     POS(ca->dev_idx, cursor), 0, k, ret) {
+			/* ran off the end of this device's keys */
+			if (k.k->p.inode != ca->dev_idx)
+				break;
+
+			/* try each bucket covered by this freespace key */
+			cursor = max(cursor, bkey_start_offset(k.k));
+			while (cursor < k.k->p.offset) {
+				ret = btree_trans_too_many_iters(trans);
+				if (ret) {
+					found = ERR_PTR(ret);
+					break;
+				}
+
+				s->buckets_seen++;
+
+				found = try_alloc_bucket(trans, ca, watermark,
+							 cursor, s, k, cl);
+				if (found) {
+					iter.path->preserve = false;
+					break;
+				}
+
+				cursor++;
+			}
+
+			if (ret || found)
+				break;
+		}
+		bch2_trans_iter_exit(trans, &iter);
+
+		ca->alloc_cursor = cursor;
+
+		if (!found && ret)
+			found = ERR_PTR(ret);
+
+		/* done unless we came up empty and have not wrapped yet */
+		if (found || scan_start <= ca->mi.first_bucket)
+			return found;
+
+		cursor = scan_start = ca->mi.first_bucket;
+	}
+}
+
+/**
+ * bch2_bucket_alloc_trans - allocate a single bucket from a specific device
+ * @trans: btree transaction the allocation runs in
+ * @ca: device to allocate from
+ * @watermark: watermark passed to dev_buckets_free() and to the allocation paths
+ * @cl: if not NULL, closure added to c->freelist_wait when no buckets are
+ * available
+ * @usage: filled in with the device usage read by bch2_dev_usage_read_fast()
+ *
+ * Reads the device usage, kicks off discards, gc_gens and invalidates when
+ * their respective conditions hold, then allocates through
+ * bch2_bucket_alloc_freelist() if ca->mi.freespace_initialized was set, or
+ * bch2_bucket_alloc_early() otherwise. Emits a bucket_alloc or
+ * bucket_alloc_fail trace event describing the outcome.
+ *
+ * Return: an open_bucket on success, or an ERR_PTR() on failure
+ * (-BCH_ERR_freelist_empty when no buckets are available,
+ * -BCH_ERR_no_buckets_found when the allocation paths found nothing, or the
+ * error returned by the allocation path). Never returns NULL.
+ */
+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum bch_watermark watermark,
+ struct closure *cl,
+ struct bch_dev_usage *usage)
+{
+ struct bch_fs *c = trans->c;
+ struct open_bucket *ob = NULL;
+ bool freespace = READ_ONCE(ca->mi.freespace_initialized);
+ u64 avail;
+ struct bucket_alloc_state s = { 0 };
+ bool waiting = false;
+again:
+ bch2_dev_usage_read_fast(ca, usage);
+ avail = dev_buckets_free(ca, *usage, watermark);
+
+ if (usage->d[BCH_DATA_need_discard].buckets > avail)
+ bch2_do_discards(c);
+
+ if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
+ bch2_do_gc_gens(c);
+
+ if (should_invalidate_buckets(ca, *usage))
+ bch2_do_invalidates(c);
+
+ if (!avail) {
+ if (cl && !waiting) { /* queue on the wait list first, then re-read usage once */
+ closure_wait(&c->freelist_wait, cl);
+ waiting = true;
+ goto again;
+ }
+
+ if (!c->blocked_allocate) /* record when allocation first became blocked */
+ c->blocked_allocate = local_clock();
+
+ ob = ERR_PTR(-BCH_ERR_freelist_empty);
+ goto err;
+ }
+
+ if (waiting) /* buckets were available on the recheck after queueing */
+ closure_wake_up(&c->freelist_wait);
+alloc:
+ ob = likely(freespace)
+ ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
+ : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
+
+ if (s.skipped_need_journal_commit * 2 > avail)
+ bch2_journal_flush_async(&c->journal, NULL);
+
+ if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { /* freelist path found nothing and BCH_FS_CHECK_ALLOC_DONE is not set: retry via the early path */
+ freespace = false;
+ goto alloc;
+ }
+err:
+ if (!ob) /* callers get an ERR_PTR(), never NULL */
+ ob = ERR_PTR(-BCH_ERR_no_buckets_found);
+
+ if (!IS_ERR(ob))
+ trace_and_count(c, bucket_alloc, ca,
+ bch2_watermarks[watermark],
+ ob->bucket,
+ usage->d[BCH_DATA_free].buckets,
+ avail,
+ bch2_copygc_wait_amount(c),
+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
+ &s,
+ cl == NULL,
+ "");
+ else if (!bch2_err_matches(PTR_ERR(ob), BCH_ERR_transaction_restart)) /* transaction restarts are not traced as failures */
+ trace_and_count(c, bucket_alloc_fail, ca,
+ bch2_watermarks[watermark],
+ 0,
+ usage->d[BCH_DATA_free].buckets,
+ avail,
+ bch2_copygc_wait_amount(c),
+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
+ &s,
+ cl == NULL,
+ bch2_err_str(PTR_ERR(ob)));
+
+ return ob;
+}
+
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+ enum bch_watermark watermark,
+ struct closure *cl)
+{
+ struct bch_dev_usage usage;
+ struct open_bucket *ob;
+
+ bch2_trans_do(c, NULL, NULL, 0,
+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, watermark,
+ cl, &usage)));