#include "subvolume.h"
#include "super.h"
#include "super-io.h"
+#include "trace.h"
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
-#include <trace/events/bcachefs.h>
-
const char *bch2_blk_status_to_str(blk_status_t status)
{
if (status == BLK_STS_REMOVED)
struct page *page;
if (likely(!*using_mempool)) {
- page = alloc_page(GFP_NOIO);
+ page = alloc_page(GFP_NOFS);
if (unlikely(!page)) {
mutex_lock(&c->bio_bounce_pages_lock);
*using_mempool = true;
}
} else {
pool_alloc:
- page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO);
+ page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS);
}
return page;
bch2_trans_copy_iter(&iter, extent_iter);
- for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, old, ret) {
+ for_each_btree_key_upto_continue_norestart(iter,
+ new->k.p, BTREE_ITER_SLOTS, old, ret) {
s64 sectors = min(new->k.p.offset, old.k->p.offset) -
max(bkey_start_offset(&new->k),
bkey_start_offset(old.k));
unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
int ret;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
- SPOS(0,
- extent_iter->pos.inode,
- extent_iter->snapshot),
- BTREE_ITER_INTENT|BTREE_ITER_CACHED);
- k = bch2_bkey_get_mut(trans, &iter);
+ k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
+ SPOS(0,
+ extent_iter->pos.inode,
+ extent_iter->snapshot),
+ BTREE_ITER_CACHED);
ret = PTR_ERR_OR_ZERO(k);
if (unlikely(ret))
- goto err;
+ return ret;
if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
k = bch2_inode_to_v3(trans, k);
struct open_buckets open_buckets;
struct bkey_s_c k;
struct bkey_buf old, new;
+ unsigned sectors_allocated;
bool have_reservation = false;
bool unwritten = opts.nocow &&
c->sb.version >= bcachefs_metadata_version_unwritten_extents;
closure_init_stack(&cl);
open_buckets.nr = 0;
retry:
+ sectors_allocated = 0;
+
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
if (ret)
&devs_have,
opts.data_replicas,
opts.data_replicas,
- RESERVE_none, 0, &cl, &wp);
- if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
+ BCH_WATERMARK_normal, 0, &cl, &wp);
+ if (ret) {
bch2_trans_unlock(trans);
closure_sync(&cl);
- goto retry;
- }
- if (ret)
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+ goto retry;
return ret;
+ }
sectors = min(sectors, wp->sectors_free);
+ sectors_allocated = sectors;
bch2_key_resize(&e->k, sectors);
goto retry;
}
+ if (!ret && sectors_allocated)
+ bch2_increment_clock(c, sectors_allocated, WRITE);
+
bch2_open_buckets_put(c, &open_buckets);
bch2_disk_reservation_put(c, &disk_res);
bch2_bkey_buf_exit(&new, c);
if (to_entry(ptr + 1) < ptrs.end) {
n = to_wbio(bio_alloc_clone(NULL, &wbio->bio,
- GFP_NOIO, &ca->replica_set));
+ GFP_NOFS, &ca->replica_set));
n->bio.bi_end_io = wbio->bio.bi_end_io;
n->bio.bi_private = wbio->bio.bi_private;
struct bch_fs *c = op->c;
bch2_disk_reservation_put(c, &op->res);
- bch2_write_ref_put(c, BCH_WRITE_REF_write);
+ if (!(op->flags & BCH_WRITE_MOVE))
+ bch2_write_ref_put(c, BCH_WRITE_REF_write);
bch2_keylist_free(&op->insert_keys, op->inline_keys);
bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct write_point *wp = op->wp;
struct workqueue_struct *wq = index_update_wq(op);
+ unsigned long flags;
if ((op->flags & BCH_WRITE_DONE) &&
(op->flags & BCH_WRITE_MOVE))
bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
- barrier();
-
- /*
- * We're not using wp->writes_lock here, so this is racey: that's ok,
- * because this is just for diagnostic purposes, and we're running out
- * of interrupt context here so if we were to take the log we'd have to
- * switch to spin_lock_irq()/irqsave(), which is not free:
- */
+ spin_lock_irqsave(&wp->writes_lock, flags);
if (wp->state == WRITE_POINT_waiting_io)
__wp_update_state(wp, WRITE_POINT_waiting_work);
+ list_add_tail(&op->wp_list, &wp->writes);
+ spin_unlock_irqrestore (&wp->writes_lock, flags);
- op->btree_update_ready = true;
queue_work(wq, &wp->index_update_work);
}
static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
{
- op->btree_update_ready = false;
op->wp = wp;
- spin_lock(&wp->writes_lock);
- list_add_tail(&op->wp_list, &wp->writes);
- if (wp->state == WRITE_POINT_stopped)
+ if (wp->state == WRITE_POINT_stopped) {
+ spin_lock_irq(&wp->writes_lock);
__wp_update_state(wp, WRITE_POINT_waiting_io);
- spin_unlock(&wp->writes_lock);
+ spin_unlock_irq(&wp->writes_lock);
+ }
}
void bch2_write_point_do_index_updates(struct work_struct *work)
struct bch_write_op *op;
while (1) {
- spin_lock(&wp->writes_lock);
- list_for_each_entry(op, &wp->writes, wp_list)
- if (op->btree_update_ready) {
- list_del(&op->wp_list);
- goto unlock;
- }
- op = NULL;
-unlock:
+ spin_lock_irq(&wp->writes_lock);
+ op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
+ if (op)
+ list_del(&op->wp_list);
wp_update_state(wp, op != NULL);
- spin_unlock(&wp->writes_lock);
+ spin_unlock_irq(&wp->writes_lock);
if (!op)
break;
pages = min(pages, BIO_MAX_VECS);
bio = bio_alloc_bioset(NULL, pages, 0,
- GFP_NOIO, &c->bio_write);
+ GFP_NOFS, &c->bio_write);
wbio = wbio_init(bio);
wbio->put_bio = true;
/* copy WRITE_SYNC flag */
BUG_ON(total_output != total_input);
dst = bio_split(src, total_input >> 9,
- GFP_NOIO, &c->bio_write);
+ GFP_NOFS, &c->bio_write);
wbio_init(dst)->put_bio = true;
/* copy WRITE_SYNC flag */
dst->bi_opf = src->bi_opf;
return 0;
}
- new = bch2_bkey_make_mut(trans, k);
+ new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
struct btree_iter iter;
struct bkey_s_c k;
struct bkey_ptrs_c ptrs;
- const struct bch_extent_ptr *ptr, *ptr2;
+ const struct bch_extent_ptr *ptr;
struct {
struct bpos b;
unsigned gen;
bucket_to_u64(buckets[nr_buckets].b));
prefetch(buckets[nr_buckets].l);
- nr_buckets++;
if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
goto err_get_ioref;
+ nr_buckets++;
+
if (ptr->unwritten)
op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
}
}
return;
err_get_ioref:
- bkey_for_each_ptr(ptrs, ptr2) {
- if (ptr2 == ptr)
- break;
-
- percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
- }
+ for (i = 0; i < nr_buckets; i++)
+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
/* Fall back to COW path: */
goto out;
bch2_bucket_nocow_unlock(&c->nocow_locks,
buckets[i].b,
BUCKET_NOCOW_LOCK_UPDATE);
-
- bkey_for_each_ptr(ptrs, ptr2)
- percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
+ for (i = 0; i < nr_buckets; i++)
+ percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
/* We can retry this: */
- ret = BCH_ERR_transaction_restart;
+ ret = -BCH_ERR_transaction_restart;
goto out;
}
}
again:
memset(&op->failed, 0, sizeof(op->failed));
- op->btree_update_ready = false;
do {
struct bkey_i *key_to_write;
&op->devs_have,
op->nr_replicas,
op->nr_replicas_required,
- op->alloc_reserve,
+ op->watermark,
op->flags,
(op->flags & (BCH_WRITE_ALLOC_NOWAIT|
BCH_WRITE_ONLY_SPECIFIED_DEVS))
goto err;
}
- if (c->opts.nochanges ||
+ if (c->opts.nochanges) {
+ op->error = -BCH_ERR_erofs_no_writes;
+ goto err;
+ }
+
+ if (!(op->flags & BCH_WRITE_MOVE) &&
!bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
op->error = -BCH_ERR_erofs_no_writes;
goto err;
op->end_io(op);
}
+static const char * const bch2_write_flags[] = {
+#define x(f) #f,
+ BCH_WRITE_FLAGS()
+#undef x
+ NULL
+};
+
+void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
+{
+ prt_str(out, "pos: ");
+ bch2_bpos_to_text(out, op->pos);
+ prt_newline(out);
+ printbuf_indent_add(out, 2);
+
+ prt_str(out, "started: ");
+ bch2_pr_time_units(out, local_clock() - op->start_time);
+ prt_newline(out);
+
+ prt_str(out, "flags: ");
+ prt_bitflags(out, bch2_write_flags, op->flags);
+ prt_newline(out);
+
+ prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
+ prt_newline(out);
+
+ printbuf_indent_sub(out, 2);
+}
+
/* Cache promotion on read */
struct promote_op {
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
return NULL;
- op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
+ op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS);
if (!op)
goto err;
*/
*rbio = kzalloc(sizeof(struct bch_read_bio) +
sizeof(struct bio_vec) * pages,
- GFP_NOIO);
+ GFP_NOFS);
if (!*rbio)
goto err;
bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
- GFP_NOIO))
+ GFP_NOFS))
goto err;
(*rbio)->bounce = true;
.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
},
btree_id, k);
- if (ret == -BCH_ERR_nocow_lock_blocked) {
+ /*
+ * possible errors: -BCH_ERR_nocow_lock_blocked,
+ * -BCH_ERR_ENOSPC_disk_reservation:
+ */
+ if (ret) {
ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
bch_promote_params);
BUG_ON(ret);
goto err;
}
- BUG_ON(ret);
op->write.op.end_io = promote_done;
return op;
if (crc_is_compressed(rbio->pick.crc))
return 0;
- bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->data_pos,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
+ k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
if ((ret = bkey_err(k)))
goto out;
reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
*offset_into_extent;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink,
- POS(0, reflink_offset),
- BTREE_ITER_SLOTS);
- k = bch2_btree_iter_peek_slot(&iter);
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
+ POS(0, reflink_offset), 0);
ret = bkey_err(k);
if (ret)
goto err;
rbio = rbio_init(bio_alloc_bioset(NULL,
DIV_ROUND_UP(sectors, PAGE_SECTORS),
0,
- GFP_NOIO,
+ GFP_NOFS,
&c->bio_read_split),
orig->opts);
* from the whole bio, in which case we don't want to retry and
* lose the error)
*/
- rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOIO,
+ rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
&c->bio_read_split),
orig->opts);
rbio->bio.bi_iter = iter;
int bch2_fs_io_init(struct bch_fs *c)
{
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
- BIOSET_NEED_BVECS) ||
- bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
- BIOSET_NEED_BVECS) ||
- bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
- BIOSET_NEED_BVECS) ||
- mempool_init_page_pool(&c->bio_bounce_pages,
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_read_init;
+
+ if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_read_split_init;
+
+ if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
+ BIOSET_NEED_BVECS))
+ return -BCH_ERR_ENOMEM_bio_write_init;
+
+ if (mempool_init_page_pool(&c->bio_bounce_pages,
max_t(unsigned,
c->opts.btree_node_size,
c->opts.encoded_extent_max) /
- PAGE_SIZE, 0) ||
- rhashtable_init(&c->promote_table, &bch_promote_params))
- return -ENOMEM;
+ PAGE_SIZE, 0))
+ return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
+
+ if (rhashtable_init(&c->promote_table, &bch_promote_params))
+ return -BCH_ERR_ENOMEM_promote_table_init;
return 0;
}