git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/io.c
Update bcachefs sources to 504729f99c bcachefs: Allow answering y or n to all fsck...
[bcachefs-tools-debian] / libbcachefs / io.c
index 64925db22cdc937419881e5ed1ae78c4a5a1f3ad..c0371e23a4bc0f1cd9abc445a572b4e98a1cca94 100644 (file)
@@ -151,11 +151,11 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
 void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
 {
        struct bvec_iter_all iter;
-       struct bio_vec *bv;
+       struct bio_vec bv;
 
        bio_for_each_segment_all(bv, bio, iter)
-               if (bv->bv_page != ZERO_PAGE(0))
-                       mempool_free(bv->bv_page, &c->bio_bounce_pages);
+               if (bv.bv_page != ZERO_PAGE(0))
+                       mempool_free(bv.bv_page, &c->bio_bounce_pages);
        bio->bi_vcnt = 0;
 }
 
@@ -218,7 +218,8 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
 
        bch2_trans_copy_iter(&iter, extent_iter);
 
-       for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, old, ret) {
+       for_each_btree_key_upto_continue_norestart(iter,
+                               new->k.p, BTREE_ITER_SLOTS, old, ret) {
                s64 sectors = min(new->k.p.offset, old.k->p.offset) -
                        max(bkey_start_offset(&new->k),
                            bkey_start_offset(old.k));
@@ -384,6 +385,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
        struct open_buckets open_buckets;
        struct bkey_s_c k;
        struct bkey_buf old, new;
+       unsigned sectors_allocated;
        bool have_reservation = false;
        bool unwritten = opts.nocow &&
            c->sb.version >= bcachefs_metadata_version_unwritten_extents;
@@ -394,6 +396,8 @@ int bch2_extent_fallocate(struct btree_trans *trans,
        closure_init_stack(&cl);
        open_buckets.nr = 0;
 retry:
+       sectors_allocated = 0;
+
        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret)
@@ -450,15 +454,16 @@ retry:
                                opts.data_replicas,
                                opts.data_replicas,
                                RESERVE_none, 0, &cl, &wp);
-               if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
+               if (ret) {
                        bch2_trans_unlock(trans);
                        closure_sync(&cl);
-                       goto retry;
-               }
-               if (ret)
+                       if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+                               goto retry;
                        return ret;
+               }
 
                sectors = min(sectors, wp->sectors_free);
+               sectors_allocated = sectors;
 
                bch2_key_resize(&e->k, sectors);
 
@@ -485,6 +490,9 @@ out:
                goto retry;
        }
 
+       if (!ret && sectors_allocated)
+               bch2_increment_clock(c, sectors_allocated, WRITE);
+
        bch2_open_buckets_put(c, &open_buckets);
        bch2_disk_reservation_put(c, &disk_res);
        bch2_bkey_buf_exit(&new, c);
@@ -705,7 +713,8 @@ static void bch2_write_done(struct closure *cl)
        struct bch_fs *c = op->c;
 
        bch2_disk_reservation_put(c, &op->res);
-       bch2_write_ref_put(c, BCH_WRITE_REF_write);
+       if (!(op->flags & BCH_WRITE_MOVE))
+               bch2_write_ref_put(c, BCH_WRITE_REF_write);
        bch2_keylist_free(&op->insert_keys, op->inline_keys);
 
        bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
@@ -734,7 +743,7 @@ static noinline int bch2_write_drop_io_error_ptrs(struct bch_write_op *op)
                }
 
                if (dst != src)
-                       memmove_u64s_down(dst, src, src->u64s);
+                       memmove_u64s_down(dst, src, src->k.u64s);
                dst = bkey_next(dst);
        }
 
@@ -834,22 +843,32 @@ static void bch2_write_index(struct closure *cl)
        struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
        struct write_point *wp = op->wp;
        struct workqueue_struct *wq = index_update_wq(op);
+       unsigned long flags;
 
-       barrier();
+       if ((op->flags & BCH_WRITE_DONE) &&
+           (op->flags & BCH_WRITE_MOVE))
+               bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
 
-       /*
-        * We're not using wp->writes_lock here, so this is racey: that's ok,
-        * because this is just for diagnostic purposes, and we're running out
-        * of interrupt context here so if we were to take the log we'd have to
-        * switch to spin_lock_irq()/irqsave(), which is not free:
-        */
+       spin_lock_irqsave(&wp->writes_lock, flags);
        if (wp->state == WRITE_POINT_waiting_io)
                __wp_update_state(wp, WRITE_POINT_waiting_work);
+       list_add_tail(&op->wp_list, &wp->writes);
+       spin_unlock_irqrestore (&wp->writes_lock, flags);
 
-       op->btree_update_ready = true;
        queue_work(wq, &wp->index_update_work);
 }
 
+static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
+{
+       op->wp = wp;
+
+       if (wp->state == WRITE_POINT_stopped) {
+               spin_lock_irq(&wp->writes_lock);
+               __wp_update_state(wp, WRITE_POINT_waiting_io);
+               spin_unlock_irq(&wp->writes_lock);
+       }
+}
+
 void bch2_write_point_do_index_updates(struct work_struct *work)
 {
        struct write_point *wp =
@@ -857,16 +876,12 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
        struct bch_write_op *op;
 
        while (1) {
-               spin_lock(&wp->writes_lock);
-               list_for_each_entry(op, &wp->writes, wp_list)
-                       if (op->btree_update_ready) {
-                               list_del(&op->wp_list);
-                               goto unlock;
-                       }
-               op = NULL;
-unlock:
+               spin_lock_irq(&wp->writes_lock);
+               op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list);
+               if (op)
+                       list_del(&op->wp_list);
                wp_update_state(wp, op != NULL);
-               spin_unlock(&wp->writes_lock);
+               spin_unlock_irq(&wp->writes_lock);
 
                if (!op)
                        break;
@@ -1467,7 +1482,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_ptrs_c ptrs;
-       const struct bch_extent_ptr *ptr, *ptr2;
+       const struct bch_extent_ptr *ptr;
        struct {
                struct bpos     b;
                unsigned        gen;
@@ -1522,11 +1537,12 @@ retry:
                                                  bucket_to_u64(buckets[nr_buckets].b));
 
                        prefetch(buckets[nr_buckets].l);
-                       nr_buckets++;
 
                        if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
                                goto err_get_ioref;
 
+                       nr_buckets++;
+
                        if (ptr->unwritten)
                                op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
                }
@@ -1617,12 +1633,8 @@ err:
        }
        return;
 err_get_ioref:
-       bkey_for_each_ptr(ptrs, ptr2) {
-               if (ptr2 == ptr)
-                       break;
-
-               percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
-       }
+       for (i = 0; i < nr_buckets; i++)
+               percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
 
        /* Fall back to COW path: */
        goto out;
@@ -1631,9 +1643,8 @@ err_bucket_stale:
                bch2_bucket_nocow_unlock(&c->nocow_locks,
                                         buckets[i].b,
                                         BUCKET_NOCOW_LOCK_UPDATE);
-
-       bkey_for_each_ptr(ptrs, ptr2)
-               percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
+       for (i = 0; i < nr_buckets; i++)
+               percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
 
        /* We can retry this: */
        ret = BCH_ERR_transaction_restart;
@@ -1650,14 +1661,13 @@ static void __bch2_write(struct bch_write_op *op)
 
        nofs_flags = memalloc_nofs_save();
 
-       if (unlikely(op->opts.nocow)) {
+       if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
                bch2_nocow_write(op);
                if (op->flags & BCH_WRITE_DONE)
                        goto out_nofs_restore;
        }
 again:
        memset(&op->failed, 0, sizeof(op->failed));
-       op->btree_update_ready = false;
 
        do {
                struct bkey_i *key_to_write;
@@ -1708,15 +1718,6 @@ again:
                bch2_alloc_sectors_done_inlined(c, wp);
 err:
                if (ret <= 0) {
-                       if (!(op->flags & BCH_WRITE_SYNC)) {
-                               spin_lock(&wp->writes_lock);
-                               op->wp = wp;
-                               list_add_tail(&op->wp_list, &wp->writes);
-                               if (wp->state == WRITE_POINT_stopped)
-                                       __wp_update_state(wp, WRITE_POINT_waiting_io);
-                               spin_unlock(&wp->writes_lock);
-                       }
-
                        op->flags |= BCH_WRITE_DONE;
 
                        if (ret < 0) {
@@ -1755,6 +1756,7 @@ err:
                        goto again;
                bch2_write_done(&op->cl);
        } else {
+               bch2_write_queue(op, wp);
                continue_at(&op->cl, bch2_write_index, NULL);
        }
 out_nofs_restore:
@@ -1845,7 +1847,12 @@ void bch2_write(struct closure *cl)
                goto err;
        }
 
-       if (c->opts.nochanges ||
+       if (c->opts.nochanges) {
+               op->error = -BCH_ERR_erofs_no_writes;
+               goto err;
+       }
+
+       if (!(op->flags & BCH_WRITE_MOVE) &&
            !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
                op->error = -BCH_ERR_erofs_no_writes;
                goto err;
@@ -1873,6 +1880,34 @@ err:
                op->end_io(op);
 }
 
+const char * const bch2_write_flags[] = {
+#define x(f)   #f,
+       BCH_WRITE_FLAGS()
+#undef x
+       NULL
+};
+
+void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
+{
+       prt_str(out, "pos: ");
+       bch2_bpos_to_text(out, op->pos);
+       prt_newline(out);
+       printbuf_indent_add(out, 2);
+
+       prt_str(out, "started: ");
+       bch2_pr_time_units(out, local_clock() - op->start_time);
+       prt_newline(out);
+
+       prt_str(out, "flags: ");
+       prt_bitflags(out, bch2_write_flags, op->flags);
+       prt_newline(out);
+
+       prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
+       prt_newline(out);
+
+       printbuf_indent_sub(out, 2);
+}
+
 /* Cache promotion on read */
 
 struct promote_op {
@@ -2998,18 +3033,26 @@ void bch2_fs_io_exit(struct bch_fs *c)
 int bch2_fs_io_init(struct bch_fs *c)
 {
        if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
-                       BIOSET_NEED_BVECS) ||
-           bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
-                       BIOSET_NEED_BVECS) ||
-           bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
-                       BIOSET_NEED_BVECS) ||
-           mempool_init_page_pool(&c->bio_bounce_pages,
+                       BIOSET_NEED_BVECS))
+               return -BCH_ERR_ENOMEM_bio_read_init;
+
+       if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
+                       BIOSET_NEED_BVECS))
+               return -BCH_ERR_ENOMEM_bio_read_split_init;
+
+       if (bioset_init(&c->bio_write, 1, offsetof(struct bch_write_bio, bio),
+                       BIOSET_NEED_BVECS))
+               return -BCH_ERR_ENOMEM_bio_write_init;
+
+       if (mempool_init_page_pool(&c->bio_bounce_pages,
                                   max_t(unsigned,
                                         c->opts.btree_node_size,
                                         c->opts.encoded_extent_max) /
-                                  PAGE_SIZE, 0) ||
-           rhashtable_init(&c->promote_table, &bch_promote_params))
-               return -ENOMEM;
+                                  PAGE_SIZE, 0))
+               return -BCH_ERR_ENOMEM_bio_bounce_pages_init;
+
+       if (rhashtable_init(&c->promote_table, &bch_promote_params))
+               return -BCH_ERR_ENOMEM_promote_table_init;
 
        return 0;
 }