git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/io.c
Update bcachefs sources to ee560a3929 bcachefs: Print version, options earlier in...
[bcachefs-tools-debian] / libbcachefs / io.c
index d11feb100d7ec928e5a4bc3eac60c7693531620f..5bacc6a9dd8fb57cc6e58610db92b006fb0e3f80 100644 (file)
 #include "subvolume.h"
 #include "super.h"
 #include "super-io.h"
+#include "trace.h"
 
 #include <linux/blkdev.h>
 #include <linux/prefetch.h>
 #include <linux/random.h>
 #include <linux/sched/mm.h>
 
-#include <trace/events/bcachefs.h>
-
 const char *bch2_blk_status_to_str(blk_status_t status)
 {
        if (status == BLK_STS_REMOVED)
@@ -164,7 +163,7 @@ static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
        struct page *page;
 
        if (likely(!*using_mempool)) {
-               page = alloc_page(GFP_NOIO);
+               page = alloc_page(GFP_NOFS);
                if (unlikely(!page)) {
                        mutex_lock(&c->bio_bounce_pages_lock);
                        *using_mempool = true;
@@ -173,7 +172,7 @@ static struct page *__bio_alloc_page_pool(struct bch_fs *c, bool *using_mempool)
                }
        } else {
 pool_alloc:
-               page = mempool_alloc(&c->bio_bounce_pages, GFP_NOIO);
+               page = mempool_alloc(&c->bio_bounce_pages, GFP_NOFS);
        }
 
        return page;
@@ -258,15 +257,14 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans,
        unsigned inode_update_flags = BTREE_UPDATE_NOJOURNAL;
        int ret;
 
-       bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
-                            SPOS(0,
-                                 extent_iter->pos.inode,
-                                 extent_iter->snapshot),
-                            BTREE_ITER_INTENT|BTREE_ITER_CACHED);
-       k = bch2_bkey_get_mut(trans, &iter);
+       k = bch2_bkey_get_mut_noupdate(trans, &iter, BTREE_ID_inodes,
+                             SPOS(0,
+                                  extent_iter->pos.inode,
+                                  extent_iter->snapshot),
+                             BTREE_ITER_CACHED);
        ret = PTR_ERR_OR_ZERO(k);
        if (unlikely(ret))
-               goto err;
+               return ret;
 
        if (unlikely(k->k.type != KEY_TYPE_inode_v3)) {
                k = bch2_inode_to_v3(trans, k);
@@ -385,6 +383,7 @@ int bch2_extent_fallocate(struct btree_trans *trans,
        struct open_buckets open_buckets;
        struct bkey_s_c k;
        struct bkey_buf old, new;
+       unsigned sectors_allocated;
        bool have_reservation = false;
        bool unwritten = opts.nocow &&
            c->sb.version >= bcachefs_metadata_version_unwritten_extents;
@@ -395,6 +394,8 @@ int bch2_extent_fallocate(struct btree_trans *trans,
        closure_init_stack(&cl);
        open_buckets.nr = 0;
 retry:
+       sectors_allocated = 0;
+
        k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret)
@@ -450,16 +451,17 @@ retry:
                                &devs_have,
                                opts.data_replicas,
                                opts.data_replicas,
-                               RESERVE_none, 0, &cl, &wp);
-               if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
+                               BCH_WATERMARK_normal, 0, &cl, &wp);
+               if (ret) {
                        bch2_trans_unlock(trans);
                        closure_sync(&cl);
-                       goto retry;
-               }
-               if (ret)
+                       if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
+                               goto retry;
                        return ret;
+               }
 
                sectors = min(sectors, wp->sectors_free);
+               sectors_allocated = sectors;
 
                bch2_key_resize(&e->k, sectors);
 
@@ -486,6 +488,9 @@ out:
                goto retry;
        }
 
+       if (!ret && sectors_allocated)
+               bch2_increment_clock(c, sectors_allocated, WRITE);
+
        bch2_open_buckets_put(c, &open_buckets);
        bch2_disk_reservation_put(c, &disk_res);
        bch2_bkey_buf_exit(&new, c);
@@ -655,7 +660,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
 
                if (to_entry(ptr + 1) < ptrs.end) {
                        n = to_wbio(bio_alloc_clone(NULL, &wbio->bio,
-                                               GFP_NOIO, &ca->replica_set));
+                                               GFP_NOFS, &ca->replica_set));
 
                        n->bio.bi_end_io        = wbio->bio.bi_end_io;
                        n->bio.bi_private       = wbio->bio.bi_private;
@@ -971,7 +976,7 @@ static struct bio *bch2_write_bio_alloc(struct bch_fs *c,
        pages = min(pages, BIO_MAX_VECS);
 
        bio = bio_alloc_bioset(NULL, pages, 0,
-                              GFP_NOIO, &c->bio_write);
+                              GFP_NOFS, &c->bio_write);
        wbio                    = wbio_init(bio);
        wbio->put_bio           = true;
        /* copy WRITE_SYNC flag */
@@ -1073,11 +1078,12 @@ static enum prep_encoded_ret {
        /* Can we just write the entire extent as is? */
        if (op->crc.uncompressed_size == op->crc.live_size &&
            op->crc.compressed_size <= wp->sectors_free &&
-           (op->crc.compression_type == op->compression_type ||
+           (op->crc.compression_type == bch2_compression_opt_to_type(op->compression_opt) ||
             op->incompressible)) {
                if (!crc_is_compressed(op->crc) &&
                    op->csum_type != op->crc.csum_type &&
-                   bch2_write_rechecksum(c, op, op->csum_type))
+                   bch2_write_rechecksum(c, op, op->csum_type) &&
+                   !c->opts.no_data_io)
                        return PREP_ENCODED_CHECKSUM_ERR;
 
                return PREP_ENCODED_DO_WRITE;
@@ -1097,7 +1103,7 @@ static enum prep_encoded_ret {
                csum = bch2_checksum_bio(c, op->crc.csum_type,
                                         extent_nonce(op->version, op->crc),
                                         bio);
-               if (bch2_crc_cmp(op->crc.csum, csum))
+               if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
                        return PREP_ENCODED_CHECKSUM_ERR;
 
                if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
@@ -1115,13 +1121,14 @@ static enum prep_encoded_ret {
         */
        if ((op->crc.live_size != op->crc.uncompressed_size ||
             op->crc.csum_type != op->csum_type) &&
-           bch2_write_rechecksum(c, op, op->csum_type))
+           bch2_write_rechecksum(c, op, op->csum_type) &&
+           !c->opts.no_data_io)
                return PREP_ENCODED_CHECKSUM_ERR;
 
        /*
         * If we want to compress the data, it has to be decrypted:
         */
-       if ((op->compression_type ||
+       if ((op->compression_opt ||
             bch2_csum_type_is_encryption(op->crc.csum_type) !=
             bch2_csum_type_is_encryption(op->csum_type)) &&
            bch2_write_decrypt(op))
@@ -1168,7 +1175,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
        }
 
        if (ec_buf ||
-           op->compression_type ||
+           op->compression_opt ||
            (op->csum_type &&
             !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
            (bch2_csum_type_is_encryption(op->csum_type) &&
@@ -1191,16 +1198,16 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                    dst->bi_iter.bi_size < c->opts.encoded_extent_max)
                        break;
 
-               BUG_ON(op->compression_type &&
+               BUG_ON(op->compression_opt &&
                       (op->flags & BCH_WRITE_DATA_ENCODED) &&
                       bch2_csum_type_is_encryption(op->crc.csum_type));
-               BUG_ON(op->compression_type && !bounce);
+               BUG_ON(op->compression_opt && !bounce);
 
                crc.compression_type = op->incompressible
                        ? BCH_COMPRESSION_TYPE_incompressible
-                       : op->compression_type
+                       : op->compression_opt
                        ? bch2_bio_compress(c, dst, &dst_len, src, &src_len,
-                                           op->compression_type)
+                                           op->compression_opt)
                        : 0;
                if (!crc_is_compressed(crc)) {
                        dst_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
@@ -1309,7 +1316,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
                BUG_ON(total_output != total_input);
 
                dst = bio_split(src, total_input >> 9,
-                               GFP_NOIO, &c->bio_write);
+                               GFP_NOFS, &c->bio_write);
                wbio_init(dst)->put_bio = true;
                /* copy WRITE_SYNC flag */
                dst->bi_opf             = src->bi_opf;
@@ -1388,7 +1395,7 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans,
                return 0;
        }
 
-       new = bch2_bkey_make_mut(trans, k);
+       new = bch2_bkey_make_mut_noupdate(trans, k);
        ret = PTR_ERR_OR_ZERO(new);
        if (ret)
                return ret;
@@ -1475,7 +1482,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_ptrs_c ptrs;
-       const struct bch_extent_ptr *ptr, *ptr2;
+       const struct bch_extent_ptr *ptr;
        struct {
                struct bpos     b;
                unsigned        gen;
@@ -1530,11 +1537,12 @@ retry:
                                                  bucket_to_u64(buckets[nr_buckets].b));
 
                        prefetch(buckets[nr_buckets].l);
-                       nr_buckets++;
 
                        if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
                                goto err_get_ioref;
 
+                       nr_buckets++;
+
                        if (ptr->unwritten)
                                op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
                }
@@ -1625,12 +1633,8 @@ err:
        }
        return;
 err_get_ioref:
-       bkey_for_each_ptr(ptrs, ptr2) {
-               if (ptr2 == ptr)
-                       break;
-
-               percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
-       }
+       for (i = 0; i < nr_buckets; i++)
+               percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
 
        /* Fall back to COW path: */
        goto out;
@@ -1639,12 +1643,11 @@ err_bucket_stale:
                bch2_bucket_nocow_unlock(&c->nocow_locks,
                                         buckets[i].b,
                                         BUCKET_NOCOW_LOCK_UPDATE);
-
-       bkey_for_each_ptr(ptrs, ptr2)
-               percpu_ref_put(&bch_dev_bkey_exists(c, ptr2->dev)->io_ref);
+       for (i = 0; i < nr_buckets; i++)
+               percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
 
        /* We can retry this: */
-       ret = BCH_ERR_transaction_restart;
+       ret = -BCH_ERR_transaction_restart;
        goto out;
 }
 
@@ -1695,7 +1698,7 @@ again:
                                &op->devs_have,
                                op->nr_replicas,
                                op->nr_replicas_required,
-                               op->alloc_reserve,
+                               op->watermark,
                                op->flags,
                                (op->flags & (BCH_WRITE_ALLOC_NOWAIT|
                                              BCH_WRITE_ONLY_SPECIFIED_DEVS))
@@ -1877,7 +1880,7 @@ err:
                op->end_io(op);
 }
 
-const char * const bch2_write_flags[] = {
+static const char * const bch2_write_flags[] = {
 #define x(f)   #f,
        BCH_WRITE_FLAGS()
 #undef x
@@ -1889,6 +1892,7 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
        prt_str(out, "pos: ");
        bch2_bpos_to_text(out, op->pos);
        prt_newline(out);
+       printbuf_indent_add(out, 2);
 
        prt_str(out, "started: ");
        bch2_pr_time_units(out, local_clock() - op->start_time);
@@ -1897,6 +1901,11 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op)
        prt_str(out, "flags: ");
        prt_bitflags(out, bch2_write_flags, op->flags);
        prt_newline(out);
+
+       prt_printf(out, "ref: %u", closure_nr_remaining(&op->cl));
+       prt_newline(out);
+
+       printbuf_indent_sub(out, 2);
 }
 
 /* Cache promotion on read */
@@ -2006,7 +2015,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
        if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
                return NULL;
 
-       op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOIO);
+       op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_NOFS);
        if (!op)
                goto err;
 
@@ -2019,7 +2028,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
         */
        *rbio = kzalloc(sizeof(struct bch_read_bio) +
                        sizeof(struct bio_vec) * pages,
-                       GFP_NOIO);
+                       GFP_NOFS);
        if (!*rbio)
                goto err;
 
@@ -2027,7 +2036,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
        bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
 
        if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9,
-                                GFP_NOIO))
+                                GFP_NOFS))
                goto err;
 
        (*rbio)->bounce         = true;
@@ -2050,14 +2059,17 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
                                .write_flags    = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
                        },
                        btree_id, k);
-       if (ret == -BCH_ERR_nocow_lock_blocked) {
+       /*
+        * possible errors: -BCH_ERR_nocow_lock_blocked,
+        * -BCH_ERR_ENOSPC_disk_reservation:
+        */
+       if (ret) {
                ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
                                        bch_promote_params);
                BUG_ON(ret);
                goto err;
        }
 
-       BUG_ON(ret);
        op->write.op.end_io = promote_done;
 
        return op;
@@ -2304,9 +2316,8 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
        if (crc_is_compressed(rbio->pick.crc))
                return 0;
 
-       bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->data_pos,
-                            BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek_slot(&iter);
+       k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
+                              BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
        if ((ret = bkey_err(k)))
                goto out;
 
@@ -2407,7 +2418,8 @@ static void __bch2_read_endio(struct work_struct *work)
                if (ret)
                        goto decrypt_err;
 
-               if (bch2_bio_uncompress(c, src, dst, dst_iter, crc))
+               if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+                   !c->opts.no_data_io)
                        goto decompression_err;
        } else {
                /* don't need to decrypt the entire bio: */
@@ -2542,10 +2554,8 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
        reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
                *offset_into_extent;
 
-       bch2_trans_iter_init(trans, &iter, BTREE_ID_reflink,
-                            POS(0, reflink_offset),
-                            BTREE_ITER_SLOTS);
-       k = bch2_btree_iter_peek_slot(&iter);
+       k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
+                              POS(0, reflink_offset), 0);
        ret = bkey_err(k);
        if (ret)
                goto err;
@@ -2742,7 +2752,7 @@ get_bio:
                rbio = rbio_init(bio_alloc_bioset(NULL,
                                                  DIV_ROUND_UP(sectors, PAGE_SECTORS),
                                                  0,
-                                                 GFP_NOIO,
+                                                 GFP_NOFS,
                                                  &c->bio_read_split),
                                 orig->opts);
 
@@ -2758,7 +2768,7 @@ get_bio:
                 * from the whole bio, in which case we don't want to retry and
                 * lose the error)
                 */
-               rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOIO,
+               rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
                                                 &c->bio_read_split),
                                 orig->opts);
                rbio->bio.bi_iter = iter;