git.sesse.net Git - bcachefs-tools-debian/commitdiff
update bcache sources
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 16 Mar 2017 22:51:41 +0000 (14:51 -0800)
committerKent Overstreet <kent.overstreet@gmail.com>
Thu, 16 Mar 2017 22:51:41 +0000 (14:51 -0800)
19 files changed:
.bcache_revision
include/linux/bcache-ioctl.h
include/linux/bcache.h
libbcache/bcache.h
libbcache/btree_gc.c
libbcache/buckets.c
libbcache/chardev.c
libbcache/checksum.c
libbcache/compress.c
libbcache/extents.c
libbcache/fs-io.c
libbcache/io.c
libbcache/io.h
libbcache/io_types.h
libbcache/notify.h
libbcache/request.c
libbcache/super.c
libbcache/super.h
libbcache/util.h

index 434bc959e01fedbf319fc956e3d1d5edcfa6c7d2..72b9b1754eb288c555439eddec1a88e9987d994b 100644 (file)
@@ -1 +1 @@
-BCACHE_REVISION=3ea79179e3101fb50de8730a809d00d189f05be5
+BCACHE_REVISION=84b6390084721a37c0f7a261240093ad659f9a65
index 2d07666c97ce24ba5a31aac3088d267cbd12959a..ca769369f72d4770aec5afa0d0378a8aecbd1adc 100644 (file)
@@ -78,6 +78,14 @@ struct bch_ioctl_disk_set_state {
 #define BCH_REWRITE_RECOMPRESS         (1 << 0)
 #define BCH_REWRITE_DECREASE_REPLICAS  (1 << 1)
 
+enum bch_data_ops {
+       BCH_DATA_SCRUB,
+};
+
+struct bch_data_op {
+       __u8                    type;
+};
+
 struct bch_ioctl_data {
        __u32                   flags;
        __u32                   pad;
index f4c2f275bf780247cb8f4dec9fa66535ee6da884..c221747b4a4b3730c497cda8e2aad84bd9632093 100644 (file)
@@ -886,6 +886,10 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N,     struct bch_sb_field_crypt, kdf_flags,  0, 16);
 LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
 LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
 
+struct bch_sb_field_replication {
+       struct bch_sb_field     field;
+};
+
 /*
  * @offset     - sector where this sb was written
  * @version    - on disk format version
index 80d789acb80eace2d2b6a35fdcdfd1e44ce7435d..1d0e998c53ed0af58674a70654bcba8a1e8f1987 100644 (file)
@@ -716,8 +716,6 @@ struct bch_fs {
        void                    *zlib_workspace;
        struct mutex            zlib_workspace_lock;
        mempool_t               compression_bounce[2];
-       struct bio_decompress_worker __percpu
-                               *bio_decompress_worker;
 
        struct crypto_blkcipher *chacha20;
        struct crypto_shash     *poly1305;
index 9fa4a2a49cf191f5ece2c87d0470f3eaa4395ff7..5270d442ef90afa31fa624aced38e958074f219a 100644 (file)
@@ -933,14 +933,14 @@ int bch_initial_gc(struct bch_fs *c, struct list_head *journal)
 {
        enum btree_id id;
 
-       bch_mark_metadata(c);
-
        for (id = 0; id < BTREE_ID_NR; id++)
                bch_initial_gc_btree(c, id);
 
        if (journal)
                bch_journal_mark(c, journal);
 
+       bch_mark_metadata(c);
+
        /*
         * Skip past versions that might have possibly been used (as nonces),
         * but hadn't had their pointers written:
index a28d493035882ebb05a900b9f7d64249518f909f..7be943d142cab98828d60c3da1ae2334b9cd308f 100644 (file)
@@ -462,7 +462,7 @@ static void bch_mark_pointer(struct bch_fs *c,
                 * the allocator invalidating a bucket after we've already
                 * checked the gen
                 */
-               if (gen_after(old.gen, ptr->gen)) {
+               if (gen_after(new.gen, ptr->gen)) {
                        EBUG_ON(type != S_CACHED &&
                                test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
                        return;
@@ -470,7 +470,7 @@ static void bch_mark_pointer(struct bch_fs *c,
 
                EBUG_ON(type != S_CACHED &&
                        !may_make_unavailable &&
-                       is_available_bucket(old) &&
+                       is_available_bucket(new) &&
                        test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
 
                if (type != S_CACHED &&
index c764a9d0121fd49868b5b0fc3650a49c19c86cb3..da6d827f6dc29b258d6ea4ece56670f0814af890 100644 (file)
@@ -201,7 +201,6 @@ static long bch_ioctl_disk_remove(struct bch_fs *c,
 {
        struct bch_ioctl_disk arg;
        struct bch_dev *ca;
-       int ret;
 
        if (copy_from_user(&arg, user_arg, sizeof(arg)))
                return -EFAULT;
@@ -210,10 +209,7 @@ static long bch_ioctl_disk_remove(struct bch_fs *c,
        if (IS_ERR(ca))
                return PTR_ERR(ca);
 
-       ret = bch_dev_remove(c, ca, arg.flags);
-
-       percpu_ref_put(&ca->ref);
-       return ret;
+       return bch_dev_remove(c, ca, arg.flags);
 }
 
 static long bch_ioctl_disk_online(struct bch_fs *c,
@@ -294,7 +290,7 @@ static long bch_ioctl_disk_evacuate(struct bch_fs *c,
        if (IS_ERR(ca))
                return PTR_ERR(ca);
 
-       ret = bch_dev_migrate(c, ca);
+       ret = bch_dev_evacuate(c, ca);
 
        percpu_ref_put(&ca->ref);
        return ret;
@@ -384,12 +380,11 @@ void bch_chardev_exit(void)
 {
        if (!IS_ERR_OR_NULL(bch_chardev_class))
                device_destroy(bch_chardev_class,
-                              MKDEV(bch_chardev_major, 0));
+                              MKDEV(bch_chardev_major, 255));
        if (!IS_ERR_OR_NULL(bch_chardev_class))
                class_destroy(bch_chardev_class);
        if (bch_chardev_major > 0)
                unregister_chrdev(bch_chardev_major, "bcache");
-
 }
 
 int __init bch_chardev_init(void)
index b3fbeb114f2af02f063b632e93189900a880375f..b96050dbb381fbf96478910a0fe525b86d7aadfc 100644 (file)
@@ -292,9 +292,8 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type,
        case BCH_CSUM_CRC64: {
                u64 crc = bch_checksum_init(type);
 
-               bio_for_each_segment(bv, bio, iter) {
+               bio_for_each_contig_segment(bv, bio, iter) {
                        void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
-
                        crc = bch_checksum_update(type,
                                crc, p, bv.bv_len);
                        kunmap_atomic(p);
@@ -312,7 +311,7 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type,
 
                gen_poly_key(c, desc, nonce);
 
-               bio_for_each_segment(bv, bio, iter) {
+               bio_for_each_contig_segment(bv, bio, iter) {
                        void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
 
                        crypto_shash_update(desc, p, bv.bv_len);
@@ -342,7 +341,7 @@ void bch_encrypt_bio(struct bch_fs *c, unsigned type,
 
        sg_init_table(sgl, ARRAY_SIZE(sgl));
 
-       bio_for_each_segment(bv, bio, iter) {
+       bio_for_each_contig_segment(bv, bio, iter) {
                if (sg == sgl + ARRAY_SIZE(sgl)) {
                        sg_mark_end(sg - 1);
                        do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
index d6a345cb0dbeef1ea894e16ab1ec61f1867486ce..d9a64c381a0dabbe81afbe41605bc15f45eccc68 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/zlib.h>
 
 enum bounced {
+       BOUNCED_CONTIG,
        BOUNCED_MAPPED,
        BOUNCED_KMALLOCED,
        BOUNCED_VMALLOCED,
@@ -54,6 +55,14 @@ static void *__bio_map_or_bounce(struct bch_fs *c,
 
        BUG_ON(bvec_iter_sectors(start) > BCH_ENCODED_EXTENT_MAX);
 
+#ifndef CONFIG_HIGHMEM
+       *bounced = BOUNCED_CONTIG;
+
+       __bio_for_each_contig_segment(bv, bio, iter, start) {
+               if (bv.bv_len == start.bi_size)
+                       return page_address(bv.bv_page) + bv.bv_offset;
+       }
+#endif
        *bounced = BOUNCED_MAPPED;
 
        __bio_for_each_segment(bv, bio, iter, start) {
@@ -443,7 +452,6 @@ void bch_fs_compress_exit(struct bch_fs *c)
        mempool_exit(&c->lz4_workspace_pool);
        mempool_exit(&c->compression_bounce[WRITE]);
        mempool_exit(&c->compression_bounce[READ]);
-       free_percpu(c->bio_decompress_worker);
 }
 
 #define COMPRESSION_WORKSPACE_SIZE                                     \
@@ -453,22 +461,7 @@ void bch_fs_compress_exit(struct bch_fs *c)
 int bch_fs_compress_init(struct bch_fs *c)
 {
        unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
-       int ret, cpu;
-
-       if (!c->bio_decompress_worker) {
-               c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
-               if (!c->bio_decompress_worker)
-                       return -ENOMEM;
-
-               for_each_possible_cpu(cpu) {
-                       struct bio_decompress_worker *d =
-                               per_cpu_ptr(c->bio_decompress_worker, cpu);
-
-                       d->c = c;
-                       INIT_WORK(&d->work, bch_bio_decompress_work);
-                       init_llist_head(&d->bio_list);
-               }
-       }
+       int ret;
 
        if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
            !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
index 76b55f64813d8acafe73e4d9286454ed96dfe2a0..4b422fb1661bf3d4fe9c504624349858c165c9c7 100644 (file)
@@ -322,9 +322,7 @@ static bool should_drop_ptr(const struct bch_fs *c,
                            struct bkey_s_c_extent e,
                            const struct bch_extent_ptr *ptr)
 {
-       struct bch_dev *ca = c->devs[ptr->dev];
-
-       return ptr_stale(ca, ptr);
+       return ptr->cached && ptr_stale(c->devs[ptr->dev], ptr);
 }
 
 static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
@@ -2153,7 +2151,7 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k,
                extent_for_each_ptr_crc(e, ptr, crc) {
                        struct bch_dev *ca = c->devs[ptr->dev];
 
-                       if (ptr_stale(ca, ptr))
+                       if (ptr->cached && ptr_stale(ca, ptr))
                                continue;
 
                        if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
index 0aef01418b5a0b99d54a04f050b0189ec598a851..afc8c208dd3ca1153617bcacb7c89fe8aae6b704 100644 (file)
@@ -1613,10 +1613,16 @@ ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter)
        struct file *file = req->ki_filp;
        struct inode *inode = file->f_inode;
        struct bch_fs *c = inode->i_sb->s_fs_info;
+       struct blk_plug plug;
+       ssize_t ret;
 
-       return ((iov_iter_rw(iter) == WRITE)
+       blk_start_plug(&plug);
+       ret = ((iov_iter_rw(iter) == WRITE)
                ? bch_direct_IO_write
                : bch_direct_IO_read)(c, req, file, inode, iter, req->ki_pos);
+       blk_finish_plug(&plug);
+
+       return ret;
 }
 
 static ssize_t
index dbe2671b79a9a5239a74ac5436dabed0d520918f..753c8a3d123be7436d10bd598149749dd8df427d 100644 (file)
@@ -354,8 +354,9 @@ static void bch_write_endio(struct bio *bio)
        struct bch_dev *ca = wbio->ca;
 
        if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca,
-                                      "data write"))
+                                      "data write")) {
                set_closure_fn(cl, bch_write_io_error, index_update_wq(op));
+       }
 
        bch_account_io_completion_time(ca, wbio->submit_time_us,
                                       REQ_OP_WRITE);
@@ -973,8 +974,9 @@ static int bio_checksum_uncompress(struct bch_fs *c,
        return ret;
 }
 
-static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio)
+static void bch_rbio_free(struct bch_read_bio *rbio)
 {
+       struct bch_fs *c = rbio->c;
        struct bio *bio = &rbio->bio;
 
        BUG_ON(rbio->ca);
@@ -988,7 +990,7 @@ static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio)
        bio_put(bio);
 }
 
-static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
+static void bch_rbio_done(struct bch_read_bio *rbio)
 {
        struct bio *orig = &bch_rbio_parent(rbio)->bio;
 
@@ -1000,7 +1002,7 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
                        orig->bi_error = rbio->bio.bi_error;
 
                bio_endio(orig);
-               bch_rbio_free(c, rbio);
+               bch_rbio_free(rbio);
        } else {
                if (rbio->promote)
                        kfree(rbio->promote);
@@ -1010,30 +1012,16 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
        }
 }
 
-/*
- * Decide if we want to retry the read - returns true if read is being retried,
- * false if caller should pass error on up
- */
-static void bch_read_error_maybe_retry(struct bch_fs *c,
-                                      struct bch_read_bio *rbio,
-                                      int error)
+static void bch_rbio_error(struct bch_read_bio *rbio, int error)
 {
-       unsigned long flags;
-
-       if ((error == -EINTR) &&
-           (rbio->flags & BCH_READ_RETRY_IF_STALE)) {
-               atomic_long_inc(&c->cache_read_races);
-               goto retry;
-       }
+       bch_rbio_parent(rbio)->bio.bi_error = error;
+       bch_rbio_done(rbio);
+}
 
-       if (error == -EIO) {
-               /* io error - do we have another replica? */
-       }
+static void bch_rbio_retry(struct bch_fs *c, struct bch_read_bio *rbio)
+{
+       unsigned long flags;
 
-       bch_rbio_parent(rbio)->bio.bi_error = error;
-       bch_rbio_done(c, rbio);
-       return;
-retry:
        percpu_ref_put(&rbio->ca->io_ref);
        rbio->ca = NULL;
 
@@ -1053,13 +1041,26 @@ static void cache_promote_done(struct closure *cl)
 }
 
 /* Inner part that may run in process context */
-static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio)
+static void __bch_read_endio(struct work_struct *work)
 {
+       struct bch_read_bio *rbio =
+               container_of(work, struct bch_read_bio, work);
+       struct bch_fs *c = rbio->c;
        int ret;
 
        ret = bio_checksum_uncompress(c, rbio);
        if (ret) {
-               bch_read_error_maybe_retry(c, rbio, ret);
+               /*
+                * Checksum error: if the bio wasn't bounced, we may have been
+                * reading into buffers owned by userspace (that userspace can
+                * scribble over) - retry the read, bouncing it this time:
+                */
+               if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
+                       rbio->flags |= BCH_READ_FORCE_BOUNCE;
+                       bch_rbio_retry(c, rbio);
+               } else {
+                       bch_rbio_error(rbio, -EIO);
+               }
                return;
        }
 
@@ -1073,64 +1074,51 @@ static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio)
                swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
                rbio->promote = NULL;
 
-               bch_rbio_done(c, rbio);
+               bch_rbio_done(rbio);
 
                closure_init(cl, &c->cl);
                closure_call(&promote->write.op.cl, bch_write, c->wq, cl);
                closure_return_with_destructor(cl, cache_promote_done);
        } else {
-               bch_rbio_done(c, rbio);
+               bch_rbio_done(rbio);
        }
 }
 
-void bch_bio_decompress_work(struct work_struct *work)
-{
-       struct bio_decompress_worker *d =
-               container_of(work, struct bio_decompress_worker, work);
-       struct llist_node *list, *next;
-       struct bch_read_bio *rbio;
-
-       while ((list = llist_del_all(&d->bio_list)))
-               for (list = llist_reverse_order(list);
-                    list;
-                    list = next) {
-                       next = llist_next(list);
-                       rbio = container_of(list, struct bch_read_bio, list);
-
-                       __bch_read_endio(d->c, rbio);
-               }
-}
-
 static void bch_read_endio(struct bio *bio)
 {
        struct bch_read_bio *rbio =
                container_of(bio, struct bch_read_bio, bio);
-       struct bch_fs *c = rbio->ca->fs;
-       int stale = ((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
-               ptr_stale(rbio->ca, &rbio->ptr) ? -EINTR : 0;
-       int error = bio->bi_error ?: stale;
+       struct bch_fs *c = rbio->c;
 
-       bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, REQ_OP_READ);
+       if (rbio->flags & BCH_READ_ACCOUNT_TIMES)
+               bch_account_io_completion_time(rbio->ca, rbio->submit_time_us,
+                                              REQ_OP_READ);
 
-       bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read");
-
-       if (error) {
-               bch_read_error_maybe_retry(c, rbio, error);
+       if (bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read")) {
+               /* XXX: retry IO errors when we have another replica */
+               bch_rbio_error(rbio, bio->bi_error);
                return;
        }
 
-       if (rbio->crc.compression_type != BCH_COMPRESSION_NONE ||
-           bch_csum_type_is_encryption(rbio->crc.csum_type)) {
-               struct bio_decompress_worker *d;
+       if (rbio->ptr.cached &&
+           (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+            ptr_stale(rbio->ca, &rbio->ptr))) {
+               atomic_long_inc(&c->cache_read_races);
 
-               preempt_disable();
-               d = this_cpu_ptr(c->bio_decompress_worker);
-               llist_add(&rbio->list, &d->bio_list);
-               queue_work(system_highpri_wq, &d->work);
-               preempt_enable();
-       } else {
-               __bch_read_endio(c, rbio);
+               if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+                       bch_rbio_retry(c, rbio);
+               else
+                       bch_rbio_error(rbio, -EINTR);
+               return;
        }
+
+       if (rbio->crc.compression_type ||
+           bch_csum_type_is_encryption(rbio->crc.csum_type))
+               queue_work(system_unbound_wq, &rbio->work);
+       else if (rbio->crc.csum_type)
+               queue_work(system_highpri_wq, &rbio->work);
+       else
+               __bch_read_endio(&rbio->work);
 }
 
 static bool should_promote(struct bch_fs *c,
@@ -1194,6 +1182,8 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
        if (pick->crc.compression_type != BCH_COMPRESSION_NONE ||
            (pick->crc.csum_type != BCH_CSUM_NONE &&
             (bvec_iter_sectors(iter) != crc_uncompressed_size(NULL, &pick->crc) ||
+             (bch_csum_type_is_encryption(pick->crc.csum_type) &&
+              (flags & BCH_READ_USER_MAPPED)) ||
              (flags & BCH_READ_FORCE_BOUNCE)))) {
                read_full = true;
                bounce = true;
@@ -1242,11 +1232,12 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
                rbio->orig_bi_end_io = orig->bio.bi_end_io;
        rbio->parent_iter       = iter;
 
-       rbio->inode             = k.k->p.inode;
        rbio->flags             = flags;
        rbio->bounce            = bounce;
        rbio->split             = split;
-       rbio->version           = k.k->version;
+       rbio->c                 = c;
+       rbio->ca                = pick->ca;
+       rbio->ptr               = pick->ptr;
        rbio->crc               = pick->crc;
        /*
         * crc.compressed_size will be 0 if there wasn't any checksum
@@ -1255,9 +1246,10 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
         * only for promoting)
         */
        rbio->crc._compressed_size = bio_sectors(&rbio->bio) - 1;
-       rbio->ptr               = pick->ptr;
-       rbio->ca                = pick->ca;
+       rbio->version           = k.k->version;
        rbio->promote           = promote_op;
+       rbio->inode             = k.k->p.inode;
+       INIT_WORK(&rbio->work, __bch_read_endio);
 
        rbio->bio.bi_bdev       = pick->ca->disk_sb.bdev;
        rbio->bio.bi_opf        = orig->bio.bi_opf;
@@ -1395,12 +1387,11 @@ void bch_read(struct bch_fs *c, struct bch_read_bio *bio, u64 inode)
        bch_increment_clock(c, bio_sectors(&bio->bio), READ);
 
        bch_read_iter(c, bio, bio->bio.bi_iter, inode,
-                     BCH_READ_FORCE_BOUNCE|
                      BCH_READ_RETRY_IF_STALE|
                      BCH_READ_PROMOTE|
-                     BCH_READ_MAY_REUSE_BIO);
+                     BCH_READ_MAY_REUSE_BIO|
+                     BCH_READ_USER_MAPPED);
 }
-EXPORT_SYMBOL(bch_read);
 
 /**
  * bch_read_retry - re-submit a bio originally from bch_read()
@@ -1409,19 +1400,17 @@ static void bch_read_retry(struct bch_fs *c, struct bch_read_bio *rbio)
 {
        struct bch_read_bio *parent = bch_rbio_parent(rbio);
        struct bvec_iter iter = rbio->parent_iter;
+       unsigned flags = rbio->flags;
        u64 inode = rbio->inode;
 
        trace_bcache_read_retry(&rbio->bio);
 
        if (rbio->split)
-               bch_rbio_free(c, rbio);
+               bch_rbio_free(rbio);
        else
                rbio->bio.bi_end_io = rbio->orig_bi_end_io;
 
-       bch_read_iter(c, parent, iter, inode,
-                     BCH_READ_FORCE_BOUNCE|
-                     BCH_READ_RETRY_IF_STALE|
-                     BCH_READ_PROMOTE);
+       bch_read_iter(c, parent, iter, inode, flags);
 }
 
 void bch_read_retry_work(struct work_struct *work)
index 302ed2e0944f3300710de878561d71533fd9f14f..9239ca4aac16beeec035deaa33d5ebb9569b5c23 100644 (file)
@@ -69,6 +69,8 @@ enum bch_read_flags {
        BCH_READ_PROMOTE                = 1 << 2,
        BCH_READ_IS_LAST                = 1 << 3,
        BCH_READ_MAY_REUSE_BIO          = 1 << 4,
+       BCH_READ_ACCOUNT_TIMES          = 1 << 5,
+       BCH_READ_USER_MAPPED            = 1 << 6,
 };
 
 void bch_read(struct bch_fs *, struct bch_read_bio *, u64);
@@ -85,6 +87,4 @@ int bch_discard(struct bch_fs *, struct bpos, struct bpos,
 void bch_read_retry_work(struct work_struct *);
 void bch_wake_delayed_writes(unsigned long data);
 
-void bch_bio_decompress_work(struct work_struct *);
-
 #endif /* _BCACHE_IO_H */
index 3d096876208f4a351b9d31ddeeacf64080bc5d08..ca1b0192fad02806cf541e4d95a86ef2a0f67985 100644 (file)
@@ -29,29 +29,29 @@ struct bch_read_bio {
         */
        struct bvec_iter        parent_iter;
 
-       /*
-        * If we have to retry the read (IO error, checksum failure, read stale
-        * data (raced with allocator), we retry the portion of the parent bio
-        * that failed (i.e. this bio's portion, parent_iter).
-        *
-        * But we need to stash the inode somewhere:
-        */
-       u64                     inode;
-
        unsigned                submit_time_us;
        u16                     flags;
        u8                      bounce:1,
                                split:1;
 
-       struct bversion         version;
-       struct bch_extent_crc128 crc;
-       struct bch_extent_ptr   ptr;
+       struct bch_fs           *c;
        struct bch_dev          *ca;
+       struct bch_extent_ptr   ptr;
+       struct bch_extent_crc128 crc;
+       struct bversion         version;
 
        struct cache_promote_op *promote;
 
-       /* bio_decompress_worker list */
-       struct llist_node       list;
+       /*
+        * If we have to retry the read (IO error, checksum failure, read stale
+        * data (raced with allocator), we retry the portion of the parent bio
+        * that failed (i.e. this bio's portion, parent_iter).
+        *
+        * But we need to stash the inode somewhere:
+        */
+       u64                     inode;
+
+       struct work_struct      work;
 
        struct bio              bio;
 };
@@ -63,7 +63,7 @@ bch_rbio_parent(struct bch_read_bio *rbio)
 }
 
 struct bch_write_bio {
-       struct bch_fs   *c;
+       struct bch_fs           *c;
        struct bch_dev          *ca;
        union {
                struct bio      *orig;
@@ -142,10 +142,4 @@ struct bch_write_op {
        u64                     inline_keys[BKEY_EXTENT_U64s_MAX * 2];
 };
 
-struct bio_decompress_worker {
-       struct bch_fs           *c;
-       struct work_struct              work;
-       struct llist_head               bio_list;
-};
-
 #endif /* _BCACHE_IO_TYPES_H */
index 8823c06cda4eeef2b0c62e7e90beac9f0a0ded03..2c1e367913f03009420825f02ef69b60a679bca8 100644 (file)
@@ -16,9 +16,6 @@ void bch_notify_fs_stopped(struct bch_fs *);
 void bch_notify_dev_read_write(struct bch_dev *);
 void bch_notify_dev_read_only(struct bch_dev *);
 void bch_notify_dev_added(struct bch_dev *);
-void bch_notify_dev_removing(struct bch_dev *);
-void bch_notify_dev_removed(struct bch_dev *);
-void bch_notify_dev_remove_failed(struct bch_dev *);
 void bch_notify_dev_error(struct bch_dev *, bool);
 
 #else
@@ -30,9 +27,6 @@ static inline void bch_notify_fs_stopped(struct bch_fs *c) {}
 static inline void bch_notify_dev_read_write(struct bch_dev *ca) {}
 static inline void bch_notify_dev_read_only(struct bch_dev *ca) {}
 static inline void bch_notify_dev_added(struct bch_dev *ca) {}
-static inline void bch_notify_dev_removing(struct bch_dev *ca) {}
-static inline void bch_notify_dev_removed(struct bch_dev *ca) {}
-static inline void bch_notify_dev_remove_failed(struct bch_dev *ca) {}
 static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {}
 
 #endif
index e41cfb4c8045f576b8af3699e280948271fd30da..b24770bce001115728e3a6eef2ece96ad54f2bf9 100644 (file)
@@ -500,7 +500,7 @@ retry:
                                s->read_dirty_data = true;
 
                        bch_read_extent(c, &s->rbio, k, &pick,
-                                       BCH_READ_FORCE_BOUNCE|
+                                       BCH_READ_ACCOUNT_TIMES|
                                        BCH_READ_RETRY_IF_STALE|
                                        (!s->bypass ? BCH_READ_PROMOTE : 0)|
                                        (is_last ? BCH_READ_IS_LAST : 0));
index 1e272af2b59e46cb3d3549c786d06c9dbc53c62a..f5f74936cb8b56d91d6e01bd47e25b52e4996e88 100644 (file)
@@ -1453,57 +1453,26 @@ int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
        return ret;
 }
 
-#if 0
-int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca)
-{
-       /* First, go RO before we try to migrate data off: */
-       ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags);
-       if (ret)
-               return ret;
-
-       bch_notify_dev_removing(ca);
-
-       /* Migrate data, metadata off device: */
-
-       ret = bch_move_data_off_device(ca);
-       if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) {
-               bch_err(c, "Remove of %s failed, unable to migrate data off",
-                       name);
-               return ret;
-       }
-
-       if (ret)
-               ret = bch_flag_data_bad(ca);
-       if (ret) {
-               bch_err(c, "Remove of %s failed, unable to migrate data off",
-                       name);
-               return ret;
-       }
-
-       ret = bch_move_metadata_off_device(ca);
-       if (ret)
-               return ret;
-}
-#endif
-
 /* Device add/removal: */
 
-static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
+int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 {
        struct bch_sb_field_members *mi;
        unsigned dev_idx = ca->dev_idx;
-       int ret;
+       int ret = -EINVAL;
+
+       mutex_lock(&c->state_lock);
+
+       percpu_ref_put(&ca->ref); /* XXX */
 
        if (ca->mi.state == BCH_MEMBER_STATE_RW) {
                bch_err(ca, "Cannot remove RW device");
-               bch_notify_dev_remove_failed(ca);
-               return -EINVAL;
+               goto err;
        }
 
        if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
                bch_err(ca, "Cannot remove without losing data");
-               bch_notify_dev_remove_failed(ca);
-               return -EINVAL;
+               goto err;
        }
 
        /*
@@ -1514,20 +1483,18 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
        ret = bch_flag_data_bad(ca);
        if (ret) {
                bch_err(ca, "Remove failed");
-               return ret;
+               goto err;
        }
 
        if (ca->mi.has_data || ca->mi.has_metadata) {
-               bch_err(ca, "Can't remove, still has data");
-               return ret;
+               bch_err(ca, "Remove failed, still has data");
+               goto err;
        }
 
        /*
         * Ok, really doing the remove:
         * Drop device's prio pointer before removing it from superblock:
         */
-       bch_notify_dev_removed(ca);
-
        spin_lock(&c->journal.lock);
        c->journal.prio_buckets[dev_idx] = 0;
        spin_unlock(&c->journal.lock);
@@ -1549,19 +1516,10 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
        bch_write_super(c);
 
        mutex_unlock(&c->sb_lock);
-
+       mutex_unlock(&c->state_lock);
        return 0;
-}
-
-int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
-{
-       int ret;
-
-       mutex_lock(&c->state_lock);
-       percpu_ref_put(&ca->ref);
-       ret = __bch_dev_remove(c, ca, flags);
+err:
        mutex_unlock(&c->state_lock);
-
        return ret;
 }
 
@@ -1680,6 +1638,8 @@ err:
 int bch_dev_online(struct bch_fs *c, const char *path)
 {
        struct bcache_superblock sb = { 0 };
+       struct bch_dev *ca;
+       unsigned dev_idx;
        const char *err;
 
        mutex_lock(&c->state_lock);
@@ -1688,17 +1648,27 @@ int bch_dev_online(struct bch_fs *c, const char *path)
        if (err)
                goto err;
 
+       dev_idx = sb.sb->dev_idx;
+
        err = bch_dev_in_fs(c->disk_sb, sb.sb);
        if (err)
                goto err;
 
        mutex_lock(&c->sb_lock);
        if (__bch_dev_online(c, &sb)) {
+               err = "__bch_dev_online() error";
                mutex_unlock(&c->sb_lock);
                goto err;
        }
        mutex_unlock(&c->sb_lock);
 
+       ca = c->devs[dev_idx];
+       if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+               err = __bch_dev_read_write(c, ca);
+               if (err)
+                       goto err;
+       }
+
        mutex_unlock(&c->state_lock);
        return 0;
 err:
@@ -1725,7 +1695,7 @@ int bch_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
        return 0;
 }
 
-int bch_dev_migrate(struct bch_fs *c, struct bch_dev *ca)
+int bch_dev_evacuate(struct bch_fs *c, struct bch_dev *ca)
 {
        int ret;
 
index 79da390e601a1a2bd45c59c158ddb393469b87ce..66c3430840f1ef57ea8e4b5b9737a81184b8c896 100644 (file)
@@ -107,7 +107,7 @@ int bch_dev_remove(struct bch_fs *, struct bch_dev *, int);
 int bch_dev_add(struct bch_fs *, const char *);
 int bch_dev_online(struct bch_fs *, const char *);
 int bch_dev_offline(struct bch_fs *, struct bch_dev *, int);
-int bch_dev_migrate(struct bch_fs *, struct bch_dev *);
+int bch_dev_evacuate(struct bch_fs *, struct bch_dev *);
 
 void bch_fs_detach(struct bch_fs *);
 
index 2b171a13c8b6a43bfd9dadb4fa93645d54ffa8a7..88cbe30188aff088772fc7d5548665a5fe19d7cc 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _BCACHE_UTIL_H
 #define _BCACHE_UTIL_H
 
+#include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/errno.h>
 #include <linux/blkdev.h>
@@ -722,4 +723,33 @@ static inline void memmove_u64s(void *dst, const void *src,
                __memmove_u64s_up(dst, src, u64s);
 }
 
+static inline struct bio_vec next_contig_bvec(struct bio *bio,
+                                             struct bvec_iter *iter)
+{
+       struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+       bio_advance_iter(bio, iter, bv.bv_len);
+#ifndef CONFIG_HIGHMEM
+       while (iter->bi_size) {
+               struct bio_vec next = bio_iter_iovec(bio, *iter);
+
+               if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len !=
+                   page_address(next.bv_page) + next.bv_offset)
+                       break;
+
+               bv.bv_len += next.bv_len;
+               bio_advance_iter(bio, iter, next.bv_len);
+       }
+#endif
+       return bv;
+}
+
+#define __bio_for_each_contig_segment(bv, bio, iter, start)            \
+       for (iter = (start);                                            \
+            (iter).bi_size &&                                          \
+               ((bv = next_contig_bvec((bio), &(iter))), 1);)
+
+#define bio_for_each_contig_segment(bv, bio, iter)                     \
+       __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
+
 #endif /* _BCACHE_UTIL_H */