git.sesse.net Git - bcachefs-tools-debian/commitdiff
update bcache sources
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 16 Mar 2017 22:51:41 +0000 (14:51 -0800)
committerKent Overstreet <kent.overstreet@gmail.com>
Thu, 16 Mar 2017 22:51:41 +0000 (14:51 -0800)
19 files changed:
.bcache_revision
include/linux/bcache-ioctl.h
include/linux/bcache.h
libbcache/bcache.h
libbcache/btree_gc.c
libbcache/buckets.c
libbcache/chardev.c
libbcache/checksum.c
libbcache/compress.c
libbcache/extents.c
libbcache/fs-io.c
libbcache/io.c
libbcache/io.h
libbcache/io_types.h
libbcache/notify.h
libbcache/request.c
libbcache/super.c
libbcache/super.h
libbcache/util.h

index 434bc959e01fedbf319fc956e3d1d5edcfa6c7d2..72b9b1754eb288c555439eddec1a88e9987d994b 100644 (file)
@@ -1 +1 @@
-BCACHE_REVISION=3ea79179e3101fb50de8730a809d00d189f05be5
+BCACHE_REVISION=84b6390084721a37c0f7a261240093ad659f9a65
index 2d07666c97ce24ba5a31aac3088d267cbd12959a..ca769369f72d4770aec5afa0d0378a8aecbd1adc 100644 (file)
@@ -78,6 +78,14 @@ struct bch_ioctl_disk_set_state {
 #define BCH_REWRITE_RECOMPRESS         (1 << 0)
 #define BCH_REWRITE_DECREASE_REPLICAS  (1 << 1)
 
+enum bch_data_ops {
+       BCH_DATA_SCRUB,
+};
+
+struct bch_data_op {
+       __u8                    type;
+};
+
 struct bch_ioctl_data {
        __u32                   flags;
        __u32                   pad;
index f4c2f275bf780247cb8f4dec9fa66535ee6da884..c221747b4a4b3730c497cda8e2aad84bd9632093 100644 (file)
@@ -886,6 +886,10 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N,     struct bch_sb_field_crypt, kdf_flags,  0, 16);
 LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
 LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
 
+struct bch_sb_field_replication {
+       struct bch_sb_field     field;
+};
+
 /*
  * @offset     - sector where this sb was written
  * @version    - on disk format version
index 80d789acb80eace2d2b6a35fdcdfd1e44ce7435d..1d0e998c53ed0af58674a70654bcba8a1e8f1987 100644 (file)
@@ -716,8 +716,6 @@ struct bch_fs {
        void                    *zlib_workspace;
        struct mutex            zlib_workspace_lock;
        mempool_t               compression_bounce[2];
-       struct bio_decompress_worker __percpu
-                               *bio_decompress_worker;
 
        struct crypto_blkcipher *chacha20;
        struct crypto_shash     *poly1305;
index 9fa4a2a49cf191f5ece2c87d0470f3eaa4395ff7..5270d442ef90afa31fa624aced38e958074f219a 100644 (file)
@@ -933,14 +933,14 @@ int bch_initial_gc(struct bch_fs *c, struct list_head *journal)
 {
        enum btree_id id;
 
-       bch_mark_metadata(c);
-
        for (id = 0; id < BTREE_ID_NR; id++)
                bch_initial_gc_btree(c, id);
 
        if (journal)
                bch_journal_mark(c, journal);
 
+       bch_mark_metadata(c);
+
        /*
         * Skip past versions that might have possibly been used (as nonces),
         * but hadn't had their pointers written:
index a28d493035882ebb05a900b9f7d64249518f909f..7be943d142cab98828d60c3da1ae2334b9cd308f 100644 (file)
@@ -462,7 +462,7 @@ static void bch_mark_pointer(struct bch_fs *c,
                 * the allocator invalidating a bucket after we've already
                 * checked the gen
                 */
-               if (gen_after(old.gen, ptr->gen)) {
+               if (gen_after(new.gen, ptr->gen)) {
                        EBUG_ON(type != S_CACHED &&
                                test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
                        return;
@@ -470,7 +470,7 @@ static void bch_mark_pointer(struct bch_fs *c,
 
                EBUG_ON(type != S_CACHED &&
                        !may_make_unavailable &&
-                       is_available_bucket(old) &&
+                       is_available_bucket(new) &&
                        test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
 
                if (type != S_CACHED &&
index c764a9d0121fd49868b5b0fc3650a49c19c86cb3..da6d827f6dc29b258d6ea4ece56670f0814af890 100644 (file)
@@ -201,7 +201,6 @@ static long bch_ioctl_disk_remove(struct bch_fs *c,
 {
        struct bch_ioctl_disk arg;
        struct bch_dev *ca;
-       int ret;
 
        if (copy_from_user(&arg, user_arg, sizeof(arg)))
                return -EFAULT;
@@ -210,10 +209,7 @@ static long bch_ioctl_disk_remove(struct bch_fs *c,
        if (IS_ERR(ca))
                return PTR_ERR(ca);
 
-       ret = bch_dev_remove(c, ca, arg.flags);
-
-       percpu_ref_put(&ca->ref);
-       return ret;
+       return bch_dev_remove(c, ca, arg.flags);
 }
 
 static long bch_ioctl_disk_online(struct bch_fs *c,
@@ -294,7 +290,7 @@ static long bch_ioctl_disk_evacuate(struct bch_fs *c,
        if (IS_ERR(ca))
                return PTR_ERR(ca);
 
-       ret = bch_dev_migrate(c, ca);
+       ret = bch_dev_evacuate(c, ca);
 
        percpu_ref_put(&ca->ref);
        return ret;
@@ -384,12 +380,11 @@ void bch_chardev_exit(void)
 {
        if (!IS_ERR_OR_NULL(bch_chardev_class))
                device_destroy(bch_chardev_class,
-                              MKDEV(bch_chardev_major, 0));
+                              MKDEV(bch_chardev_major, 255));
        if (!IS_ERR_OR_NULL(bch_chardev_class))
                class_destroy(bch_chardev_class);
        if (bch_chardev_major > 0)
                unregister_chrdev(bch_chardev_major, "bcache");
-
 }
 
 int __init bch_chardev_init(void)
index b3fbeb114f2af02f063b632e93189900a880375f..b96050dbb381fbf96478910a0fe525b86d7aadfc 100644 (file)
@@ -292,9 +292,8 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type,
        case BCH_CSUM_CRC64: {
                u64 crc = bch_checksum_init(type);
 
-               bio_for_each_segment(bv, bio, iter) {
+               bio_for_each_contig_segment(bv, bio, iter) {
                        void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
-
                        crc = bch_checksum_update(type,
                                crc, p, bv.bv_len);
                        kunmap_atomic(p);
@@ -312,7 +311,7 @@ struct bch_csum bch_checksum_bio(struct bch_fs *c, unsigned type,
 
                gen_poly_key(c, desc, nonce);
 
-               bio_for_each_segment(bv, bio, iter) {
+               bio_for_each_contig_segment(bv, bio, iter) {
                        void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
 
                        crypto_shash_update(desc, p, bv.bv_len);
@@ -342,7 +341,7 @@ void bch_encrypt_bio(struct bch_fs *c, unsigned type,
 
        sg_init_table(sgl, ARRAY_SIZE(sgl));
 
-       bio_for_each_segment(bv, bio, iter) {
+       bio_for_each_contig_segment(bv, bio, iter) {
                if (sg == sgl + ARRAY_SIZE(sgl)) {
                        sg_mark_end(sg - 1);
                        do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
index d6a345cb0dbeef1ea894e16ab1ec61f1867486ce..d9a64c381a0dabbe81afbe41605bc15f45eccc68 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/zlib.h>
 
 enum bounced {
+       BOUNCED_CONTIG,
        BOUNCED_MAPPED,
        BOUNCED_KMALLOCED,
        BOUNCED_VMALLOCED,
@@ -54,6 +55,14 @@ static void *__bio_map_or_bounce(struct bch_fs *c,
 
        BUG_ON(bvec_iter_sectors(start) > BCH_ENCODED_EXTENT_MAX);
 
+#ifndef CONFIG_HIGHMEM
+       *bounced = BOUNCED_CONTIG;
+
+       __bio_for_each_contig_segment(bv, bio, iter, start) {
+               if (bv.bv_len == start.bi_size)
+                       return page_address(bv.bv_page) + bv.bv_offset;
+       }
+#endif
        *bounced = BOUNCED_MAPPED;
 
        __bio_for_each_segment(bv, bio, iter, start) {
@@ -443,7 +452,6 @@ void bch_fs_compress_exit(struct bch_fs *c)
        mempool_exit(&c->lz4_workspace_pool);
        mempool_exit(&c->compression_bounce[WRITE]);
        mempool_exit(&c->compression_bounce[READ]);
-       free_percpu(c->bio_decompress_worker);
 }
 
 #define COMPRESSION_WORKSPACE_SIZE                                     \
@@ -453,22 +461,7 @@ void bch_fs_compress_exit(struct bch_fs *c)
 int bch_fs_compress_init(struct bch_fs *c)
 {
        unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
-       int ret, cpu;
-
-       if (!c->bio_decompress_worker) {
-               c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
-               if (!c->bio_decompress_worker)
-                       return -ENOMEM;
-
-               for_each_possible_cpu(cpu) {
-                       struct bio_decompress_worker *d =
-                               per_cpu_ptr(c->bio_decompress_worker, cpu);
-
-                       d->c = c;
-                       INIT_WORK(&d->work, bch_bio_decompress_work);
-                       init_llist_head(&d->bio_list);
-               }
-       }
+       int ret;
 
        if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
            !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
index 76b55f64813d8acafe73e4d9286454ed96dfe2a0..4b422fb1661bf3d4fe9c504624349858c165c9c7 100644 (file)
@@ -322,9 +322,7 @@ static bool should_drop_ptr(const struct bch_fs *c,
                            struct bkey_s_c_extent e,
                            const struct bch_extent_ptr *ptr)
 {
-       struct bch_dev *ca = c->devs[ptr->dev];
-
-       return ptr_stale(ca, ptr);
+       return ptr->cached && ptr_stale(c->devs[ptr->dev], ptr);
 }
 
 static void bch_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
@@ -2153,7 +2151,7 @@ void bch_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k,
                extent_for_each_ptr_crc(e, ptr, crc) {
                        struct bch_dev *ca = c->devs[ptr->dev];
 
-                       if (ptr_stale(ca, ptr))
+                       if (ptr->cached && ptr_stale(ca, ptr))
                                continue;
 
                        if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
index 0aef01418b5a0b99d54a04f050b0189ec598a851..afc8c208dd3ca1153617bcacb7c89fe8aae6b704 100644 (file)
@@ -1613,10 +1613,16 @@ ssize_t bch_direct_IO(struct kiocb *req, struct iov_iter *iter)
        struct file *file = req->ki_filp;
        struct inode *inode = file->f_inode;
        struct bch_fs *c = inode->i_sb->s_fs_info;
+       struct blk_plug plug;
+       ssize_t ret;
 
-       return ((iov_iter_rw(iter) == WRITE)
+       blk_start_plug(&plug);
+       ret = ((iov_iter_rw(iter) == WRITE)
                ? bch_direct_IO_write
                : bch_direct_IO_read)(c, req, file, inode, iter, req->ki_pos);
+       blk_finish_plug(&plug);
+
+       return ret;
 }
 
 static ssize_t
index dbe2671b79a9a5239a74ac5436dabed0d520918f..753c8a3d123be7436d10bd598149749dd8df427d 100644 (file)
@@ -354,8 +354,9 @@ static void bch_write_endio(struct bio *bio)
        struct bch_dev *ca = wbio->ca;
 
        if (bch_dev_nonfatal_io_err_on(bio->bi_error, ca,
-                                      "data write"))
+                                      "data write")) {
                set_closure_fn(cl, bch_write_io_error, index_update_wq(op));
+       }
 
        bch_account_io_completion_time(ca, wbio->submit_time_us,
                                       REQ_OP_WRITE);
@@ -973,8 +974,9 @@ static int bio_checksum_uncompress(struct bch_fs *c,
        return ret;
 }
 
-static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio)
+static void bch_rbio_free(struct bch_read_bio *rbio)
 {
+       struct bch_fs *c = rbio->c;
        struct bio *bio = &rbio->bio;
 
        BUG_ON(rbio->ca);
@@ -988,7 +990,7 @@ static void bch_rbio_free(struct bch_fs *c, struct bch_read_bio *rbio)
        bio_put(bio);
 }
 
-static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
+static void bch_rbio_done(struct bch_read_bio *rbio)
 {
        struct bio *orig = &bch_rbio_parent(rbio)->bio;
 
@@ -1000,7 +1002,7 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
                        orig->bi_error = rbio->bio.bi_error;
 
                bio_endio(orig);
-               bch_rbio_free(c, rbio);
+               bch_rbio_free(rbio);
        } else {
                if (rbio->promote)
                        kfree(rbio->promote);
@@ -1010,30 +1012,16 @@ static void bch_rbio_done(struct bch_fs *c, struct bch_read_bio *rbio)
        }
 }
 
-/*
- * Decide if we want to retry the read - returns true if read is being retried,
- * false if caller should pass error on up
- */
-static void bch_read_error_maybe_retry(struct bch_fs *c,
-                                      struct bch_read_bio *rbio,
-                                      int error)
+static void bch_rbio_error(struct bch_read_bio *rbio, int error)
 {
-       unsigned long flags;
-
-       if ((error == -EINTR) &&
-           (rbio->flags & BCH_READ_RETRY_IF_STALE)) {
-               atomic_long_inc(&c->cache_read_races);
-               goto retry;
-       }
+       bch_rbio_parent(rbio)->bio.bi_error = error;
+       bch_rbio_done(rbio);
+}
 
-       if (error == -EIO) {
-               /* io error - do we have another replica? */
-       }
+static void bch_rbio_retry(struct bch_fs *c, struct bch_read_bio *rbio)
+{
+       unsigned long flags;
 
-       bch_rbio_parent(rbio)->bio.bi_error = error;
-       bch_rbio_done(c, rbio);
-       return;
-retry:
        percpu_ref_put(&rbio->ca->io_ref);
        rbio->ca = NULL;
 
@@ -1053,13 +1041,26 @@ static void cache_promote_done(struct closure *cl)
 }
 
 /* Inner part that may run in process context */
-static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio)
+static void __bch_read_endio(struct work_struct *work)
 {
+       struct bch_read_bio *rbio =
+               container_of(work, struct bch_read_bio, work);
+       struct bch_fs *c = rbio->c;
        int ret;
 
        ret = bio_checksum_uncompress(c, rbio);
        if (ret) {
-               bch_read_error_maybe_retry(c, rbio, ret);
+               /*
+                * Checksum error: if the bio wasn't bounced, we may have been
+                * reading into buffers owned by userspace (that userspace can
+                * scribble over) - retry the read, bouncing it this time:
+                */
+               if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
+                       rbio->flags |= BCH_READ_FORCE_BOUNCE;
+                       bch_rbio_retry(c, rbio);
+               } else {
+                       bch_rbio_error(rbio, -EIO);
+               }
                return;
        }
 
@@ -1073,64 +1074,51 @@ static void __bch_read_endio(struct bch_fs *c, struct bch_read_bio *rbio)
                swap(promote->write.wbio.bio.bi_vcnt, rbio->bio.bi_vcnt);
                rbio->promote = NULL;
 
-               bch_rbio_done(c, rbio);
+               bch_rbio_done(rbio);
 
                closure_init(cl, &c->cl);
                closure_call(&promote->write.op.cl, bch_write, c->wq, cl);
                closure_return_with_destructor(cl, cache_promote_done);
        } else {
-               bch_rbio_done(c, rbio);
+               bch_rbio_done(rbio);
        }
 }
 
-void bch_bio_decompress_work(struct work_struct *work)
-{
-       struct bio_decompress_worker *d =
-               container_of(work, struct bio_decompress_worker, work);
-       struct llist_node *list, *next;
-       struct bch_read_bio *rbio;
-
-       while ((list = llist_del_all(&d->bio_list)))
-               for (list = llist_reverse_order(list);
-                    list;
-                    list = next) {
-                       next = llist_next(list);
-                       rbio = container_of(list, struct bch_read_bio, list);
-
-                       __bch_read_endio(d->c, rbio);
-               }
-}
-
 static void bch_read_endio(struct bio *bio)
 {
        struct bch_read_bio *rbio =
                container_of(bio, struct bch_read_bio, bio);
-       struct bch_fs *c = rbio->ca->fs;
-       int stale = ((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
-               ptr_stale(rbio->ca, &rbio->ptr) ? -EINTR : 0;
-       int error = bio->bi_error ?: stale;
+       struct bch_fs *c = rbio->c;
 
-       bch_account_io_completion_time(rbio->ca, rbio->submit_time_us, REQ_OP_READ);
+       if (rbio->flags & BCH_READ_ACCOUNT_TIMES)
+               bch_account_io_completion_time(rbio->ca, rbio->submit_time_us,
+                                              REQ_OP_READ);
 
-       bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read");
-
-       if (error) {
-               bch_read_error_maybe_retry(c, rbio, error);
+       if (bch_dev_nonfatal_io_err_on(bio->bi_error, rbio->ca, "data read")) {
+               /* XXX: retry IO errors when we have another replica */
+               bch_rbio_error(rbio, bio->bi_error);
                return;
        }
 
-       if (rbio->crc.compression_type != BCH_COMPRESSION_NONE ||
-           bch_csum_type_is_encryption(rbio->crc.csum_type)) {
-               struct bio_decompress_worker *d;
+       if (rbio->ptr.cached &&
+           (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
+            ptr_stale(rbio->ca, &rbio->ptr))) {
+               atomic_long_inc(&c->cache_read_races);
 
-               preempt_disable();
-               d = this_cpu_ptr(c->bio_decompress_worker);
-               llist_add(&rbio->list, &d->bio_list);
-               queue_work(system_highpri_wq, &d->work);
-               preempt_enable();
-       } else {
-               __bch_read_endio(c, rbio);
+               if (rbio->flags & BCH_READ_RETRY_IF_STALE)
+                       bch_rbio_retry(c, rbio);
+               else
+                       bch_rbio_error(rbio, -EINTR);
+               return;
        }
+
+       if (rbio->crc.compression_type ||
+           bch_csum_type_is_encryption(rbio->crc.csum_type))
+               queue_work(system_unbound_wq, &rbio->work);
+       else if (rbio->crc.csum_type)
+               queue_work(system_highpri_wq, &rbio->work);
+       else
+               __bch_read_endio(&rbio->work);
 }
 
 static bool should_promote(struct bch_fs *c,
@@ -1194,6 +1182,8 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
        if (pick->crc.compression_type != BCH_COMPRESSION_NONE ||
            (pick->crc.csum_type != BCH_CSUM_NONE &&
             (bvec_iter_sectors(iter) != crc_uncompressed_size(NULL, &pick->crc) ||
+             (bch_csum_type_is_encryption(pick->crc.csum_type) &&
+              (flags & BCH_READ_USER_MAPPED)) ||
              (flags & BCH_READ_FORCE_BOUNCE)))) {
                read_full = true;
                bounce = true;
@@ -1242,11 +1232,12 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
                rbio->orig_bi_end_io = orig->bio.bi_end_io;
        rbio->parent_iter       = iter;
 
-       rbio->inode             = k.k->p.inode;
        rbio->flags             = flags;
        rbio->bounce            = bounce;
        rbio->split             = split;
-       rbio->version           = k.k->version;
+       rbio->c                 = c;
+       rbio->ca                = pick->ca;
+       rbio->ptr               = pick->ptr;
        rbio->crc               = pick->crc;
        /*
         * crc.compressed_size will be 0 if there wasn't any checksum
@@ -1255,9 +1246,10 @@ void bch_read_extent_iter(struct bch_fs *c, struct bch_read_bio *orig,
         * only for promoting)
         */
        rbio->crc._compressed_size = bio_sectors(&rbio->bio) - 1;
-       rbio->ptr               = pick->ptr;
-       rbio->ca                = pick->ca;
+       rbio->version           = k.k->version;
        rbio->promote           = promote_op;
+       rbio->inode             = k.k->p.inode;
+       INIT_WORK(&rbio->work, __bch_read_endio);
 
        rbio->bio.bi_bdev       = pick->ca->disk_sb.bdev;
        rbio->bio.bi_opf        = orig->bio.bi_opf;
@@ -1395,12 +1387,11 @@ void bch_read(struct bch_fs *c, struct bch_read_bio *bio, u64 inode)
        bch_increment_clock(c, bio_sectors(&bio->bio), READ);
 
        bch_read_iter(c, bio, bio->bio.bi_iter, inode,
-                     BCH_READ_FORCE_BOUNCE|
                      BCH_READ_RETRY_IF_STALE|
                      BCH_READ_PROMOTE|
-                     BCH_READ_MAY_REUSE_BIO);
+                     BCH_READ_MAY_REUSE_BIO|
+                     BCH_READ_USER_MAPPED);
 }
-EXPORT_SYMBOL(bch_read);
 
 /**
  * bch_read_retry - re-submit a bio originally from bch_read()
@@ -1409,19 +1400,17 @@ static void bch_read_retry(struct bch_fs *c, struct bch_read_bio *rbio)
 {
        struct bch_read_bio *parent = bch_rbio_parent(rbio);
        struct bvec_iter iter = rbio->parent_iter;
+       unsigned flags = rbio->flags;
        u64 inode = rbio->inode;
 
        trace_bcache_read_retry(&rbio->bio);
 
        if (rbio->split)
-               bch_rbio_free(c, rbio);
+               bch_rbio_free(rbio);
        else
                rbio->bio.bi_end_io = rbio->orig_bi_end_io;
 
-       bch_read_iter(c, parent, iter, inode,
-                     BCH_READ_FORCE_BOUNCE|
-                     BCH_READ_RETRY_IF_STALE|
-                     BCH_READ_PROMOTE);
+       bch_read_iter(c, parent, iter, inode, flags);
 }
 
 void bch_read_retry_work(struct work_struct *work)
index 302ed2e0944f3300710de878561d71533fd9f14f..9239ca4aac16beeec035deaa33d5ebb9569b5c23 100644 (file)
@@ -69,6 +69,8 @@ enum bch_read_flags {
        BCH_READ_PROMOTE                = 1 << 2,
        BCH_READ_IS_LAST                = 1 << 3,
        BCH_READ_MAY_REUSE_BIO          = 1 << 4,
+       BCH_READ_ACCOUNT_TIMES          = 1 << 5,
+       BCH_READ_USER_MAPPED            = 1 << 6,
 };
 
 void bch_read(struct bch_fs *, struct bch_read_bio *, u64);
@@ -85,6 +87,4 @@ int bch_discard(struct bch_fs *, struct bpos, struct bpos,
 void bch_read_retry_work(struct work_struct *);
 void bch_wake_delayed_writes(unsigned long data);
 
-void bch_bio_decompress_work(struct work_struct *);
-
 #endif /* _BCACHE_IO_H */
index 3d096876208f4a351b9d31ddeeacf64080bc5d08..ca1b0192fad02806cf541e4d95a86ef2a0f67985 100644 (file)
@@ -29,29 +29,29 @@ struct bch_read_bio {
         */
        struct bvec_iter        parent_iter;
 
-       /*
-        * If we have to retry the read (IO error, checksum failure, read stale
-        * data (raced with allocator), we retry the portion of the parent bio
-        * that failed (i.e. this bio's portion, parent_iter).
-        *
-        * But we need to stash the inode somewhere:
-        */
-       u64                     inode;
-
        unsigned                submit_time_us;
        u16                     flags;
        u8                      bounce:1,
                                split:1;
 
-       struct bversion         version;
-       struct bch_extent_crc128 crc;
-       struct bch_extent_ptr   ptr;
+       struct bch_fs           *c;
        struct bch_dev          *ca;
+       struct bch_extent_ptr   ptr;
+       struct bch_extent_crc128 crc;
+       struct bversion         version;
 
        struct cache_promote_op *promote;
 
-       /* bio_decompress_worker list */
-       struct llist_node       list;
+       /*
+        * If we have to retry the read (IO error, checksum failure, read stale
+        * data (raced with allocator), we retry the portion of the parent bio
+        * that failed (i.e. this bio's portion, parent_iter).
+        *
+        * But we need to stash the inode somewhere:
+        */
+       u64                     inode;
+
+       struct work_struct      work;
 
        struct bio              bio;
 };
@@ -63,7 +63,7 @@ bch_rbio_parent(struct bch_read_bio *rbio)
 }
 
 struct bch_write_bio {
-       struct bch_fs   *c;
+       struct bch_fs           *c;
        struct bch_dev          *ca;
        union {
                struct bio      *orig;
@@ -142,10 +142,4 @@ struct bch_write_op {
        u64                     inline_keys[BKEY_EXTENT_U64s_MAX * 2];
 };
 
-struct bio_decompress_worker {
-       struct bch_fs           *c;
-       struct work_struct              work;
-       struct llist_head               bio_list;
-};
-
 #endif /* _BCACHE_IO_TYPES_H */
index 8823c06cda4eeef2b0c62e7e90beac9f0a0ded03..2c1e367913f03009420825f02ef69b60a679bca8 100644 (file)
@@ -16,9 +16,6 @@ void bch_notify_fs_stopped(struct bch_fs *);
 void bch_notify_dev_read_write(struct bch_dev *);
 void bch_notify_dev_read_only(struct bch_dev *);
 void bch_notify_dev_added(struct bch_dev *);
-void bch_notify_dev_removing(struct bch_dev *);
-void bch_notify_dev_removed(struct bch_dev *);
-void bch_notify_dev_remove_failed(struct bch_dev *);
 void bch_notify_dev_error(struct bch_dev *, bool);
 
 #else
@@ -30,9 +27,6 @@ static inline void bch_notify_fs_stopped(struct bch_fs *c) {}
 static inline void bch_notify_dev_read_write(struct bch_dev *ca) {}
 static inline void bch_notify_dev_read_only(struct bch_dev *ca) {}
 static inline void bch_notify_dev_added(struct bch_dev *ca) {}
-static inline void bch_notify_dev_removing(struct bch_dev *ca) {}
-static inline void bch_notify_dev_removed(struct bch_dev *ca) {}
-static inline void bch_notify_dev_remove_failed(struct bch_dev *ca) {}
 static inline void bch_notify_dev_error(struct bch_dev *ca, bool b) {}
 
 #endif
index e41cfb4c8045f576b8af3699e280948271fd30da..b24770bce001115728e3a6eef2ece96ad54f2bf9 100644 (file)
@@ -500,7 +500,7 @@ retry:
                                s->read_dirty_data = true;
 
                        bch_read_extent(c, &s->rbio, k, &pick,
-                                       BCH_READ_FORCE_BOUNCE|
+                                       BCH_READ_ACCOUNT_TIMES|
                                        BCH_READ_RETRY_IF_STALE|
                                        (!s->bypass ? BCH_READ_PROMOTE : 0)|
                                        (is_last ? BCH_READ_IS_LAST : 0));
index 1e272af2b59e46cb3d3549c786d06c9dbc53c62a..f5f74936cb8b56d91d6e01bd47e25b52e4996e88 100644 (file)
@@ -1453,57 +1453,26 @@ int bch_dev_set_state(struct bch_fs *c, struct bch_dev *ca,
        return ret;
 }
 
-#if 0
-int bch_dev_migrate_from(struct bch_fs *c, struct bch_dev *ca)
-{
-       /* First, go RO before we try to migrate data off: */
-       ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags);
-       if (ret)
-               return ret;
-
-       bch_notify_dev_removing(ca);
-
-       /* Migrate data, metadata off device: */
-
-       ret = bch_move_data_off_device(ca);
-       if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) {
-               bch_err(c, "Remove of %s failed, unable to migrate data off",
-                       name);
-               return ret;
-       }
-
-       if (ret)
-               ret = bch_flag_data_bad(ca);
-       if (ret) {
-               bch_err(c, "Remove of %s failed, unable to migrate data off",
-                       name);
-               return ret;
-       }
-
-       ret = bch_move_metadata_off_device(ca);
-       if (ret)
-               return ret;
-}
-#endif
-
 /* Device add/removal: */
 
-static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
+int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 {
        struct bch_sb_field_members *mi;
        unsigned dev_idx = ca->dev_idx;
-       int ret;
+       int ret = -EINVAL;
+
+       mutex_lock(&c->state_lock);
+
+       percpu_ref_put(&ca->ref); /* XXX */
 
        if (ca->mi.state == BCH_MEMBER_STATE_RW) {
                bch_err(ca, "Cannot remove RW device");
-               bch_notify_dev_remove_failed(ca);
-               return -EINVAL;
+               goto err;
        }
 
        if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
                bch_err(ca, "Cannot remove without losing data");
-               bch_notify_dev_remove_failed(ca);
-               return -EINVAL;
+               goto err;
        }
 
        /*
@@ -1514,20 +1483,18 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
        ret = bch_flag_data_bad(ca);
        if (ret) {
                bch_err(ca, "Remove failed");
-               return ret;
+               goto err;
        }
 
        if (ca->mi.has_data || ca->mi.has_metadata) {
-               bch_err(ca, "Can't remove, still has data");
-               return ret;
+               bch_err(ca, "Remove failed, still has data");
+               goto err;
        }
 
        /*
         * Ok, really doing the remove:
         * Drop device's prio pointer before removing it from superblock:
         */
-       bch_notify_dev_removed(ca);
-
        spin_lock(&c->journal.lock);
        c->journal.prio_buckets[dev_idx] = 0;
        spin_unlock(&c->journal.lock);
@@ -1549,19 +1516,10 @@ static int __bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
        bch_write_super(c);
 
        mutex_unlock(&c->sb_lock);
-
+       mutex_unlock(&c->state_lock);
        return 0;
-}
-
-int bch_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
-{
-       int ret;
-
-       mutex_lock(&c->state_lock);
-       percpu_ref_put(&ca->ref);
-       ret = __bch_dev_remove(c, ca, flags);
+err:
        mutex_unlock(&c->state_lock);
-
        return ret;
 }
 
@@ -1680,6 +1638,8 @@ err:
 int bch_dev_online(struct bch_fs *c, const char *path)
 {
        struct bcache_superblock sb = { 0 };
+       struct bch_dev *ca;
+       unsigned dev_idx;
        const char *err;
 
        mutex_lock(&c->state_lock);
@@ -1688,17 +1648,27 @@ int bch_dev_online(struct bch_fs *c, const char *path)
        if (err)
                goto err;
 
+       dev_idx = sb.sb->dev_idx;
+
        err = bch_dev_in_fs(c->disk_sb, sb.sb);
        if (err)
                goto err;
 
        mutex_lock(&c->sb_lock);
        if (__bch_dev_online(c, &sb)) {
+               err = "__bch_dev_online() error";
                mutex_unlock(&c->sb_lock);
                goto err;
        }
        mutex_unlock(&c->sb_lock);
 
+       ca = c->devs[dev_idx];
+       if (ca->mi.state == BCH_MEMBER_STATE_RW) {
+               err = __bch_dev_read_write(c, ca);
+               if (err)
+                       goto err;
+       }
+
        mutex_unlock(&c->state_lock);
        return 0;
 err:
@@ -1725,7 +1695,7 @@ int bch_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags)
        return 0;
 }
 
-int bch_dev_migrate(struct bch_fs *c, struct bch_dev *ca)
+int bch_dev_evacuate(struct bch_fs *c, struct bch_dev *ca)
 {
        int ret;
 
index 79da390e601a1a2bd45c59c158ddb393469b87ce..66c3430840f1ef57ea8e4b5b9737a81184b8c896 100644 (file)
@@ -107,7 +107,7 @@ int bch_dev_remove(struct bch_fs *, struct bch_dev *, int);
 int bch_dev_add(struct bch_fs *, const char *);
 int bch_dev_online(struct bch_fs *, const char *);
 int bch_dev_offline(struct bch_fs *, struct bch_dev *, int);
-int bch_dev_migrate(struct bch_fs *, struct bch_dev *);
+int bch_dev_evacuate(struct bch_fs *, struct bch_dev *);
 
 void bch_fs_detach(struct bch_fs *);
 
index 2b171a13c8b6a43bfd9dadb4fa93645d54ffa8a7..88cbe30188aff088772fc7d5548665a5fe19d7cc 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _BCACHE_UTIL_H
 #define _BCACHE_UTIL_H
 
+#include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/errno.h>
 #include <linux/blkdev.h>
@@ -722,4 +723,33 @@ static inline void memmove_u64s(void *dst, const void *src,
                __memmove_u64s_up(dst, src, u64s);
 }
 
+static inline struct bio_vec next_contig_bvec(struct bio *bio,
+                                             struct bvec_iter *iter)
+{
+       struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+       bio_advance_iter(bio, iter, bv.bv_len);
+#ifndef CONFIG_HIGHMEM
+       while (iter->bi_size) {
+               struct bio_vec next = bio_iter_iovec(bio, *iter);
+
+               if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len !=
+                   page_address(next.bv_page) + next.bv_offset)
+                       break;
+
+               bv.bv_len += next.bv_len;
+               bio_advance_iter(bio, iter, next.bv_len);
+       }
+#endif
+       return bv;
+}
+
+#define __bio_for_each_contig_segment(bv, bio, iter, start)            \
+       for (iter = (start);                                            \
+            (iter).bi_size &&                                          \
+               ((bv = next_contig_bvec((bio), &(iter))), 1);)
+
+#define bio_for_each_contig_segment(bv, bio, iter)                     \
+       __bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
+
 #endif /* _BCACHE_UTIL_H */