-BCACHE_REVISION=c1f1a9e1d9b9664db9c9c03cbac455c2750335bc
+BCACHE_REVISION=206668e86912eea889b3f2aaeaac7433da6f9245
"Commands for managing a specific device in a filesystem:\n"
" device_show Show information about a formatted device\n"
" device_add Add a device to an existing (running) filesystem\n"
+ " device_fail Mark a device as failed\n"
" device_remove Remove a device from an existing (running) filesystem\n"
"\n"
"Repair:\n"
return cmd_device_show(argc, argv);
if (!strcmp(cmd, "device_add"))
return cmd_device_add(argc, argv);
+ if (!strcmp(cmd, "device_fail"))
+ return cmd_device_fail(argc, argv);
if (!strcmp(cmd, "device_remove"))
return cmd_device_remove(argc, argv);
#include "cmds.h"
#include "libbcache.h"
#include "linux/bcache-ioctl.h"
+#include "tools-util.h"
/* This code belongs under show_fs */
#if 0
.dev = (__u64) argv[i],
};
- if (ioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &ia))
- die("BCH_IOCTL_DISK_ADD error: %s", strerror(errno));
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &ia);
}
return 0;
}
-static void usage(void)
+static void device_fail_usage(void)
+{
+ puts("bcache device_fail - mark a device as failed\n"
+ "Usage: bcache device_fail filesystem [devices]\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force removal, even if some data\n"
+ " couldn't be migrated\n"
+ " --force-metadata Force removal, even if some metadata\n"
+ " couldn't be migrated\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcache@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+int cmd_device_fail(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "force-degraded", 0, NULL, 'f' },
+ //{ "force-data-lost", 0, NULL, 'F' },
+ //{ "force-metadata-lost", 0, NULL, 'F' },
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ int opt, force_degraded = 0, force_data = 0, force_metadata = 0;
+
+ while ((opt = getopt_long(argc, argv, "fh", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ force_degraded = 1;
+ break;
+ case 'h':
+ device_fail_usage();
+ }
+
+ if (argc - optind < 2)
+ die("Please supply a filesystem and at least one device to fail");
+
+ struct bcache_handle fs = bcache_fs_open(argv[optind]);
+
+ for (unsigned i = optind + 1; i < argc; i++) {
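+ /* .dev carries a pointer to the device path, cast to __u64 for the ioctl: */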
+ struct bch_ioctl_disk_set_state ir = {
+ .dev = (__u64) argv[i],
+ .new_state = BCH_MEMBER_STATE_FAILED,
+ };
+
+ if (force_degraded)
+ ir.flags |= BCH_FORCE_IF_DEGRADED;
+ if (force_data)
+ ir.flags |= BCH_FORCE_IF_DATA_LOST;
+ if (force_metadata)
+ ir.flags |= BCH_FORCE_IF_METADATA_LOST;
+
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &ir);
+ }
+
+ return 0;
+}
+
+static void device_remove_usage(void)
{
puts("bcache device_remove - remove one or more devices from a filesystem\n"
"Usage: bcache device_remove filesystem [devices]\n"
force_metadata = 1;
break;
case 'h':
- usage();
+ device_remove_usage();
}
- if (argc < 3)
- die("Please supply a filesystem and at least one device to add");
+ if (argc - optind < 2)
+ die("Please supply a filesystem and at least one device to remove");
- struct bcache_handle fs = bcache_fs_open(argv[1]);
+ struct bcache_handle fs = bcache_fs_open(argv[optind]);
- for (unsigned i = 2; i < argc; i++) {
+ for (unsigned i = optind + 1; i < argc; i++) {
struct bch_ioctl_disk_remove ir = {
.dev = (__u64) argv[i],
};
if (force_data)
- ir.flags |= BCH_FORCE_IF_DATA_MISSING;
+ ir.flags |= BCH_FORCE_IF_DATA_LOST;
if (force_metadata)
- ir.flags |= BCH_FORCE_IF_METADATA_MISSING;
+ ir.flags |= BCH_FORCE_IF_METADATA_LOST;
- if (ioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &ir))
- die("BCH_IOCTL_DISK_REMOVE error: %s\n", strerror(errno));
+ xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &ir);
}
return 0;
x(0, metadata_checksum_type, "(none|crc32c|crc64)", NULL) \
x(0, data_checksum_type, "(none|crc32c|crc64)", NULL) \
x(0, compression_type, "(none|lz4|gzip)", NULL) \
+x(0, data_replicas, "#", NULL) \
+x(0, metadata_replicas, "#", NULL) \
x(0, encrypted, NULL, "Enable whole filesystem encryption (chacha20/poly1305)")\
x(0, no_passphrase, NULL, "Don't encrypt master encryption key")\
x('e', error_action, "(continue|readonly|panic)", NULL) \
" --metadata_checksum_type=(none|crc32c|crc64)\n"
" --data_checksum_type=(none|crc32c|crc64)\n"
" --compression_type=(none|lz4|gzip)\n"
+ " --data_replicas=# Number of data replicas\n"
+ " --metadata_replicas=# Number of metadata replicas\n"
" --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
" --no_passphrase Don't encrypt master encryption key\n"
" --error_action=(continue|readonly|panic)\n"
}
enum {
- Opt_no_opt = 1,
+ O_no_opt = 1,
#define t(text)
-#define x(shortopt, longopt, arg, help) Opt_##longopt,
+#define x(shortopt, longopt, arg, help) O_##longopt,
OPTS
#undef x
#undef t
.name = #longopt, \
.has_arg = arg ? required_argument : no_argument, \
.flag = NULL, \
- .val = Opt_##longopt, \
+ .val = O_##longopt, \
},
OPTS
#undef x
format_opts,
NULL)) != -1)
switch (opt) {
- case Opt_block_size:
+ case O_block_size:
case 'b':
opts.block_size =
hatoi_validate(optarg, "block size");
break;
- case Opt_btree_node_size:
+ case O_btree_node_size:
opts.btree_node_size =
hatoi_validate(optarg, "btree node size");
break;
- case Opt_metadata_checksum_type:
+ case O_metadata_checksum_type:
opts.meta_csum_type =
read_string_list_or_die(optarg,
bch_csum_types, "checksum type");
break;
- case Opt_data_checksum_type:
+ case O_data_checksum_type:
opts.data_csum_type =
read_string_list_or_die(optarg,
bch_csum_types, "checksum type");
break;
- case Opt_compression_type:
+ case O_compression_type:
opts.compression_type =
read_string_list_or_die(optarg,
bch_compression_types,
"compression type");
break;
- case Opt_encrypted:
+ case O_data_replicas:
+ if (kstrtouint(optarg, 10, &opts.data_replicas) ||
+ !opts.data_replicas ||
+ opts.data_replicas >= BCH_REPLICAS_MAX)
+ die("invalid replicas");
+ break;
+ case O_metadata_replicas:
+ if (kstrtouint(optarg, 10, &opts.meta_replicas) ||
+ !opts.meta_replicas ||
+ opts.meta_replicas >= BCH_REPLICAS_MAX)
+ die("invalid replicas");
+ break;
+ break;
+ case O_encrypted:
opts.encrypted = true;
break;
- case Opt_no_passphrase:
+ case O_no_passphrase:
no_passphrase = true;
break;
- case Opt_error_action:
+ case O_error_action:
case 'e':
opts.on_error_action =
read_string_list_or_die(optarg,
bch_error_actions, "error action");
break;
- case Opt_max_journal_entry_size:
+ case O_max_journal_entry_size:
opts.max_journal_entry_size =
hatoi_validate(optarg, "journal entry size");
break;
- case Opt_label:
+ case O_label:
case 'L':
opts.label = strdup(optarg);
break;
- case Opt_uuid:
+ case O_uuid:
case 'U':
if (uuid_parse(optarg, opts.uuid.b))
die("Bad uuid");
break;
- case Opt_force:
+ case O_force:
case 'f':
force = true;
break;
- case Opt_fs_size:
+ case O_fs_size:
if (bch_strtoull_h(optarg, &dev_opts.size))
die("invalid filesystem size");
dev_opts.size >>= 9;
break;
- case Opt_bucket_size:
+ case O_bucket_size:
dev_opts.bucket_size =
hatoi_validate(optarg, "bucket size");
break;
- case Opt_tier:
+ case O_tier:
case 't':
if (kstrtouint(optarg, 10, &dev_opts.tier) ||
dev_opts.tier >= BCH_TIER_MAX)
die("invalid tier");
break;
- case Opt_discard:
+ case O_discard:
dev_opts.discard = true;
break;
- case Opt_no_opt:
+ case O_no_opt:
dev_opts.path = strdup(optarg);
darray_append(devices, dev_opts);
dev_opts.size = 0;
break;
- case Opt_help:
+ case O_help:
case 'h':
usage();
exit(EXIT_SUCCESS);
int cmd_device_show(int argc, char *argv[]);
int cmd_device_add(int argc, char *argv[]);
+int cmd_device_fail(int argc, char *argv[]);
int cmd_device_remove(int argc, char *argv[]);
int cmd_fsck(int argc, char *argv[]);
/* global control dev: */
-#define BCH_FORCE_IF_DATA_MISSING (1 << 0)
-#define BCH_FORCE_IF_METADATA_MISSING (1 << 1)
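+/*
+ * DEGRADED: allow the operation even if it leaves data/metadata with fewer
+ * replicas than desired; LOST: allow it even if it takes the last copy of
+ * some data/metadata offline:
+ */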
+#define BCH_FORCE_IF_DATA_LOST (1 << 0)
+#define BCH_FORCE_IF_METADATA_LOST (1 << 1)
+#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
+#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
+
+#define BCH_FORCE_IF_DEGRADED \
+ (BCH_FORCE_IF_DATA_DEGRADED| \
+ BCH_FORCE_IF_METADATA_DEGRADED)
#define BCH_IOCTL_ASSEMBLE _IOW('r', 1, struct bch_ioctl_assemble)
#define BCH_IOCTL_INCREMENTAL _IOW('r', 1, struct bch_ioctl_incremental)
#define BCH_IOCTL_DISK_ADD _IOW('r', 4, struct bch_ioctl_disk_add)
#define BCH_IOCTL_DISK_REMOVE _IOW('r', 5, struct bch_ioctl_disk_remove)
-#define BCH_IOCTL_DISK_FAIL _IOW('r', 6, struct bch_ioctl_disk_fail)
+#define BCH_IOCTL_DISK_SET_STATE _IOW('r', 6, struct bch_ioctl_disk_set_state)
#define BCH_IOCTL_DISK_REMOVE_BY_UUID \
_IOW('r', 5, struct bch_ioctl_disk_remove_by_uuid)
__u64 dev;
};
-struct bch_ioctl_disk_fail {
+struct bch_ioctl_disk_set_state {
__u32 flags;
- __u32 pad;
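+ /* new_state is one of the BCH_MEMBER_STATE_* values: */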
+ __u8 new_state;
+ __u8 pad[3];
__u64 dev;
};
LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
LE64_BITMASK(BCH_SB_JOURNAL_ENTRY_SIZE, struct bch_sb, flags[1], 14, 20);
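+/*
+ * REQ is the minimum number of replicas needed to stay read-write, as
+ * opposed to WANT, the number normally written:
+ */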
+LE64_BITMASK(BCH_SB_META_REPLICAS_REQ, struct bch_sb, flags[1], 20, 24);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_REQ, struct bch_sb, flags[1], 24, 28);
+
/* Features: */
enum bch_sb_features {
BCH_FEATURE_LZ4 = 0,
SET_BCH_SB_GC_RESERVE(sb, 8);
SET_BCH_SB_META_REPLICAS_WANT(sb, opts.meta_replicas);
SET_BCH_SB_META_REPLICAS_HAVE(sb, opts.meta_replicas);
+ SET_BCH_SB_META_REPLICAS_REQ(sb, opts.meta_replicas_required);
SET_BCH_SB_DATA_REPLICAS_WANT(sb, opts.data_replicas);
SET_BCH_SB_DATA_REPLICAS_HAVE(sb, opts.data_replicas);
+ SET_BCH_SB_DATA_REPLICAS_REQ(sb, opts.data_replicas_required);
SET_BCH_SB_ERROR_ACTION(sb, opts.on_error_action);
SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size));
unsigned meta_replicas;
unsigned data_replicas;
+ unsigned meta_replicas_required;
+ unsigned data_replicas_required;
+
unsigned meta_csum_type;
unsigned data_csum_type;
unsigned compression_type;
.data_csum_type = BCH_CSUM_CRC32C,
.meta_replicas = 1,
.data_replicas = 1,
+ .meta_replicas_required = 1,
+ .data_replicas_required = 1,
};
}
-1);
group_for_each_cache_rcu(ca, &c->tiers[i].devs, iter) {
- struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
+ struct bch_dev_usage stats = bch_dev_usage_read(ca);
unsigned bucket_bits = ca->bucket_bits + 9;
u64 size = (ca->mi.nbuckets -
struct cache_member_rcu *mi = cache_member_info_get(c);
unsigned i, sectors_free = UINT_MAX;
- BUG_ON(nr_replicas > ob->nr_ptrs);
-
- for (i = 0; i < nr_replicas; i++)
+ for (i = 0; i < min(nr_replicas, ob->nr_ptrs); i++)
sectors_free = min(sectors_free,
ob_ptr_sectors_free(ob, mi, &ob->ptrs[i]));
struct write_point *wp,
struct open_bucket *ob,
unsigned nr_replicas,
+ unsigned nr_replicas_required,
enum alloc_reserve reserve,
struct closure *cl)
{
long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
- int i, dst;
+ unsigned i;
+ int ret;
/*
* We might be allocating pointers to add to an existing extent
memset(caches_used, 0, sizeof(caches_used));
- /*
- * Shuffle pointers to devices we already have to the end:
- * bch_bucket_alloc_set() will add new pointers to the statr of @b, and
- * bch_alloc_sectors_done() will add the first nr_replicas ptrs to @e:
- */
- for (i = dst = ob->nr_ptrs - 1; i >= 0; --i)
- if (__test_and_set_bit(ob->ptrs[i].dev, caches_used)) {
- if (i != dst) {
- swap(ob->ptrs[i], ob->ptrs[dst]);
- swap(ob->ptr_offset[i], ob->ptr_offset[dst]);
- }
- --dst;
- nr_replicas++;
- }
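+ /* Don't allocate from devices this open bucket already points to: */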
+ for (i = 0; i < ob->nr_ptrs; i++)
+ __set_bit(ob->ptrs[i].dev, caches_used);
+
+ ret = bch_bucket_alloc_set(c, wp, ob, nr_replicas,
+ reserve, caches_used, cl);
+
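+ /*
+ * If we couldn't allocate a full nr_replicas, still succeed provided
+ * we met the required minimum:
+ */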
+ if (ret == -EROFS &&
+ ob->nr_ptrs >= nr_replicas_required)
+ ret = 0;
- return bch_bucket_alloc_set(c, wp, ob, nr_replicas,
- reserve, caches_used, cl);
+ return ret;
}
/*
struct open_bucket *bch_alloc_sectors_start(struct cache_set *c,
struct write_point *wp,
unsigned nr_replicas,
+ unsigned nr_replicas_required,
enum alloc_reserve reserve,
struct closure *cl)
{
}
ret = open_bucket_add_buckets(c, wp, ob, nr_replicas,
+ nr_replicas_required,
reserve, cl);
if (ret) {
mutex_unlock(&ob->lock);
* __bch_write() will only write to the pointers we add here:
*/
- /*
- * XXX: don't add pointers to devices @e already has
- */
- BUG_ON(nr_replicas > ob->nr_ptrs);
BUG_ON(sectors > ob->sectors_free);
/* didn't use all the ptrs: */
rcu_read_lock();
- for (i = 0; i < nr_replicas; i++) {
+ for (i = 0; i < min(ob->nr_ptrs, nr_replicas); i++) {
EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
tmp = ob->ptrs[i];
struct write_point *wp,
struct bkey_i_extent *e,
unsigned nr_replicas,
+ unsigned nr_replicas_required,
enum alloc_reserve reserve,
struct closure *cl)
{
struct open_bucket *ob;
- ob = bch_alloc_sectors_start(c, wp, nr_replicas, reserve, cl);
+ ob = bch_alloc_sectors_start(c, wp, nr_replicas,
+ nr_replicas_required,
+ reserve, cl);
if (IS_ERR_OR_NULL(ob))
return ob;
struct open_bucket *bch_alloc_sectors_start(struct cache_set *,
struct write_point *,
- unsigned, enum alloc_reserve,
+ unsigned, unsigned,
+ enum alloc_reserve,
struct closure *);
void bch_alloc_sectors_append_ptrs(struct cache_set *, struct bkey_i_extent *,
struct open_bucket *);
struct open_bucket *bch_alloc_sectors(struct cache_set *, struct write_point *,
- struct bkey_i_extent *, unsigned,
+ struct bkey_i_extent *, unsigned, unsigned,
enum alloc_reserve, struct closure *);
static inline void bch_wake_allocator(struct cache *ca)
struct cache_member_cpu m[];
};
-/* cache->flags: */
-enum {
- BCH_DEV_REMOVING,
- BCH_DEV_FORCE_REMOVE,
-};
-
struct cache {
struct percpu_ref ref;
struct rcu_head free_rcu;
struct work_struct free_work;
- struct work_struct remove_work;
- unsigned long flags;
struct cache_set *set;
* second contains a saved copy of the stats from the beginning
* of GC.
*/
- struct bucket_stats_cache __percpu *bucket_stats_percpu;
- struct bucket_stats_cache bucket_stats_cached;
+ struct bch_dev_usage __percpu *bucket_stats_percpu;
+ struct bch_dev_usage bucket_stats_cached;
atomic_long_t saturated_count;
size_t inc_gen_needs_gc;
atomic64_t sectors_available;
- struct bucket_stats_cache_set __percpu *bucket_stats_percpu;
- struct bucket_stats_cache_set bucket_stats_cached;
+ struct bch_fs_usage __percpu *bucket_stats_percpu;
+ struct bch_fs_usage bucket_stats_cached;
struct lglock bucket_stats_lock;
struct mutex bucket_lock;
/* Also see bch_pending_btree_node_free_insert_done() */
static void bch_mark_pending_btree_node_frees(struct cache_set *c)
{
- struct bucket_stats_cache_set stats = { 0 };
+ struct bch_fs_usage stats = { 0 };
struct btree_interior_update *as;
struct pending_btree_node_free *d;
/* Save a copy of the existing bucket stats while we recompute them: */
for_each_cache(ca, c, i) {
- ca->bucket_stats_cached = __bch_bucket_stats_read_cache(ca);
+ ca->bucket_stats_cached = __bch_dev_usage_read(ca);
for_each_possible_cpu(cpu) {
- struct bucket_stats_cache *p =
+ struct bch_dev_usage *p =
per_cpu_ptr(ca->bucket_stats_percpu, cpu);
memset(p, 0, sizeof(*p));
}
}
- c->bucket_stats_cached = __bch_bucket_stats_read_cache_set(c);
+ c->bucket_stats_cached = __bch_fs_usage_read(c);
for_each_possible_cpu(cpu) {
- struct bucket_stats_cache_set *p =
+ struct bch_fs_usage *p =
per_cpu_ptr(c->bucket_stats_percpu, cpu);
memset(p->s, 0, sizeof(p->s));
*/
struct btree_iter;
-struct bucket_stats_cache_set;
struct btree_node_iter;
enum extent_insert_hook_ret {
*/
static void bch_btree_node_free_index(struct cache_set *c, struct btree *b,
enum btree_id id, struct bkey_s_c k,
- struct bucket_stats_cache_set *stats)
+ struct bch_fs_usage *stats)
{
struct btree_interior_update *as;
struct pending_btree_node_free *d;
* moving this reference from, hence one comparison here:
*/
if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
- struct bucket_stats_cache_set tmp = { 0 };
+ struct bch_fs_usage tmp = { 0 };
bch_mark_key(c, bkey_i_to_s_c(&d->key),
-c->sb.btree_node_size, true, b
static void bch_btree_node_free_ondisk(struct cache_set *c,
struct pending_btree_node_free *pending)
{
- struct bucket_stats_cache_set stats = { 0 };
+ struct bch_fs_usage stats = { 0 };
BUG_ON(!pending->index_update_done);
ob = bch_alloc_sectors(c, &c->btree_write_point,
bkey_i_to_extent(&tmp.k),
res->nr_replicas,
+ c->opts.metadata_replicas_required,
use_reserve ? RESERVE_BTREE : RESERVE_NONE,
cl);
if (IS_ERR(ob))
* bch_btree_root_read()) - do marking while holding
* btree_root_lock:
*/
- struct bucket_stats_cache_set stats = { 0 };
+ struct bch_fs_usage stats = { 0 };
bch_mark_key(c, bkey_i_to_s_c(&b->key),
c->sb.btree_node_size, true,
struct disk_reservation *disk_res)
{
struct cache_set *c = iter->c;
- struct bucket_stats_cache_set stats = { 0 };
+ struct bch_fs_usage stats = { 0 };
struct bkey_packed *k;
struct bkey tmp;
static void bch_fs_stats_verify(struct cache_set *c)
{
- struct bucket_stats_cache_set stats =
- __bch_bucket_stats_read_cache_set(c);
+ struct bch_fs_usage stats =
+ __bch_fs_usage_read(c);
if ((s64) stats.sectors_dirty < 0)
panic("sectors_dirty underflow: %lli\n", stats.sectors_dirty);
_ret; \
})
-struct bucket_stats_cache __bch_bucket_stats_read_cache(struct cache *ca)
+struct bch_dev_usage __bch_dev_usage_read(struct cache *ca)
{
return bucket_stats_read_raw(ca->bucket_stats_percpu);
}
-struct bucket_stats_cache bch_bucket_stats_read_cache(struct cache *ca)
+struct bch_dev_usage bch_dev_usage_read(struct cache *ca)
{
return bucket_stats_read_cached(ca->set,
ca->bucket_stats_cached,
ca->bucket_stats_percpu);
}
-struct bucket_stats_cache_set
-__bch_bucket_stats_read_cache_set(struct cache_set *c)
+struct bch_fs_usage
+__bch_fs_usage_read(struct cache_set *c)
{
return bucket_stats_read_raw(c->bucket_stats_percpu);
}
-struct bucket_stats_cache_set
-bch_bucket_stats_read_cache_set(struct cache_set *c)
+struct bch_fs_usage
+bch_fs_usage_read(struct cache_set *c)
{
return bucket_stats_read_cached(c,
c->bucket_stats_cached,
}
void bch_fs_stats_apply(struct cache_set *c,
- struct bucket_stats_cache_set *stats,
+ struct bch_fs_usage *stats,
struct disk_reservation *disk_res,
struct gc_pos gc_pos)
{
}
static void bucket_stats_update(struct cache *ca,
- struct bucket_mark old, struct bucket_mark new,
- struct bucket_stats_cache_set *bch_alloc_stats)
+ struct bucket_mark old, struct bucket_mark new,
+ struct bch_fs_usage *bch_alloc_stats)
{
struct cache_set *c = ca->set;
- struct bucket_stats_cache *cache_stats;
+ struct bch_dev_usage *cache_stats;
bch_fs_inconsistent_on(old.data_type && new.data_type &&
old.data_type != new.data_type, c,
#define bucket_data_cmpxchg(ca, g, new, expr) \
({ \
- struct bucket_stats_cache_set _stats = { 0 }; \
+ struct bch_fs_usage _stats = { 0 }; \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\
bucket_stats_update(ca, _old, new, &_stats); \
void bch_invalidate_bucket(struct cache *ca, struct bucket *g)
{
- struct bucket_stats_cache_set stats = { 0 };
+ struct bch_fs_usage stats = { 0 };
struct bucket_mark old, new;
old = bucket_cmpxchg(g, new, ({
*/
static void bch_mark_pointer(struct cache_set *c,
struct bkey_s_c_extent e,
- struct cache *ca,
const union bch_extent_crc *crc,
const struct bch_extent_ptr *ptr,
s64 sectors, enum s_alloc type,
bool may_make_unavailable,
- struct bucket_stats_cache_set *stats,
+ struct bch_fs_usage *stats,
bool gc_will_visit, u64 journal_seq)
{
struct bucket_mark old, new;
unsigned saturated;
- struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
- u64 v = READ_ONCE(g->_mark.counter);
+ struct cache *ca;
+ struct bucket *g;
+ u64 v;
unsigned old_sectors, new_sectors;
int disk_sectors, compressed_sectors;
compressed_sectors = -__compressed_sectors(crc, old_sectors)
+ __compressed_sectors(crc, new_sectors);
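+ /* The pointer may be to a device that's no longer online: */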
+ ca = PTR_CACHE(c, ptr);
+ if (!ca)
+ goto out;
+
+ g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
+
if (gc_will_visit) {
if (journal_seq)
bucket_cmpxchg(g, new, new.journal_seq = journal_seq);
goto out;
}
+ v = READ_ONCE(g->_mark.counter);
do {
new.counter = old.counter = v;
saturated = 0;
static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e,
s64 sectors, bool metadata,
bool may_make_unavailable,
- struct bucket_stats_cache_set *stats,
+ struct bch_fs_usage *stats,
bool gc_will_visit, u64 journal_seq)
{
const struct bch_extent_ptr *ptr;
const union bch_extent_crc *crc;
- struct cache *ca;
enum s_alloc type = metadata ? S_META : S_DIRTY;
BUG_ON(metadata && bkey_extent_is_cached(e.k));
BUG_ON(!sectors);
rcu_read_lock();
- extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
- trace_bcache_mark_bucket(ca, e.k, ptr, sectors, !ptr->cached);
-
- bch_mark_pointer(c, e, ca, crc, ptr, sectors,
+ extent_for_each_ptr_crc(e, ptr, crc)
+ bch_mark_pointer(c, e, crc, ptr, sectors,
ptr->cached ? S_CACHED : type,
may_make_unavailable,
stats, gc_will_visit, journal_seq);
- }
rcu_read_unlock();
}
static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k,
s64 sectors, bool metadata,
bool may_make_unavailable,
- struct bucket_stats_cache_set *stats,
+ struct bch_fs_usage *stats,
bool gc_will_visit, u64 journal_seq)
{
switch (k.k->type) {
void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
s64 sectors, bool metadata,
- struct bucket_stats_cache_set *stats)
+ struct bch_fs_usage *stats)
{
__bch_mark_key(c, k, sectors, metadata, true, stats, false, 0);
}
void bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
s64 sectors, bool metadata)
{
- struct bucket_stats_cache_set stats = { 0 };
+ struct bch_fs_usage stats = { 0 };
__bch_gc_mark_key(c, k, sectors, metadata, &stats);
void bch_mark_key(struct cache_set *c, struct bkey_s_c k,
s64 sectors, bool metadata, struct gc_pos gc_pos,
- struct bucket_stats_cache_set *stats, u64 journal_seq)
+ struct bch_fs_usage *stats, u64 journal_seq)
{
/*
* synchronization w.r.t. GC:
struct disk_reservation *res,
unsigned sectors, int flags)
{
- struct bucket_stats_cache_set *stats;
+ struct bch_fs_usage *stats;
u64 old, new, v;
s64 sectors_available;
int ret;
/* Per device stats: */
-struct bucket_stats_cache __bch_bucket_stats_read_cache(struct cache *);
-struct bucket_stats_cache bch_bucket_stats_read_cache(struct cache *);
+struct bch_dev_usage __bch_dev_usage_read(struct cache *);
+struct bch_dev_usage bch_dev_usage_read(struct cache *);
static inline u64 __buckets_available_cache(struct cache *ca,
- struct bucket_stats_cache stats)
+ struct bch_dev_usage stats)
{
return max_t(s64, 0,
ca->mi.nbuckets - ca->mi.first_bucket -
*/
static inline u64 buckets_available_cache(struct cache *ca)
{
- return __buckets_available_cache(ca, bch_bucket_stats_read_cache(ca));
+ return __buckets_available_cache(ca, bch_dev_usage_read(ca));
}
static inline u64 __buckets_free_cache(struct cache *ca,
- struct bucket_stats_cache stats)
+ struct bch_dev_usage stats)
{
return __buckets_available_cache(ca, stats) +
fifo_used(&ca->free[RESERVE_NONE]) +
static inline u64 buckets_free_cache(struct cache *ca)
{
- return __buckets_free_cache(ca, bch_bucket_stats_read_cache(ca));
+ return __buckets_free_cache(ca, bch_dev_usage_read(ca));
}
/* Cache set stats: */
-struct bucket_stats_cache_set __bch_bucket_stats_read_cache_set(struct cache_set *);
-struct bucket_stats_cache_set bch_bucket_stats_read_cache_set(struct cache_set *);
-void bch_fs_stats_apply(struct cache_set *,
- struct bucket_stats_cache_set *,
- struct disk_reservation *,
- struct gc_pos);
+struct bch_fs_usage __bch_fs_usage_read(struct cache_set *);
+struct bch_fs_usage bch_fs_usage_read(struct cache_set *);
+void bch_fs_stats_apply(struct cache_set *, struct bch_fs_usage *,
+ struct disk_reservation *, struct gc_pos);
static inline u64 __bch_fs_sectors_used(struct cache_set *c)
{
- struct bucket_stats_cache_set stats = __bch_bucket_stats_read_cache_set(c);
+ struct bch_fs_usage stats = __bch_fs_usage_read(c);
u64 reserved = stats.persistent_reserved +
stats.online_reserved;
enum bucket_data_type, bool);
void __bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool,
- struct bucket_stats_cache_set *);
+ struct bch_fs_usage *);
void bch_gc_mark_key(struct cache_set *, struct bkey_s_c, s64, bool);
void bch_mark_key(struct cache_set *, struct bkey_s_c, s64, bool,
- struct gc_pos, struct bucket_stats_cache_set *, u64);
+ struct gc_pos, struct bch_fs_usage *, u64);
void bch_recalc_sectors_available(struct cache_set *);
};
};
-struct bucket_stats_cache {
+struct bch_dev_usage {
u64 buckets_dirty;
u64 buckets_cached;
u64 buckets_meta;
S_COMPRESSED_NR,
};
-struct bucket_stats_cache_set {
+struct bch_fs_usage {
/* all fields are in units of 512 byte sectors: */
u64 s[S_COMPRESSED_NR][S_ALLOC_NR];
u64 persistent_reserved;
if (IS_ERR(ca))
return PTR_ERR(ca);
- ret = bch_dev_remove(ca, arg.flags & BCH_FORCE_IF_DATA_MISSING)
- ? 0 : -EBUSY;
+ ret = bch_dev_remove(c, ca, arg.flags);
percpu_ref_put(&ca->ref);
return ret;
}
-static long bch_ioctl_disk_fail(struct cache_set *c,
- struct bch_ioctl_disk_fail __user *user_arg)
+static long bch_ioctl_disk_set_state(struct cache_set *c,
+ struct bch_ioctl_disk_set_state __user *user_arg)
{
- struct bch_ioctl_disk_fail arg;
+ struct bch_ioctl_disk_set_state arg;
struct cache *ca;
int ret;
if (IS_ERR(ca))
return PTR_ERR(ca);
- /* XXX: failed not actually implemented yet */
- ret = bch_dev_remove(ca, true);
+ ret = bch_dev_set_state(c, ca, arg.new_state, arg.flags);
percpu_ref_put(&ca->ref);
return ret;
return bch_ioctl_disk_add(c, arg);
case BCH_IOCTL_DISK_REMOVE:
return bch_ioctl_disk_remove(c, arg);
- case BCH_IOCTL_DISK_FAIL:
- return bch_ioctl_disk_fail(c, arg);
+ case BCH_IOCTL_DISK_SET_STATE:
+ return bch_ioctl_disk_set_state(c, arg);
case BCH_IOCTL_DISK_REMOVE_BY_UUID:
return bch_ioctl_disk_remove_by_uuid(c, arg);
bch_notify_dev_error(ca, true);
mutex_lock(&c->state_lock);
- dev = bch_dev_may_remove(ca);
+ dev = bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
+ BCH_FORCE_IF_DEGRADED);
if (dev
- ? bch_dev_read_only(ca)
+ ? __bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
+ BCH_FORCE_IF_DEGRADED)
: bch_fs_emergency_read_only(c))
bch_err(c,
"too many IO errors on %s, setting %s RO",
PTR_BUCKET_NR(ca, ptr)))
continue;
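+ /* Don't read from devices marked as failed: */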
+ if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+ continue;
+
if (pick.ca && pick.ca->mi.tier < ca->mi.tier)
continue;
struct btree_insert *trans;
struct btree_insert_entry *insert;
struct bpos committed;
- struct bucket_stats_cache_set stats;
+ struct bch_fs_usage stats;
/* for deleting: */
struct bkey_i whiteout;
if (ptr_stale(ca, ptr))
continue;
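+ /* Don't pick pointers on failed devices: */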
+ if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+ continue;
+
if (ret->ca &&
(ca == avoid ||
ret->ca->mi.tier < ca->mi.tier))
new.reserved = 0;
});
- w->io->op.op.res.sectors += PAGE_SECTORS * (old.reserved - new.reserved);
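+ /* The page's reservation was taken for all of its replicas: */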
+ w->io->op.op.res.sectors += PAGE_SECTORS *
+ (old.reserved - new.reserved) *
+ old.nr_replicas;
out:
BUG_ON(PageWriteback(page));
set_page_writeback(page);
BKEY_EXTENT_U64s_MAX))
continue_at(cl, bch_write_index, index_update_wq(op));
- b = bch_alloc_sectors_start(c, op->wp, op->nr_replicas,
+ b = bch_alloc_sectors_start(c, op->wp,
+ op->nr_replicas,
+ c->opts.data_replicas_required,
op->alloc_reserve,
(op->flags & BCH_WRITE_ALLOC_NOWAIT) ? NULL : cl);
EBUG_ON(!b);
}
rcu_read_unlock();
- if (nr_online < c->opts.metadata_replicas)
+ if (nr_online < c->opts.metadata_replicas_required)
return -EROFS;
- if (nr_devs < c->opts.metadata_replicas)
+ if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas))
return 0;
return sectors_available;
closure_init_stack(&cl);
- mutex_lock(&c->sb_lock);
-
/* don't handle reducing nr of buckets yet: */
if (nr <= ja->nr)
- goto err;
+ return 0;
/*
* note: journal buckets aren't really counted as _sectors_ used yet, so
* reservation to ensure we'll actually be able to allocate:
*/
- ret = ENOSPC;
if (bch_disk_reservation_get(c, &disk_res,
(nr - ja->nr) << ca->bucket_bits, 0))
- goto err;
+ return -ENOSPC;
+
+ mutex_lock(&c->sb_lock);
ret = -ENOMEM;
new_buckets = kzalloc(nr * sizeof(u64), GFP_KERNEL);
j->prev_buf_sectors = 0;
spin_unlock(&j->lock);
- if (replicas < replicas_want)
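+ /*
+ * The write only fails if fewer than metadata_replicas_required
+ * copies made it to disk:
+ */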
+ if (replicas < c->opts.metadata_replicas_required)
return -EROFS;
+ BUG_ON(!replicas);
+
return 0;
}
#include "keylist.h"
#include "migrate.h"
#include "move.h"
+#include "super-io.h"
static int issue_migration_move(struct cache *ca,
struct moving_context *ctxt,
{
struct moving_context ctxt;
struct cache_set *c = ca->set;
+ struct bch_sb_field_members *mi;
unsigned pass = 0;
u64 seen_key_count;
int ret = 0;
BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE);
+ if (!ca->mi.has_data)
+ return 0;
+
bch_move_ctxt_init(&ctxt, NULL, SECTORS_IN_FLIGHT_PER_DEVICE);
ctxt.avoid = ca;
return -1;
}
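+ /* All data has been migrated off; clear the has_data flag: */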
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
+
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
return 0;
}
* is written.
*/
-int bch_move_meta_data_off_device(struct cache *ca)
+int bch_move_metadata_off_device(struct cache *ca)
{
+ struct cache_set *c = ca->set;
+ struct bch_sb_field_members *mi;
unsigned i;
int ret;
+ BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE);
+
+ if (!ca->mi.has_metadata)
+ return 0;
+
/* 1st, Move the btree nodes off the device */
for (i = 0; i < BTREE_ID_NR; i++) {
if (ret)
return ret;
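+ /* All metadata has been migrated off; clear the has_metadata flag: */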
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false);
+
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
return 0;
}
* and don't have other valid pointers. If there are valid pointers,
* the necessary pointers to the removed device are replaced with
* bad pointers instead.
+ *
* This is only called if bch_move_data_off_device above failed, meaning
* that we've already tried to move the data MAX_DATA_OFF_ITER times and
* are not likely to succeed if we try again.
*/
-
int bch_flag_data_bad(struct cache *ca)
{
int ret = 0;
#define _BCACHE_MIGRATE_H
int bch_move_data_off_device(struct cache *);
-int bch_move_meta_data_off_device(struct cache *);
+int bch_move_metadata_off_device(struct cache *);
int bch_flag_data_bad(struct cache *);
#endif /* _BCACHE_MIGRATE_H */
BCH_OPT(errors, 0644, BCH_SB_ERROR_ACTION, \
s8, OPT_STR(bch_error_actions)) \
BCH_OPT(metadata_replicas, 0444, BCH_SB_META_REPLICAS_WANT,\
- s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \
+ s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \
BCH_OPT(data_replicas, 0444, BCH_SB_DATA_REPLICAS_WANT,\
- s8, OPT_UINT(0, BCH_REPLICAS_MAX)) \
+ s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \
+ BCH_OPT(metadata_replicas_required, 0444, BCH_SB_META_REPLICAS_REQ,\
+ s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \
+ BCH_OPT(data_replicas_required, 0444, BCH_SB_DATA_REPLICAS_REQ,\
+ s8, OPT_UINT(1, BCH_REPLICAS_MAX)) \
BCH_OPT(metadata_checksum, 0644, BCH_SB_META_CSUM_TYPE, \
s8, OPT_STR(bch_csum_types)) \
BCH_OPT(data_checksum, 0644, BCH_SB_DATA_CSUM_TYPE, \
BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
return "Invalid number of metadata replicas";
+ if (!BCH_SB_META_REPLICAS_REQ(sb) ||
+ BCH_SB_META_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+ return "Invalid number of metadata replicas";
+
if (!BCH_SB_META_REPLICAS_HAVE(sb) ||
BCH_SB_META_REPLICAS_HAVE(sb) >
BCH_SB_META_REPLICAS_WANT(sb))
BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
return "Invalid number of data replicas";
+ if (!BCH_SB_DATA_REPLICAS_REQ(sb) ||
+ BCH_SB_DATA_REPLICAS_REQ(sb) >= BCH_REPLICAS_MAX)
+ return "Invalid number of metadata replicas";
+
if (!BCH_SB_DATA_REPLICAS_HAVE(sb) ||
BCH_SB_DATA_REPLICAS_HAVE(sb) >
BCH_SB_DATA_REPLICAS_WANT(sb))
struct bch_member *mi;
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
+ unsigned nr_replicas = 0;
mutex_lock(&c->sb_lock);
mi = bch_sb_get_members(c->disk_sb)->members;
extent_for_each_ptr(e, ptr)
- if (!ptr->cached)
+ if (!ptr->cached) {
(meta
? SET_BCH_MEMBER_HAS_METADATA
: SET_BCH_MEMBER_HAS_DATA)(mi + ptr->dev, true);
+ nr_replicas++;
+ }
+
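+ /*
+ * REPLICAS_HAVE is the minimum replication of any extent in the
+ * filesystem; ratchet it down if this extent has fewer replicas:
+ */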
+ nr_replicas = min_t(unsigned, nr_replicas,
+ (meta
+ ? BCH_SB_META_REPLICAS_HAVE
+ : BCH_SB_DATA_REPLICAS_HAVE)(c->disk_sb));
+ (meta
+ ? SET_BCH_SB_META_REPLICAS_HAVE
+ : SET_BCH_SB_DATA_REPLICAS_HAVE)(c->disk_sb, nr_replicas);
bch_write_super(c);
mutex_unlock(&c->sb_lock);
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
struct cache_member_cpu *mi = cache_member_info_get(c)->m;
+ unsigned nr_replicas = 0;
bool ret = true;
- extent_for_each_ptr(e, ptr)
- if (!ptr->cached &&
- !(meta
+ extent_for_each_ptr(e, ptr) {
+ if (ptr->cached)
+ continue;
+
+ if (!(meta
? mi[ptr->dev].has_metadata
: mi[ptr->dev].has_data)) {
ret = false;
break;
}
+ nr_replicas++;
+ }
+
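+ /*
+ * An extent with fewer replicas than replicas_have claims means the
+ * superblock is stale and needs updating:
+ */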
+ if (nr_replicas <
+ (meta ? c->sb.meta_replicas_have : c->sb.data_replicas_have))
+ ret = false;
+
cache_member_info_put();
return ret;
c->sb.btree_node_size,
BCH_ENCODED_EXTENT_MAX) /
PAGE_SECTORS, 0) ||
- !(c->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache_set)) ||
+ !(c->bucket_stats_percpu = alloc_percpu(struct bch_fs_usage)) ||
lg_lock_init(&c->bucket_stats_lock) ||
mempool_init_page_pool(&c->btree_bounce_pool, 1,
ilog2(btree_pages(c))) ||
return NULL;
}
-/* Device startup/shutdown, ro/rw: */
-
-bool bch_dev_read_only(struct cache *ca)
-{
- struct cache_set *c = ca->set;
- struct bch_sb_field_members *mi;
- char buf[BDEVNAME_SIZE];
-
- bdevname(ca->disk_sb.bdev, buf);
-
- lockdep_assert_held(&c->state_lock);
-
- if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE)
- return false;
-
- if (!bch_dev_may_remove(ca)) {
- bch_err(c, "required member %s going RO, forcing fs RO", buf);
- bch_fs_read_only(c);
- }
-
- trace_bcache_cache_read_only(ca);
-
- bch_moving_gc_stop(ca);
-
- /*
- * This stops new data writes (e.g. to existing open data
- * buckets) and then waits for all existing writes to
- * complete.
- */
- bch_dev_allocator_stop(ca);
-
- bch_dev_group_remove(&c->journal.devs, ca);
-
- /*
- * Device data write barrier -- no non-meta-data writes should
- * occur after this point. However, writes to btree buckets,
- * journal buckets, and the superblock can still occur.
- */
- trace_bcache_cache_read_only_done(ca);
-
- bch_notice(c, "%s read only", bdevname(ca->disk_sb.bdev, buf));
- bch_notify_dev_read_only(ca);
-
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
- SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx],
- BCH_MEMBER_STATE_RO);
- bch_write_super(c);
- mutex_unlock(&c->sb_lock);
- return true;
-}
-
-static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
-{
- lockdep_assert_held(&c->state_lock);
-
- if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
- return NULL;
-
- if (test_bit(BCH_DEV_REMOVING, &ca->flags))
- return "removing";
-
- trace_bcache_cache_read_write(ca);
-
- if (bch_dev_allocator_start(ca))
- return "error starting allocator thread";
-
- if (bch_moving_gc_start(ca))
- return "error starting moving GC thread";
-
- if (bch_tiering_start(c))
- return "error starting tiering thread";
-
- bch_notify_dev_read_write(ca);
- trace_bcache_cache_read_write_done(ca);
-
- return NULL;
-}
-
-const char *bch_dev_read_write(struct cache *ca)
-{
- struct cache_set *c = ca->set;
- struct bch_sb_field_members *mi;
- const char *err;
-
- err = __bch_dev_read_write(c, ca);
- if (err)
- return err;
-
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
- SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx],
- BCH_MEMBER_STATE_ACTIVE);
- bch_write_super(c);
- mutex_unlock(&c->sb_lock);
-
- return NULL;
-}
+/* Device startup/shutdown: */
void bch_dev_release(struct kobject *kobj)
{
call_rcu(&ca->free_rcu, bch_dev_free_rcu);
}
-static void bch_dev_remove_work(struct work_struct *work)
-{
- struct cache *ca = container_of(work, struct cache, remove_work);
- struct bch_sb_field_members *mi;
- struct cache_set *c = ca->set;
- char name[BDEVNAME_SIZE];
- bool force = test_bit(BCH_DEV_FORCE_REMOVE, &ca->flags);
- unsigned dev_idx = ca->dev_idx;
-
- bdevname(ca->disk_sb.bdev, name);
-
- /*
- * Device should already be RO, now migrate data off:
- *
- * XXX: locking is sketchy, bch_dev_read_write() has to check
- * BCH_DEV_REMOVING bit
- */
- if (!ca->mi.has_data) {
- /* Nothing to do: */
- } else if (!bch_move_data_off_device(ca)) {
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
- SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
-
- bch_write_super(c);
- mutex_unlock(&c->sb_lock);
- } else if (force) {
- bch_flag_data_bad(ca);
-
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
- SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
-
- bch_write_super(c);
- mutex_unlock(&c->sb_lock);
- } else {
- bch_err(c, "Remove of %s failed, unable to migrate data off",
- name);
- clear_bit(BCH_DEV_REMOVING, &ca->flags);
- return;
- }
-
- /* Now metadata: */
-
- if (!ca->mi.has_metadata) {
- /* Nothing to do: */
- } else if (!bch_move_meta_data_off_device(ca)) {
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
- SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false);
-
- bch_write_super(c);
- mutex_unlock(&c->sb_lock);
- } else {
- bch_err(c, "Remove of %s failed, unable to migrate metadata off",
- name);
- clear_bit(BCH_DEV_REMOVING, &ca->flags);
- return;
- }
-
- /*
- * Ok, really doing the remove:
- * Drop device's prio pointer before removing it from superblock:
- */
- bch_notify_dev_removed(ca);
-
- spin_lock(&c->journal.lock);
- c->journal.prio_buckets[dev_idx] = 0;
- spin_unlock(&c->journal.lock);
-
- bch_journal_meta(&c->journal);
-
- /*
- * Stop device before removing it from the cache set's list of devices -
- * and get our own ref on cache set since ca is going away:
- */
- closure_get(&c->cl);
-
- mutex_lock(&c->state_lock);
-
- bch_dev_stop(ca);
-
- /*
- * RCU barrier between dropping between c->cache and dropping from
- * member info:
- */
- synchronize_rcu();
-
- /*
- * Free this device's slot in the bch_member array - all pointers to
- * this device must be gone:
- */
- mutex_lock(&c->sb_lock);
- mi = bch_sb_get_members(c->disk_sb);
- memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));
-
- bch_write_super(c);
-
- mutex_unlock(&c->sb_lock);
- mutex_unlock(&c->state_lock);
-
- closure_put(&c->cl);
-}
-
-static bool __bch_dev_remove(struct cache_set *c, struct cache *ca, bool force)
-{
- if (test_bit(BCH_DEV_REMOVING, &ca->flags))
- return false;
-
- if (!bch_dev_may_remove(ca)) {
- bch_err(ca->set, "Can't remove last RW device");
- bch_notify_dev_remove_failed(ca);
- return false;
- }
-
- /* First, go RO before we try to migrate data off: */
- bch_dev_read_only(ca);
-
- if (force)
- set_bit(BCH_DEV_FORCE_REMOVE, &ca->flags);
-
- set_bit(BCH_DEV_REMOVING, &ca->flags);
- bch_notify_dev_removing(ca);
-
- /* Migrate the data and finish removal asynchronously: */
-
- queue_work(system_long_wq, &ca->remove_work);
- return true;
-}
-
-bool bch_dev_remove(struct cache *ca, bool force)
-{
- struct cache_set *c = ca->set;
- bool ret;
-
- mutex_lock(&c->state_lock);
- ret = __bch_dev_remove(c, ca, force);
- mutex_unlock(&c->state_lock);
-
- return ret;
-}
-
static int bch_dev_online(struct cache *ca)
{
char buf[12];
ca->dev_idx = sb->sb->dev_idx;
INIT_WORK(&ca->free_work, bch_dev_free_work);
- INIT_WORK(&ca->remove_work, bch_dev_remove_work);
spin_lock_init(&ca->freelist_lock);
spin_lock_init(&ca->prio_buckets_lock);
mutex_init(&ca->heap_lock);
!(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
- !(ca->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache)) ||
+ !(ca->bucket_stats_percpu = alloc_percpu(struct bch_dev_usage)) ||
!(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio)) ||
return err;
}
+/* Device management: */
+
+static void __bch_dev_read_only(struct cache_set *c, struct cache *ca)
+{
+ bch_moving_gc_stop(ca);
+
+ /*
+ * This stops new data writes (e.g. to existing open data
+ * buckets) and then waits for all existing writes to
+ * complete.
+ */
+ bch_dev_allocator_stop(ca);
+
+ bch_dev_group_remove(&c->journal.devs, ca);
+}
+
+static const char *__bch_dev_read_write(struct cache_set *c, struct cache *ca)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
+ return NULL;
+
+ trace_bcache_cache_read_write(ca);
+
+ if (bch_dev_allocator_start(ca))
+ return "error starting allocator thread";
+
+ if (bch_moving_gc_start(ca))
+ return "error starting moving GC thread";
+
+ if (bch_tiering_start(c))
+ return "error starting tiering thread";
+
+ bch_notify_dev_read_write(ca);
+ trace_bcache_cache_read_write_done(ca);
+
+ return NULL;
+}
+
+bool bch_dev_state_allowed(struct cache_set *c, struct cache *ca,
+ enum bch_member_state new_state, int flags)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ if (new_state == BCH_MEMBER_STATE_ACTIVE)
+ return true;
+
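+ /*
+ * Taking a device with data out of service degrades that data; if it
+ * holds the only copy, the data becomes unavailable. Require the
+ * corresponding force flags:
+ */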
+ if (ca->mi.has_data &&
+ !(flags & BCH_FORCE_IF_DATA_DEGRADED))
+ return false;
+
+ if (ca->mi.has_data &&
+ c->sb.data_replicas_have <= 1 &&
+ !(flags & BCH_FORCE_IF_DATA_LOST))
+ return false;
+
+ if (ca->mi.has_metadata &&
+ !(flags & BCH_FORCE_IF_METADATA_DEGRADED))
+ return false;
+
+ if (ca->mi.has_metadata &&
+ c->sb.meta_replicas_have <= 1 &&
+ !(flags & BCH_FORCE_IF_METADATA_LOST))
+ return false;
+
+ return true;
+}
+
+int __bch_dev_set_state(struct cache_set *c, struct cache *ca,
+ enum bch_member_state new_state, int flags)
+{
+ struct bch_sb_field_members *mi;
+ char buf[BDEVNAME_SIZE];
+
+ if (ca->mi.state == new_state)
+ return 0;
+
+ if (!bch_dev_state_allowed(c, ca, new_state, flags))
+ return -EINVAL;
+
+ if (new_state == BCH_MEMBER_STATE_ACTIVE) {
+ if (__bch_dev_read_write(c, ca))
+ return -ENOMEM;
+ } else {
+ __bch_dev_read_only(c, ca);
+ }
+
+ bch_notice(c, "%s %s",
+ bdevname(ca->disk_sb.bdev, buf),
+ bch_dev_state[new_state]);
+
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], new_state);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+int bch_dev_set_state(struct cache_set *c, struct cache *ca,
+ enum bch_member_state new_state, int flags)
+{
+ int ret;
+
+ mutex_lock(&c->state_lock);
+ ret = __bch_dev_set_state(c, ca, new_state, flags);
+ mutex_unlock(&c->state_lock);
+
+ return ret;
+}
+
+#if 0
+int bch_dev_migrate_from(struct cache_set *c, struct cache *ca)
+{
+ /* First, go RO before we try to migrate data off: */
+ ret = bch_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, flags);
+ if (ret)
+ return ret;
+
+ bch_notify_dev_removing(ca);
+
+ /* Migrate data, metadata off device: */
+
+ ret = bch_move_data_off_device(ca);
+ if (ret && !(flags & BCH_FORCE_IF_DATA_LOST)) {
+ bch_err(c, "Remove of %s failed, unable to migrate data off",
+ name);
+ return ret;
+ }
+
+ if (ret)
+ ret = bch_flag_data_bad(ca);
+ if (ret) {
+ bch_err(c, "Remove of %s failed, unable to migrate data off",
+ name);
+ return ret;
+ }
+
+ ret = bch_move_metadata_off_device(ca);
+ if (ret)
+ return ret;
+}
+#endif
+
+/* Device add/removal: */
+
+static int __bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
+{
+ struct bch_sb_field_members *mi;
+ char name[BDEVNAME_SIZE];
+ unsigned dev_idx = ca->dev_idx;
+ int ret;
+
+ bdevname(ca->disk_sb.bdev, name);
+
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
+ bch_err(ca->set, "Cannot remove RW device");
+ bch_notify_dev_remove_failed(ca);
+ return -EINVAL;
+ }
+
+ if (!bch_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
+ bch_err(ca->set, "Cannot remove %s without losing data", name);
+ bch_notify_dev_remove_failed(ca);
+ return -EINVAL;
+ }
+
+ /*
+ * XXX: verify that dev_idx is really not in use anymore, anywhere
+ *
+ * flag_data_bad() does not check btree pointers
+ */
+ ret = bch_flag_data_bad(ca);
+ if (ret) {
+ bch_err(c, "Remove of %s failed", name);
+ return ret;
+ }
+
+ /*
+ * Ok, really doing the remove:
+ * Drop device's prio pointer before removing it from superblock:
+ */
+ bch_notify_dev_removed(ca);
+
+ spin_lock(&c->journal.lock);
+ c->journal.prio_buckets[dev_idx] = 0;
+ spin_unlock(&c->journal.lock);
+
+ bch_journal_meta(&c->journal);
+
+ bch_dev_stop(ca);
+
+ /*
+ * RCU barrier between dropping the device from c->cache and dropping
+ * it from member info:
+ */
+ synchronize_rcu();
+
+ /*
+ * Free this device's slot in the bch_member array - all pointers to
+ * this device must be gone:
+ */
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));
+
+ bch_write_super(c);
+
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+int bch_dev_remove(struct cache_set *c, struct cache *ca, int flags)
+{
+ int ret;
+
+ mutex_lock(&c->state_lock);
+ ret = __bch_dev_remove(c, ca, flags);
+ mutex_unlock(&c->state_lock);
+
+ return ret;
+}
+
int bch_dev_add(struct cache_set *c, const char *path)
{
struct bcache_superblock sb;
return ret ?: -EINVAL;
}
+/* Filesystem open: */
+
const char *bch_fs_open(char * const *devices, unsigned nr_devices,
struct bch_opts opts, struct cache_set **ret)
{
#include "extents.h"
+#include <linux/bcache-ioctl.h>
+
static inline size_t sector_to_bucket(const struct cache *ca, sector_t s)
{
return s >> ca->bucket_bits;
(ca = bch_get_next_cache(c, &(iter))); \
percpu_ref_put(&ca->ref), (iter)++)
-static inline bool bch_dev_may_remove(struct cache *ca)
-{
- struct cache_set *c = ca->set;
- struct cache_group *grp = &c->cache_all;
-
- /* Can't remove the last RW device: */
- return grp->nr != 1 ||
- rcu_access_pointer(grp->d[0].dev) != ca;
-}
-
void bch_dev_release(struct kobject *);
-bool bch_dev_read_only(struct cache *);
-const char *bch_dev_read_write(struct cache *);
-bool bch_dev_remove(struct cache *, bool force);
+bool bch_dev_state_allowed(struct cache_set *, struct cache *,
+ enum bch_member_state, int);
+int __bch_dev_set_state(struct cache_set *, struct cache *,
+ enum bch_member_state, int);
+int bch_dev_set_state(struct cache_set *, struct cache *,
+ enum bch_member_state, int);
+
+int bch_dev_fail(struct cache *, int);
+int bch_dev_remove(struct cache_set *, struct cache *, int);
int bch_dev_add(struct cache_set *, const char *);
void bch_fs_detach(struct cache_set *);
static struct attribute sysfs_state_rw = {
.name = "state",
- .mode = S_IRUGO|S_IWUSR
+ .mode = S_IRUGO
};
SHOW(bch_cached_dev)
static ssize_t show_fs_alloc_debug(struct cache_set *c, char *buf)
{
- struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c);
+ struct bch_fs_usage stats = bch_fs_usage_read(c);
return scnprintf(buf, PAGE_SIZE,
"capacity:\t\t%llu\n"
static ssize_t show_dev_alloc_debug(struct cache *ca, char *buf)
{
struct cache_set *c = ca->set;
- struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
+ struct bch_dev_usage stats = bch_dev_usage_read(ca);
return scnprintf(buf, PAGE_SIZE,
"free_inc: %zu/%zu\n"
{
struct cache *ca = container_of(kobj, struct cache, kobj);
struct cache_set *c = ca->set;
- struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
+ struct bch_dev_usage stats = bch_dev_usage_read(ca);
sysfs_printf(uuid, "%pU\n", ca->uuid.b);
bch_tiering_start(c);
}
- if (attr == &sysfs_state_rw) {
- char name[BDEVNAME_SIZE];
- const char *err = NULL;
- ssize_t v = bch_read_string_list(buf, bch_dev_state);
-
- if (v < 0)
- return v;
-
- if (v == ca->mi.state)
- return size;
-
- switch (v) {
- case BCH_MEMBER_STATE_ACTIVE:
- err = bch_dev_read_write(ca);
- break;
- case BCH_MEMBER_STATE_RO:
- bch_dev_read_only(ca);
- break;
- case BCH_MEMBER_STATE_FAILED:
- case BCH_MEMBER_STATE_SPARE:
- /*
- * XXX: need to migrate data off and set correct state
- */
- pr_err("can't set %s %s: not supported",
- bdevname(ca->disk_sb.bdev, name),
- bch_dev_state[v]);
- return -EINVAL;
- }
-
- if (err) {
- pr_err("can't set %s %s: %s",
- bdevname(ca->disk_sb.bdev, name),
- bch_dev_state[v], err);
- return -EINVAL;
- }
- }
-
- if (attr == &sysfs_unregister) {
- bool force = false;
-
- if (!strncmp(buf, "force", 5) &&
- (buf[5] == '\0' || buf[5] == '\n'))
- force = true;
- bch_dev_remove(ca, force);
- }
-
if (attr == &sysfs_clear_stats) {
int cpu;
static struct attribute *bch_dev_files[] = {
&sysfs_uuid,
- &sysfs_unregister,
&sysfs_bucket_size,
&sysfs_bucket_size_bytes,
&sysfs_block_size,