libbcache/sysfs.c
1 /*
2  * bcache sysfs interfaces
3  *
4  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
5  * Copyright 2012 Google, Inc.
6  */
7
8 #include "bcache.h"
9 #include "alloc.h"
10 #include "blockdev.h"
11 #include "compress.h"
12 #include "sysfs.h"
13 #include "btree_cache.h"
14 #include "btree_iter.h"
15 #include "btree_update.h"
16 #include "btree_gc.h"
17 #include "buckets.h"
18 #include "inode.h"
19 #include "journal.h"
20 #include "keylist.h"
21 #include "move.h"
22 #include "opts.h"
23 #include "request.h"
24 #include "super-io.h"
25 #include "writeback.h"
26
27 #include <linux/blkdev.h>
28 #include <linux/sort.h>
29
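/*
 * write_attribute() declares write-only trigger files, read_attribute()
 * read-only statistics, rw_attribute() read-write tunables; they are
 * collected into the per-kobject attribute arrays further down.
 */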
30 write_attribute(attach);
31 write_attribute(detach);
32 write_attribute(unregister);
33 write_attribute(stop);
34 write_attribute(clear_stats);
35 write_attribute(trigger_btree_coalesce);
36 write_attribute(trigger_gc);
37 write_attribute(prune_cache);
38 write_attribute(blockdev_volume_create);
39 write_attribute(add_device);
40
41 read_attribute(uuid);
42 read_attribute(minor);
43 read_attribute(bucket_size);
44 read_attribute(bucket_size_bytes);
45 read_attribute(block_size);
46 read_attribute(block_size_bytes);
47 read_attribute(btree_node_size);
48 read_attribute(btree_node_size_bytes);
49 read_attribute(first_bucket);
50 read_attribute(nbuckets);
51 read_attribute(tree_depth);
52 read_attribute(root_usage_percent);
53 read_attribute(read_priority_stats);
54 read_attribute(write_priority_stats);
55 read_attribute(fragmentation_stats);
56 read_attribute(oldest_gen_stats);
57 read_attribute(reserve_stats);
58 read_attribute(btree_cache_size);
59 read_attribute(cache_available_percent);
60 read_attribute(compression_stats);
61 read_attribute(written);
62 read_attribute(btree_written);
63 read_attribute(metadata_written);
64 read_attribute(journal_debug);
65 write_attribute(journal_flush);
66 read_attribute(internal_uuid);
67
68 read_attribute(btree_gc_running);
69
70 read_attribute(btree_nodes);
71 read_attribute(btree_used_percent);
72 read_attribute(average_key_size);
73 read_attribute(available_buckets);
74 read_attribute(free_buckets);
75 read_attribute(dirty_data);
76 read_attribute(dirty_bytes);
77 read_attribute(dirty_buckets);
78 read_attribute(cached_data);
79 read_attribute(cached_bytes);
80 read_attribute(cached_buckets);
81 read_attribute(meta_buckets);
82 read_attribute(alloc_buckets);
83 read_attribute(has_data);
84 read_attribute(has_metadata);
85 read_attribute(bset_tree_stats);
86 read_attribute(alloc_debug);
87
88 read_attribute(state);
89 read_attribute(cache_read_races);
90 read_attribute(writeback_keys_done);
91 read_attribute(writeback_keys_failed);
92 read_attribute(io_errors);
93 rw_attribute(io_error_limit);
94 rw_attribute(io_error_halflife);
95 read_attribute(congested);
96 rw_attribute(congested_read_threshold_us);
97 rw_attribute(congested_write_threshold_us);
98
99 rw_attribute(sequential_cutoff);
100 rw_attribute(cache_mode);
101 rw_attribute(writeback_metadata);
102 rw_attribute(writeback_running);
103 rw_attribute(writeback_percent);
104 sysfs_pd_controller_attribute(writeback);
105
106 read_attribute(stripe_size);
107 read_attribute(partial_stripes_expensive);
108
109 rw_attribute(journal_write_delay_ms);
110 rw_attribute(journal_reclaim_delay_ms);
111 read_attribute(journal_entry_size_max);
112
113 rw_attribute(discard);
114 rw_attribute(running);
115 rw_attribute(label);
116 rw_attribute(readahead);
117 rw_attribute(verify);
118 rw_attribute(bypass_torture_test);
119 rw_attribute(cache_replacement_policy);
120
121 rw_attribute(foreground_write_ratelimit_enabled);
122 rw_attribute(copy_gc_enabled);
123 sysfs_pd_controller_attribute(copy_gc);
124 rw_attribute(tiering_enabled);
125 rw_attribute(tiering_percent);
126 sysfs_pd_controller_attribute(tiering);
127
128 sysfs_pd_controller_attribute(foreground_write);
129
130 rw_attribute(pd_controllers_update_seconds);
131
132 rw_attribute(foreground_target_percent);
133
134 rw_attribute(size);
135 read_attribute(meta_replicas_have);
136 read_attribute(data_replicas_have);
137 read_attribute(tier);
138
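/* every debug parameter gets a read-write sysfs attribute: */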
139 #define BCH_DEBUG_PARAM(name, description)                              \
140         rw_attribute(name);
141
142         BCH_DEBUG_PARAMS()
143 #undef BCH_DEBUG_PARAM
144
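/* one sysfs attribute per visible option; _perm decides whether it is writable: */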
145 #define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)            \
146         static struct attribute sysfs_opt_##_name = {                   \
147                 .name = #_name,                                         \
148                 .mode = S_IRUGO|(_perm ? S_IWUSR : 0)                   \
149         };
150
151         BCH_VISIBLE_OPTS()
152 #undef BCH_OPT
153
154 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
155         sysfs_time_stats_attribute(name, frequency_units, duration_units);
156         BCH_TIME_STATS()
157 #undef BCH_TIME_STAT
158
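/*
 * read-write "state" attribute for struct cache, declared by hand because
 * read_attribute(state) above already defines sysfs_state:
 */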
159 static struct attribute sysfs_state_rw = {
160         .name = "state",
161         .mode = S_IRUGO|S_IWUSR
162 };
163
164 SHOW(bch_cached_dev)
165 {
166         struct cached_dev *dc = container_of(kobj, struct cached_dev,
167                                              disk.kobj);
168         const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };
169
170 #define var(stat)               (dc->stat)
171
172         if (attr == &sysfs_cache_mode)
173                 return bch_snprint_string_list(buf, PAGE_SIZE,
174                                                bch_cache_modes + 1,
175                                                BDEV_CACHE_MODE(dc->disk_sb.sb));
176
177         var_printf(verify,              "%i");
178         var_printf(bypass_torture_test, "%i");
179         var_printf(writeback_metadata,  "%i");
180         var_printf(writeback_running,   "%i");
181         var_print(writeback_percent);
182         sysfs_pd_controller_show(writeback, &dc->writeback_pd);
183
184         sysfs_hprint(dirty_data,
185                      bcache_dev_sectors_dirty(&dc->disk) << 9);
186         sysfs_print(dirty_bytes,
187                     bcache_dev_sectors_dirty(&dc->disk) << 9);
188
189         sysfs_hprint(stripe_size,       dc->disk.stripe_size << 9);
190         var_printf(partial_stripes_expensive,   "%u");
191
192         var_hprint(sequential_cutoff);
193         var_hprint(readahead);
194
195         sysfs_print(running,            atomic_read(&dc->running));
196         sysfs_print(state,              states[BDEV_STATE(dc->disk_sb.sb)]);
197
198         if (attr == &sysfs_label) {
199                 memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
200                 buf[BCH_SB_LABEL_SIZE] = '\0';
201                 strcat(buf, "\n");
202                 return strlen(buf);
203         }
204
205 #undef var
206         return 0;
207 }
208
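/*
 * __cached_dev_store() does the work; bch_cached_dev_store() below wraps it
 * with bch_register_lock held.
 */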
209 STORE(__cached_dev)
210 {
211         struct cached_dev *dc = container_of(kobj, struct cached_dev,
212                                              disk.kobj);
213         unsigned v = size;
214         struct cache_set *c;
215         struct kobj_uevent_env *env;
216
217 #define d_strtoul(var)          sysfs_strtoul(var, dc->var)
218 #define d_strtoul_nonzero(var)  sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
219 #define d_strtoi_h(var)         sysfs_hatoi(var, dc->var)
220
221         d_strtoul(verify);
222         d_strtoul(bypass_torture_test);
223         d_strtoul(writeback_metadata);
224         d_strtoul(writeback_running);
225         sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
226         sysfs_pd_controller_store(writeback, &dc->writeback_pd);
227
228         d_strtoi_h(sequential_cutoff);
229         d_strtoi_h(readahead);
230
231         if (attr == &sysfs_clear_stats)
232                 bch_cache_accounting_clear(&dc->accounting);
233
234         if (attr == &sysfs_running &&
235             strtoul_or_return(buf))
236                 bch_cached_dev_run(dc);
237
238         if (attr == &sysfs_cache_mode) {
239                 ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1);
240
241                 if (v < 0)
242                         return v;
243
244                 if ((unsigned) v != BDEV_CACHE_MODE(dc->disk_sb.sb)) {
245                         SET_BDEV_CACHE_MODE(dc->disk_sb.sb, v);
246                         bch_write_bdev_super(dc, NULL);
247                 }
248         }
249
250         if (attr == &sysfs_label) {
251                 u64 journal_seq = 0;
252                 int ret = 0;
253
254                 if (size > BCH_SB_LABEL_SIZE)
255                         return -EINVAL;
256
257                 mutex_lock(&dc->disk.inode_lock);
258
259                 memcpy(dc->disk_sb.sb->label, buf, size);
260                 if (size < BCH_SB_LABEL_SIZE)
261                         dc->disk_sb.sb->label[size] = '\0';
262                 if (size && dc->disk_sb.sb->label[size - 1] == '\n')
263                         dc->disk_sb.sb->label[size - 1] = '\0';
264
265                 memcpy(dc->disk.inode.v.i_label,
266                        dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
267
268                 bch_write_bdev_super(dc, NULL);
269
270                 if (dc->disk.c)
271                         ret = bch_btree_update(dc->disk.c, BTREE_ID_INODES,
272                                                &dc->disk.inode.k_i,
273                                                &journal_seq);
274
275                 mutex_unlock(&dc->disk.inode_lock);
276
277                 if (ret)
278                         return ret;
279
280                 if (dc->disk.c)
281                         ret = bch_journal_flush_seq(&dc->disk.c->journal,
282                                                     journal_seq);
283                 if (ret)
284                         return ret;
285
286                 env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
287                 if (!env)
288                         return -ENOMEM;
289                 add_uevent_var(env, "DRIVER=bcache");
290                 add_uevent_var(env, "CACHED_UUID=%pU", dc->disk_sb.sb->disk_uuid.b);
291                 add_uevent_var(env, "CACHED_LABEL=%s", buf);
292                 kobject_uevent_env(
293                         &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
294                 kfree(env);
295         }
296
297         if (attr == &sysfs_attach) {
298                 if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid))
299                         return -EINVAL;
300
301                 list_for_each_entry(c, &bch_cache_sets, list) {
302                         v = bch_cached_dev_attach(dc, c);
303                         if (!v)
304                                 return size;
305                 }
306
307                 pr_err("Can't attach %s: cache set not found", buf);
308                 size = v;
309         }
310
311         if (attr == &sysfs_detach && dc->disk.c)
312                 bch_cached_dev_detach(dc);
313
314         if (attr == &sysfs_stop)
315                 bch_blockdev_stop(&dc->disk);
316
317         return size;
318 }
319
320 STORE(bch_cached_dev)
321 {
322         struct cached_dev *dc = container_of(kobj, struct cached_dev,
323                                              disk.kobj);
324
325         mutex_lock(&bch_register_lock);
326         size = __cached_dev_store(kobj, attr, buf, size);
327
328         if (attr == &sysfs_writeback_running)
329                 bch_writeback_queue(dc);
330
331         if (attr == &sysfs_writeback_percent)
332                 schedule_delayed_work(&dc->writeback_pd_update,
333                                       dc->writeback_pd_update_seconds * HZ);
334
335         mutex_unlock(&bch_register_lock);
336         return size;
337 }
338
339 static struct attribute *bch_cached_dev_files[] = {
340         &sysfs_attach,
341         &sysfs_detach,
342         &sysfs_stop,
343         &sysfs_cache_mode,
344         &sysfs_writeback_metadata,
345         &sysfs_writeback_running,
346         &sysfs_writeback_percent,
347         sysfs_pd_controller_files(writeback),
348         &sysfs_dirty_data,
349         &sysfs_dirty_bytes,
350         &sysfs_stripe_size,
351         &sysfs_partial_stripes_expensive,
352         &sysfs_sequential_cutoff,
353         &sysfs_clear_stats,
354         &sysfs_running,
355         &sysfs_state,
356         &sysfs_label,
357         &sysfs_readahead,
358 #ifdef CONFIG_BCACHE_DEBUG
359         &sysfs_verify,
360         &sysfs_bypass_torture_test,
361 #endif
362         NULL
363 };
364 KTYPE(bch_cached_dev);
365
366 SHOW(bch_blockdev_volume)
367 {
368         struct bcache_device *d = container_of(kobj, struct bcache_device,
369                                                kobj);
370
371         sysfs_hprint(size,      le64_to_cpu(d->inode.v.i_size));
372
373         if (attr == &sysfs_label) {
374                 memcpy(buf, d->inode.v.i_label, BCH_SB_LABEL_SIZE);
375                 buf[BCH_SB_LABEL_SIZE] = '\0';
376                 strcat(buf, "\n");
377                 return strlen(buf);
378         }
379
380         return 0;
381 }
382
383 STORE(__bch_blockdev_volume)
384 {
385         struct bcache_device *d = container_of(kobj, struct bcache_device,
386                                                kobj);
387
388         if (attr == &sysfs_size) {
389                 u64 journal_seq = 0;
390                 u64 v = strtoi_h_or_return(buf);
391                 int ret;
392
393                 mutex_lock(&d->inode_lock);
394
395                 if (v < le64_to_cpu(d->inode.v.i_size)) {
396                         ret = bch_inode_truncate(d->c, d->inode.k.p.inode,
397                                                  v >> 9, NULL, NULL);
398                         if (ret) {
399                                 mutex_unlock(&d->inode_lock);
400                                 return ret;
401                         }
402                 }
403                 d->inode.v.i_size = cpu_to_le64(v);
404                 ret = bch_btree_update(d->c, BTREE_ID_INODES,
405                                        &d->inode.k_i, &journal_seq);
406
407                 mutex_unlock(&d->inode_lock);
408
409                 if (ret)
410                         return ret;
411
412                 ret = bch_journal_flush_seq(&d->c->journal, journal_seq);
413                 if (ret)
414                         return ret;
415
416                 set_capacity(d->disk, v >> 9);
417         }
418
419         if (attr == &sysfs_label) {
420                 u64 journal_seq = 0;
421                 int ret;
422
423                 mutex_lock(&d->inode_lock);
424
425                 memcpy(d->inode.v.i_label, buf, BCH_SB_LABEL_SIZE);
426                 ret = bch_btree_update(d->c, BTREE_ID_INODES,
427                                        &d->inode.k_i, &journal_seq);
428
429                 mutex_unlock(&d->inode_lock);
430
431                 return ret ?: bch_journal_flush_seq(&d->c->journal, journal_seq);
432         }
433
434         if (attr == &sysfs_unregister) {
435                 set_bit(BCACHE_DEV_DETACHING, &d->flags);
436                 bch_blockdev_stop(d);
437         }
438
439         return size;
440 }
441 STORE_LOCKED(bch_blockdev_volume)
442
443 static struct attribute *bch_blockdev_volume_files[] = {
444         &sysfs_unregister,
445         &sysfs_label,
446         &sysfs_size,
447         NULL
448 };
449 KTYPE(bch_blockdev_volume);
450
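/*
 * walk the btree nodes currently in memory (under RCU) and sum up
 * bset/aux tree statistics:
 */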
451 static int bch_bset_print_stats(struct cache_set *c, char *buf)
452 {
453         struct bset_stats stats;
454         size_t nodes = 0;
455         struct btree *b;
456         struct bucket_table *tbl;
457         struct rhash_head *pos;
458         unsigned iter;
459
460         memset(&stats, 0, sizeof(stats));
461
462         rcu_read_lock();
463         for_each_cached_btree(b, c, tbl, iter, pos) {
464                 bch_btree_keys_stats(b, &stats);
465                 nodes++;
466         }
467         rcu_read_unlock();
468
469         return snprintf(buf, PAGE_SIZE,
470                         "btree nodes:           %zu\n"
471                         "written sets:          %zu\n"
472                         "written key bytes:     %zu\n"
473                         "unwritten sets:                %zu\n"
474                         "unwritten key bytes:   %zu\n"
475                         "no table sets:         %zu\n"
476                         "no table key bytes:    %zu\n"
477                         "floats:                        %zu\n"
478                         "failed unpacked:       %zu\n"
479                         "failed prev:           %zu\n"
480                         "failed overflow:       %zu\n",
481                         nodes,
482                         stats.sets[BSET_RO_AUX_TREE].nr,
483                         stats.sets[BSET_RO_AUX_TREE].bytes,
484                         stats.sets[BSET_RW_AUX_TREE].nr,
485                         stats.sets[BSET_RW_AUX_TREE].bytes,
486                         stats.sets[BSET_NO_AUX_TREE].nr,
487                         stats.sets[BSET_NO_AUX_TREE].bytes,
488                         stats.floats,
489                         stats.failed_unpacked,
490                         stats.failed_prev,
491                         stats.failed_overflow);
492 }
493
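/*
 * percentage of the extents btree root occupied by keys; retries if the root
 * is replaced while we're taking the read lock:
 */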
494 static unsigned bch_root_usage(struct cache_set *c)
495 {
496         unsigned bytes = 0;
497         struct bkey_packed *k;
498         struct btree *b;
499         struct btree_node_iter iter;
500
501         goto lock_root;
502
503         do {
504                 six_unlock_read(&b->lock);
505 lock_root:
506                 b = c->btree_roots[BTREE_ID_EXTENTS].b;
507                 six_lock_read(&b->lock);
508         } while (b != c->btree_roots[BTREE_ID_EXTENTS].b);
509
510         for_each_btree_node_key(b, k, &iter, btree_node_is_extents(b))
511                 bytes += bkey_bytes(k);
512
513         six_unlock_read(&b->lock);
514
515         return (bytes * 100) / btree_bytes(c);
516 }
517
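/* approximate memory used by the btree node cache: btree_bytes() per cached node */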
518 static size_t bch_cache_size(struct cache_set *c)
519 {
520         size_t ret = 0;
521         struct btree *b;
522
523         mutex_lock(&c->btree_cache_lock);
524         list_for_each_entry(b, &c->btree_cache, list)
525                 ret += btree_bytes(c);
526
527         mutex_unlock(&c->btree_cache_lock);
528         return ret;
529 }
530
531 static unsigned bch_cache_available_percent(struct cache_set *c)
532 {
533         return div64_u64((u64) sectors_available(c) * 100,
534                          c->capacity ?: 1);
535 }
536
537 #if 0
538 static unsigned bch_btree_used(struct cache_set *c)
539 {
540         return div64_u64(c->gc_stats.key_bytes * 100,
541                          (c->gc_stats.nodes ?: 1) * btree_bytes(c));
542 }
543
544 static unsigned bch_average_key_size(struct cache_set *c)
545 {
546         return c->gc_stats.nkeys
547                 ? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
548                 : 0;
549 }
550 #endif
551
552 static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf)
553 {
554         struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c);
555
556         return scnprintf(buf, PAGE_SIZE,
557                          "capacity:\t\t%llu\n"
558                          "compressed:\n"
559                          "\tmeta:\t\t%llu\n"
560                          "\tdirty:\t\t%llu\n"
561                          "\tcached:\t\t%llu\n"
562                          "uncompressed:\n"
563                          "\tmeta:\t\t%llu\n"
564                          "\tdirty:\t\t%llu\n"
565                          "\tcached:\t\t%llu\n"
566                          "persistent reserved sectors:\t%llu\n"
567                          "online reserved sectors:\t%llu\n",
568                          c->capacity,
569                          stats.s[S_COMPRESSED][S_META],
570                          stats.s[S_COMPRESSED][S_DIRTY],
571                          stats.s[S_COMPRESSED][S_CACHED],
572                          stats.s[S_UNCOMPRESSED][S_META],
573                          stats.s[S_UNCOMPRESSED][S_DIRTY],
574                          stats.s[S_UNCOMPRESSED][S_CACHED],
575                          stats.persistent_reserved,
576                          stats.online_reserved);
577 }
578
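/*
 * scan all extents and tally compressed vs. uncompressed sectors, looking
 * only at each extent's first pointer:
 */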
579 static ssize_t bch_compression_stats(struct cache_set *c, char *buf)
580 {
581         struct btree_iter iter;
582         struct bkey_s_c k;
583         u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
584             nr_compressed_extents = 0,
585             compressed_sectors_compressed = 0,
586             compressed_sectors_uncompressed = 0;
587
588         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
589                 if (k.k->type == BCH_EXTENT) {
590                         struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
591                         const struct bch_extent_ptr *ptr;
592                         const union bch_extent_crc *crc;
593
594                         extent_for_each_ptr_crc(e, ptr, crc) {
595                                 if (crc_compression_type(crc) == BCH_COMPRESSION_NONE) {
596                                         nr_uncompressed_extents++;
597                                         uncompressed_sectors += e.k->size;
598                                 } else {
599                                         nr_compressed_extents++;
600                                         compressed_sectors_compressed +=
601                                                 crc_compressed_size(e.k, crc);
602                                         compressed_sectors_uncompressed +=
603                                                 crc_uncompressed_size(e.k, crc);
604                                 }
605
606                                 /* only looking at the first ptr */
607                                 break;
608                         }
609                 }
610         bch_btree_iter_unlock(&iter);
611
612         return snprintf(buf, PAGE_SIZE,
613                         "uncompressed data:\n"
614                         "       nr extents:                     %llu\n"
615                         "       size (bytes):                   %llu\n"
616                         "compressed data:\n"
617                         "       nr extents:                     %llu\n"
618                         "       compressed size (bytes):        %llu\n"
619                         "       uncompressed size (bytes):      %llu\n",
620                         nr_uncompressed_extents,
621                         uncompressed_sectors << 9,
622                         nr_compressed_extents,
623                         compressed_sectors_compressed << 9,
624                         compressed_sectors_uncompressed << 9);
625 }
626
627 SHOW(bch_cache_set)
628 {
629         struct cache_set *c = container_of(kobj, struct cache_set, kobj);
630
631         sysfs_print(minor,                      c->minor);
632
633         sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
634         sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
635         sysfs_hprint(journal_entry_size_max,    c->journal.entry_size_max);
636
637         sysfs_hprint(block_size,                block_bytes(c));
638         sysfs_print(block_size_bytes,           block_bytes(c));
639         sysfs_hprint(btree_node_size,           c->sb.btree_node_size << 9);
640         sysfs_print(btree_node_size_bytes,      c->sb.btree_node_size << 9);
641
642         sysfs_hprint(btree_cache_size,          bch_cache_size(c));
643         sysfs_print(cache_available_percent,    bch_cache_available_percent(c));
644
645         sysfs_print(btree_gc_running,           c->gc_pos.phase != GC_PHASE_DONE);
646
647 #if 0
648         /* XXX: reimplement */
649         sysfs_print(btree_used_percent, bch_btree_used(c));
650         sysfs_print(btree_nodes,        c->gc_stats.nodes);
651         sysfs_hprint(average_key_size,  bch_average_key_size(c));
652 #endif
653
654         sysfs_print(cache_read_races,
655                     atomic_long_read(&c->cache_read_races));
656
657         sysfs_print(writeback_keys_done,
658                     atomic_long_read(&c->writeback_keys_done));
659         sysfs_print(writeback_keys_failed,
660                     atomic_long_read(&c->writeback_keys_failed));
661
662         /* See count_io_errors for why 88 */
663         sysfs_print(io_error_halflife,  c->error_decay * 88);
664         sysfs_print(io_error_limit,     c->error_limit >> IO_ERROR_SHIFT);
665
666         sysfs_hprint(congested,
667                      ((uint64_t) bch_get_congested(c)) << 9);
668         sysfs_print(congested_read_threshold_us,
669                     c->congested_read_threshold_us);
670         sysfs_print(congested_write_threshold_us,
671                     c->congested_write_threshold_us);
672
673         sysfs_printf(foreground_write_ratelimit_enabled, "%i",
674                      c->foreground_write_ratelimit_enabled);
675         sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
676         sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);
677
678         sysfs_print(pd_controllers_update_seconds,
679                     c->pd_controllers_update_seconds);
680         sysfs_print(foreground_target_percent, c->foreground_target_percent);
681
682         sysfs_printf(tiering_enabled,           "%i", c->tiering_enabled);
683         sysfs_print(tiering_percent,            c->tiering_percent);
684         sysfs_pd_controller_show(tiering,       &c->tiering_pd);
685
686         sysfs_printf(meta_replicas_have, "%u",  c->sb.meta_replicas_have);
687         sysfs_printf(data_replicas_have, "%u",  c->sb.data_replicas_have);
688
689         /* Debugging: */
690
691         if (attr == &sysfs_journal_debug)
692                 return bch_journal_print_debug(&c->journal, buf);
693
694 #define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
695         BCH_DEBUG_PARAMS()
696 #undef BCH_DEBUG_PARAM
697
698         if (!test_bit(CACHE_SET_RUNNING, &c->flags))
699                 return -EPERM;
700
701         if (attr == &sysfs_bset_tree_stats)
702                 return bch_bset_print_stats(c, buf);
703         if (attr == &sysfs_alloc_debug)
704                 return show_cache_set_alloc_debug(c, buf);
705
706         sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
707         sysfs_print(root_usage_percent,         bch_root_usage(c));
708
709         if (attr == &sysfs_compression_stats)
710                 return bch_compression_stats(c, buf);
711
712         sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
713
714         return 0;
715 }
716
717 STORE(__bch_cache_set)
718 {
719         struct cache_set *c = container_of(kobj, struct cache_set, kobj);
720
721         if (attr == &sysfs_unregister) {
722                 bch_cache_set_unregister(c);
723                 return size;
724         }
725
726         if (attr == &sysfs_stop) {
727                 bch_cache_set_stop(c);
728                 return size;
729         }
730
731         if (attr == &sysfs_clear_stats) {
732                 atomic_long_set(&c->writeback_keys_done,        0);
733                 atomic_long_set(&c->writeback_keys_failed,      0);
734                 bch_cache_accounting_clear(&c->accounting);
735
736                 return size;
737         }
738
739         sysfs_strtoul(congested_read_threshold_us,
740                       c->congested_read_threshold_us);
741         sysfs_strtoul(congested_write_threshold_us,
742                       c->congested_write_threshold_us);
743
744         if (attr == &sysfs_io_error_limit) {
745                 c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
746                 return size;
747         }
748
749         /* See count_io_errors() for why 88 */
750         if (attr == &sysfs_io_error_halflife) {
751                 c->error_decay = strtoul_or_return(buf) / 88;
752                 return size;
753         }
754
755         sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
756         sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
757
758         sysfs_strtoul(foreground_write_ratelimit_enabled,
759                       c->foreground_write_ratelimit_enabled);
760
761         if (attr == &sysfs_copy_gc_enabled) {
762                 struct cache *ca;
763                 unsigned i;
764                 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
765                         ?: (ssize_t) size;
766
767                 for_each_cache(ca, c, i)
768                         if (ca->moving_gc_read)
769                                 wake_up_process(ca->moving_gc_read);
770                 return ret;
771         }
772
773         if (attr == &sysfs_tiering_enabled) {
774                 ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
775                         ?: (ssize_t) size;
776
777                 if (c->tiering_read)
778                         wake_up_process(c->tiering_read);
779                 return ret;
780         }
781
782         sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);
783
784         if (attr == &sysfs_journal_flush) {
785                 bch_journal_meta_async(&c->journal, NULL);
786
787                 return size;
788         }
789
790         sysfs_strtoul(pd_controllers_update_seconds,
791                       c->pd_controllers_update_seconds);
792         sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
793
794         sysfs_strtoul(tiering_percent,          c->tiering_percent);
795         sysfs_pd_controller_store(tiering,      &c->tiering_pd);
796
797         /* Debugging: */
798
799 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
800         BCH_DEBUG_PARAMS()
801 #undef BCH_DEBUG_PARAM
802
803         if (!test_bit(CACHE_SET_RUNNING, &c->flags))
804                 return -EPERM;
805
806         if (test_bit(CACHE_SET_STOPPING, &c->flags))
807                 return -EINTR;
808
809         if (attr == &sysfs_blockdev_volume_create) {
810                 u64 v = strtoi_h_or_return(buf);
811                 int r = bch_blockdev_volume_create(c, v);
812
813                 if (r)
814                         return r;
815         }
816
817         if (attr == &sysfs_trigger_btree_coalesce)
818                 bch_coalesce(c);
819
820         /* Debugging: */
821
822         if (attr == &sysfs_trigger_gc)
823                 bch_gc(c);
824
825         if (attr == &sysfs_prune_cache) {
826                 struct shrink_control sc;
827
828                 sc.gfp_mask = GFP_KERNEL;
829                 sc.nr_to_scan = strtoul_or_return(buf);
830                 c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
831         }
832
833         return size;
834 }
835
836 STORE(bch_cache_set)
837 {
838         struct cache_set *c = container_of(kobj, struct cache_set, kobj);
839
840         mutex_lock(&bch_register_lock);
841         size = __bch_cache_set_store(kobj, attr, buf, size);
842         mutex_unlock(&bch_register_lock);
843
844         if (attr == &sysfs_add_device) {
845                 char *path = kstrdup(buf, GFP_KERNEL);
846                 int r = bch_cache_set_add_cache(c, strim(path));
847
848                 kfree(path);
849                 if (r)
850                         return r;
851         }
852
853         return size;
854 }
855
856 static struct attribute *bch_cache_set_files[] = {
857         &sysfs_unregister,
858         &sysfs_stop,
859         &sysfs_journal_write_delay_ms,
860         &sysfs_journal_reclaim_delay_ms,
861         &sysfs_journal_entry_size_max,
862         &sysfs_blockdev_volume_create,
863         &sysfs_add_device,
864
865         &sysfs_block_size,
866         &sysfs_block_size_bytes,
867         &sysfs_btree_node_size,
868         &sysfs_btree_node_size_bytes,
869         &sysfs_tree_depth,
870         &sysfs_root_usage_percent,
871         &sysfs_btree_cache_size,
872         &sysfs_cache_available_percent,
873         &sysfs_compression_stats,
874
875         &sysfs_average_key_size,
876
877         &sysfs_io_error_limit,
878         &sysfs_io_error_halflife,
879         &sysfs_congested,
880         &sysfs_congested_read_threshold_us,
881         &sysfs_congested_write_threshold_us,
882         &sysfs_clear_stats,
883
884         &sysfs_meta_replicas_have,
885         &sysfs_data_replicas_have,
886
887         &sysfs_foreground_target_percent,
888         &sysfs_tiering_percent,
889
890         &sysfs_journal_flush,
891         NULL
892 };
893 KTYPE(bch_cache_set);
894
895 /* internal dir - just a wrapper */
896
897 SHOW(bch_cache_set_internal)
898 {
899         struct cache_set *c = container_of(kobj, struct cache_set, internal);
900         return bch_cache_set_show(&c->kobj, attr, buf);
901 }
902
903 STORE(bch_cache_set_internal)
904 {
905         struct cache_set *c = container_of(kobj, struct cache_set, internal);
906         return bch_cache_set_store(&c->kobj, attr, buf, size);
907 }
908
909 static void bch_cache_set_internal_release(struct kobject *k)
910 {
911 }
912
913 static struct attribute *bch_cache_set_internal_files[] = {
914         &sysfs_journal_debug,
915
916         &sysfs_alloc_debug,
917
918         &sysfs_btree_gc_running,
919
920         &sysfs_btree_nodes,
921         &sysfs_btree_used_percent,
922
923         &sysfs_bset_tree_stats,
924         &sysfs_cache_read_races,
925         &sysfs_writeback_keys_done,
926         &sysfs_writeback_keys_failed,
927
928         &sysfs_trigger_btree_coalesce,
929         &sysfs_trigger_gc,
930         &sysfs_prune_cache,
931         &sysfs_foreground_write_ratelimit_enabled,
932         &sysfs_copy_gc_enabled,
933         &sysfs_tiering_enabled,
934         sysfs_pd_controller_files(tiering),
935         sysfs_pd_controller_files(foreground_write),
936         &sysfs_internal_uuid,
937
938 #define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
939         BCH_DEBUG_PARAMS()
940 #undef BCH_DEBUG_PARAM
941
942         NULL
943 };
944 KTYPE(bch_cache_set_internal);
945
946 /* options */
947
948 SHOW(bch_cache_set_opts_dir)
949 {
950         struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
951
952 #define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)            \
953         if (attr == &sysfs_opt_##_name)                                 \
954                 return _choices == bch_bool_opt || _choices == bch_uint_opt\
955                         ? snprintf(buf, PAGE_SIZE, "%i\n", c->opts._name)\
956                         : bch_snprint_string_list(buf, PAGE_SIZE,       \
957                                                 _choices, c->opts._name);\
958
959         BCH_VISIBLE_OPTS()
960 #undef BCH_OPT
961
962         return 0;
963 }
964
965 STORE(bch_cache_set_opts_dir)
966 {
967         struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
968
969 #define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)            \
970         if (attr == &sysfs_opt_##_name) {                               \
971                 ssize_t v = (_choices == bch_bool_opt ||                \
972                              _choices == bch_uint_opt)                  \
973                         ? strtoul_restrict_or_return(buf, _min, _max - 1)\
974                         : bch_read_string_list(buf, _choices);          \
975                                                                         \
976                 if (v < 0)                                              \
977                         return v;                                       \
978                                                                         \
979                 mutex_lock(&c->sb_lock);                                \
980                 if (attr == &sysfs_opt_compression) {                   \
981                         int ret = bch_check_set_has_compressed_data(c, v);\
982                         if (ret) {                                      \
983                                 mutex_unlock(&c->sb_lock);              \
984                                 return ret;                             \
985                         }                                               \
986                 }                                                       \
987                                                                         \
988                 if (_sb_opt##_BITS && v != _sb_opt(c->disk_sb)) {       \
989                         SET_##_sb_opt(c->disk_sb, v);                   \
990                         bch_write_super(c);                     \
991                 }                                                       \
992                                                                         \
993                 c->opts._name = v;                                      \
994                 mutex_unlock(&c->sb_lock);                              \
995                                                                         \
996                 return size;                                            \
997         }
998
999         BCH_VISIBLE_OPTS()
1000 #undef BCH_OPT
1001
1002         return size;
1003 }
1004
1005 static void bch_cache_set_opts_dir_release(struct kobject *k)
1006 {
1007 }
1008
1009 static struct attribute *bch_cache_set_opts_dir_files[] = {
1010 #define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)    \
1011         &sysfs_opt_##_name,
1012
1013         BCH_VISIBLE_OPTS()
1014 #undef BCH_OPT
1015
1016         NULL
1017 };
1018 KTYPE(bch_cache_set_opts_dir);
1019
1020 /* time stats */
1021
1022 SHOW(bch_cache_set_time_stats)
1023 {
1024         struct cache_set *c = container_of(kobj, struct cache_set, time_stats);
1025
1026 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
1027         sysfs_print_time_stats(&c->name##_time, name,                   \
1028                                frequency_units, duration_units);
1029         BCH_TIME_STATS()
1030 #undef BCH_TIME_STAT
1031
1032         return 0;
1033 }
1034
1035 STORE(bch_cache_set_time_stats)
1036 {
1037         struct cache_set *c = container_of(kobj, struct cache_set, time_stats);
1038
1039 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
1040         sysfs_clear_time_stats(&c->name##_time, name);
1041         BCH_TIME_STATS()
1042 #undef BCH_TIME_STAT
1043
1044         return size;
1045 }
1046
1047 static void bch_cache_set_time_stats_release(struct kobject *k)
1048 {
1049 }
1050
1051 static struct attribute *bch_cache_set_time_stats_files[] = {
1052 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
1053         sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
1054         BCH_TIME_STATS()
1055 #undef BCH_TIME_STAT
1056
1057         NULL
1058 };
1059 KTYPE(bch_cache_set_time_stats);
1060
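/*
 * quantile stats: a bucket_map_fn maps each bucket to one metric (prio age,
 * sectors used or oldest gen); show_quantiles() sorts the results and prints
 * 31 quantiles of the distribution.
 */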
1061 typedef unsigned (bucket_map_fn)(struct cache *, struct bucket *, void *);
1062
1063 static unsigned bucket_priority_fn(struct cache *ca, struct bucket *g,
1064                                    void *private)
1065 {
1066         int rw = (private ? 1 : 0);
1067
1068         return ca->set->prio_clock[rw].hand - g->prio[rw];
1069 }
1070
1071 static unsigned bucket_sectors_used_fn(struct cache *ca, struct bucket *g,
1072                                        void *private)
1073 {
1074         return bucket_sectors_used(g);
1075 }
1076
1077 static unsigned bucket_oldest_gen_fn(struct cache *ca, struct bucket *g,
1078                                      void *private)
1079 {
1080         return bucket_gc_gen(ca, g);
1081 }
1082
1083 static ssize_t show_quantiles(struct cache *ca, char *buf,
1084                               bucket_map_fn *fn, void *private)
1085 {
1086         int cmp(const void *l, const void *r)
1087         {       return *((unsigned *) r) - *((unsigned *) l); }
1088
1089         size_t n = ca->mi.nbuckets, i;
1090         /* Compute 31 quantiles */
1091         unsigned q[31], *p;
1092         ssize_t ret = 0;
1093
1094         p = vzalloc(ca->mi.nbuckets * sizeof(unsigned));
1095         if (!p)
1096                 return -ENOMEM;
1097
1098         for (i = ca->mi.first_bucket; i < n; i++)
1099                 p[i] = fn(ca, &ca->buckets[i], private);
1100
1101         sort(p, n, sizeof(unsigned), cmp, NULL);
1102
1103         while (n &&
1104                !p[n - 1])
1105                 --n;
1106
1107         for (i = 0; i < ARRAY_SIZE(q); i++)
1108                 q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
1109
1110         vfree(p);
1111
1112         for (i = 0; i < ARRAY_SIZE(q); i++)
1113                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
1114                                  "%u ", q[i]);
1115         buf[ret - 1] = '\n';
1116
1117         return ret;
1118
1119 }
1120
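/* used/size of free_inc and each allocation reserve freelist, under freelist_lock */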
1121 static ssize_t show_reserve_stats(struct cache *ca, char *buf)
1122 {
1123         enum alloc_reserve i;
1124         ssize_t ret;
1125
1126         spin_lock(&ca->freelist_lock);
1127
1128         ret = scnprintf(buf, PAGE_SIZE,
1129                         "free_inc:\t%zu\t%zu\n",
1130                         fifo_used(&ca->free_inc),
1131                         ca->free_inc.size);
1132
1133         for (i = 0; i < RESERVE_NR; i++)
1134                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
1135                                  "free[%u]:\t%zu\t%zu\n", i,
1136                                  fifo_used(&ca->free[i]),
1137                                  ca->free[i].size);
1138
1139         spin_unlock(&ca->freelist_lock);
1140
1141         return ret;
1142 }
1143
1144 static ssize_t show_cache_alloc_debug(struct cache *ca, char *buf)
1145 {
1146         struct cache_set *c = ca->set;
1147         struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
1148
1149         return scnprintf(buf, PAGE_SIZE,
1150                 "free_inc:               %zu/%zu\n"
1151                 "free[RESERVE_PRIO]:     %zu/%zu\n"
1152                 "free[RESERVE_BTREE]:    %zu/%zu\n"
1153                 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
1154                 "free[RESERVE_NONE]:     %zu/%zu\n"
1155                 "alloc:                  %llu/%llu\n"
1156                 "meta:                   %llu/%llu\n"
1157                 "dirty:                  %llu/%llu\n"
1158                 "available:              %llu/%llu\n"
1159                 "freelist_wait:          %s\n"
1160                 "open buckets:           %u/%u (reserved %u)\n"
1161                 "open_buckets_wait:      %s\n",
1162                 fifo_used(&ca->free_inc),               ca->free_inc.size,
1163                 fifo_used(&ca->free[RESERVE_PRIO]),     ca->free[RESERVE_PRIO].size,
1164                 fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
1165                 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
1166                 fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
1167                 stats.buckets_alloc,                    ca->mi.nbuckets - ca->mi.first_bucket,
1168                 stats.buckets_meta,                     ca->mi.nbuckets - ca->mi.first_bucket,
1169                 stats.buckets_dirty,                    ca->mi.nbuckets - ca->mi.first_bucket,
1170                 __buckets_available_cache(ca, stats),   ca->mi.nbuckets - ca->mi.first_bucket,
1171                 c->freelist_wait.list.first             ? "waiting" : "empty",
1172                 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
1173                 c->open_buckets_wait.list.first         ? "waiting" : "empty");
1174 }
1175
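/* total sectors written to this device, summed over the per-cpu counters */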
1176 static u64 sectors_written(struct cache *ca)
1177 {
1178         u64 ret = 0;
1179         int cpu;
1180
1181         for_each_possible_cpu(cpu)
1182                 ret += *per_cpu_ptr(ca->sectors_written, cpu);
1183
1184         return ret;
1185 }
1186
1187 SHOW(bch_cache)
1188 {
1189         struct cache *ca = container_of(kobj, struct cache, kobj);
1190         struct cache_set *c = ca->set;
1191         struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
1192
1193         sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
1194
1195         sysfs_hprint(bucket_size,       bucket_bytes(ca));
1196         sysfs_print(bucket_size_bytes,  bucket_bytes(ca));
1197         sysfs_hprint(block_size,        block_bytes(c));
1198         sysfs_print(block_size_bytes,   block_bytes(c));
1199         sysfs_print(first_bucket,       ca->mi.first_bucket);
1200         sysfs_print(nbuckets,           ca->mi.nbuckets);
1201         sysfs_print(discard,            ca->mi.discard);
1202         sysfs_hprint(written, sectors_written(ca) << 9);
1203         sysfs_hprint(btree_written,
1204                      atomic64_read(&ca->btree_sectors_written) << 9);
1205         sysfs_hprint(metadata_written,
1206                      (atomic64_read(&ca->meta_sectors_written) +
1207                       atomic64_read(&ca->btree_sectors_written)) << 9);
1208
1209         sysfs_print(io_errors,
1210                     atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);
1211
1212         sysfs_hprint(dirty_data,        stats.sectors_dirty << 9);
1213         sysfs_print(dirty_bytes,        stats.sectors_dirty << 9);
1214         sysfs_print(dirty_buckets,      stats.buckets_dirty);
1215         sysfs_hprint(cached_data,       stats.sectors_cached << 9);
1216         sysfs_print(cached_bytes,       stats.sectors_cached << 9);
1217         sysfs_print(cached_buckets,     stats.buckets_cached);
1218         sysfs_print(meta_buckets,       stats.buckets_meta);
1219         sysfs_print(alloc_buckets,      stats.buckets_alloc);
1220         sysfs_print(available_buckets,  buckets_available_cache(ca));
1221         sysfs_print(free_buckets,       buckets_free_cache(ca));
1222         sysfs_print(has_data,           ca->mi.has_data);
1223         sysfs_print(has_metadata,       ca->mi.has_metadata);
1224
1225         sysfs_pd_controller_show(copy_gc, &ca->moving_gc_pd);
1226
1227         if (attr == &sysfs_cache_replacement_policy)
1228                 return bch_snprint_string_list(buf, PAGE_SIZE,
1229                                                bch_cache_replacement_policies,
1230                                                ca->mi.replacement);
1231
1232         sysfs_print(tier,               ca->mi.tier);
1233
1234         if (attr == &sysfs_state_rw)
1235                 return bch_snprint_string_list(buf, PAGE_SIZE,
1236                                                bch_cache_state,
1237                                                ca->mi.state);
1238
1239         if (attr == &sysfs_read_priority_stats)
1240                 return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
1241         if (attr == &sysfs_write_priority_stats)
1242                 return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
1243         if (attr == &sysfs_fragmentation_stats)
1244                 return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
1245         if (attr == &sysfs_oldest_gen_stats)
1246                 return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
1247         if (attr == &sysfs_reserve_stats)
1248                 return show_reserve_stats(ca, buf);
1249         if (attr == &sysfs_alloc_debug)
1250                 return show_cache_alloc_debug(ca, buf);
1251
1252         return 0;
1253 }
1254
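/* runs under bch_register_lock via the STORE_LOCKED() wrapper below */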
1255 STORE(__bch_cache)
1256 {
1257         struct cache *ca = container_of(kobj, struct cache, kobj);
1258         struct cache_set *c = ca->set;
1259         struct bch_member *mi;
1260
1261         sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
1262
1263         if (attr == &sysfs_discard) {
1264                 bool v = strtoul_or_return(buf);
1265
1266                 mutex_lock(&c->sb_lock);
1267                 mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];
1268
1269                 if (v != BCH_MEMBER_DISCARD(mi)) {
1270                         SET_BCH_MEMBER_DISCARD(mi, v);
1271                         bch_write_super(c);
1272                 }
1273                 mutex_unlock(&c->sb_lock);
1274         }
1275
1276         if (attr == &sysfs_cache_replacement_policy) {
1277                 ssize_t v = bch_read_string_list(buf, bch_cache_replacement_policies);
1278
1279                 if (v < 0)
1280                         return v;
1281
1282                 mutex_lock(&c->sb_lock);
1283                 mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];
1284
1285                 if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
1286                         SET_BCH_MEMBER_REPLACEMENT(mi, v);
1287                         bch_write_super(c);
1288                 }
1289                 mutex_unlock(&c->sb_lock);
1290         }
1291
1292         if (attr == &sysfs_state_rw) {
1293                 char name[BDEVNAME_SIZE];
1294                 const char *err = NULL;
1295                 ssize_t v = bch_read_string_list(buf, bch_cache_state);
1296
1297                 if (v < 0)
1298                         return v;
1299
1300                 if (v == ca->mi.state)
1301                         return size;
1302
1303                 switch (v) {
1304                 case BCH_MEMBER_STATE_ACTIVE:
1305                         err = bch_cache_read_write(ca);
1306                         break;
1307                 case BCH_MEMBER_STATE_RO:
1308                         bch_cache_read_only(ca);
1309                         break;
1310                 case BCH_MEMBER_STATE_FAILED:
1311                 case BCH_MEMBER_STATE_SPARE:
1312                         /*
1313                          * XXX: need to migrate data off and set correct state
1314                          */
1315                         pr_err("can't set %s %s: not supported",
1316                                bdevname(ca->disk_sb.bdev, name),
1317                                bch_cache_state[v]);
1318                         return -EINVAL;
1319                 }
1320
1321                 if (err) {
1322                         pr_err("can't set %s %s: %s",
1323                                bdevname(ca->disk_sb.bdev, name),
1324                                bch_cache_state[v], err);
1325                         return -EINVAL;
1326                 }
1327         }
1328
1329         if (attr == &sysfs_unregister) {
1330                 bool force = false;
1331
1332                 if (!strncmp(buf, "force", 5) &&
1333                     (buf[5] == '\0' || buf[5] == '\n'))
1334                         force = true;
1335                 bch_cache_remove(ca, force);
1336         }
1337
1338         if (attr == &sysfs_clear_stats) {
1339                 int cpu;
1340
1341                 for_each_possible_cpu(cpu)
1342                         *per_cpu_ptr(ca->sectors_written, cpu) = 0;
1343
1344                 atomic64_set(&ca->btree_sectors_written, 0);
1345                 atomic64_set(&ca->meta_sectors_written, 0);
1346                 atomic_set(&ca->io_count, 0);
1347                 atomic_set(&ca->io_errors, 0);
1348         }
1349
1350         return size;
1351 }
1352 STORE_LOCKED(bch_cache)
1353
1354 static struct attribute *bch_cache_files[] = {
1355         &sysfs_uuid,
1356         &sysfs_unregister,
1357         &sysfs_bucket_size,
1358         &sysfs_bucket_size_bytes,
1359         &sysfs_block_size,
1360         &sysfs_block_size_bytes,
1361         &sysfs_first_bucket,
1362         &sysfs_nbuckets,
1363         &sysfs_read_priority_stats,
1364         &sysfs_write_priority_stats,
1365         &sysfs_fragmentation_stats,
1366         &sysfs_oldest_gen_stats,
1367         &sysfs_reserve_stats,
1368         &sysfs_available_buckets,
1369         &sysfs_free_buckets,
1370         &sysfs_dirty_data,
1371         &sysfs_dirty_bytes,
1372         &sysfs_dirty_buckets,
1373         &sysfs_cached_data,
1374         &sysfs_cached_bytes,
1375         &sysfs_cached_buckets,
1376         &sysfs_meta_buckets,
1377         &sysfs_alloc_buckets,
1378         &sysfs_has_data,
1379         &sysfs_has_metadata,
1380         &sysfs_discard,
1381         &sysfs_written,
1382         &sysfs_btree_written,
1383         &sysfs_metadata_written,
1384         &sysfs_io_errors,
1385         &sysfs_clear_stats,
1386         &sysfs_cache_replacement_policy,
1387         &sysfs_tier,
1388         &sysfs_state_rw,
1389         &sysfs_alloc_debug,
1390
1391         sysfs_pd_controller_files(copy_gc),
1392         NULL
1393 };
1394 KTYPE(bch_cache);