 * bcache sysfs interfaces
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_update.h"
#include "writeback.h"

#include <linux/blkdev.h>
#include <linux/sort.h>
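
/*
 * Attribute declarations: the {read,write,rw}_attribute() helpers each
 * declare a "struct attribute sysfs_<name>". Write-only attributes act
 * as triggers, read-only ones expose statistics, and rw ones are
 * tunables.
 */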
write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
write_attribute(stop);
write_attribute(clear_stats);
write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(blockdev_volume_create);
write_attribute(add_device);
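
/*
 * Illustrative usage from userspace (the exact sysfs paths depend on
 * how the devices were registered; these are examples, not guarantees):
 *
 *	echo 1 > /sys/fs/bcache/<set-uuid>/internal/trigger_gc
 *	echo 1 > /sys/block/bcache0/bcache/clear_stats
 */
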
read_attribute(minor);
read_attribute(bucket_size);
read_attribute(bucket_size_bytes);
read_attribute(block_size);
read_attribute(block_size_bytes);
read_attribute(btree_node_size);
read_attribute(btree_node_size_bytes);
read_attribute(first_bucket);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(read_priority_stats);
read_attribute(write_priority_stats);
read_attribute(fragmentation_stats);
read_attribute(oldest_gen_stats);
read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(cache_available_percent);
read_attribute(compression_stats);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(journal_debug);
write_attribute(journal_flush);
read_attribute(internal_uuid);

read_attribute(btree_gc_running);

read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(available_buckets);
read_attribute(free_buckets);
read_attribute(dirty_data);
read_attribute(dirty_bytes);
read_attribute(dirty_buckets);
read_attribute(cached_data);
read_attribute(cached_bytes);
read_attribute(cached_buckets);
read_attribute(meta_buckets);
read_attribute(alloc_buckets);
read_attribute(has_data);
read_attribute(has_metadata);
read_attribute(bset_tree_stats);
read_attribute(alloc_debug);

read_attribute(state);
read_attribute(cache_read_races);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
read_attribute(congested);
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);

rw_attribute(sequential_cutoff);
rw_attribute(cache_mode);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
sysfs_pd_controller_attribute(writeback);

read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);

rw_attribute(journal_write_delay_ms);
rw_attribute(journal_reclaim_delay_ms);
read_attribute(journal_entry_size_max);

rw_attribute(discard);
rw_attribute(running);

rw_attribute(readahead);
rw_attribute(verify);
rw_attribute(bypass_torture_test);
rw_attribute(cache_replacement_policy);

rw_attribute(foreground_write_ratelimit_enabled);
rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);
rw_attribute(tiering_enabled);
rw_attribute(tiering_percent);
sysfs_pd_controller_attribute(tiering);

sysfs_pd_controller_attribute(foreground_write);

rw_attribute(pd_controllers_update_seconds);

rw_attribute(foreground_target_percent);

read_attribute(meta_replicas_have);
read_attribute(data_replicas_have);
read_attribute(tier);
#define BCH_DEBUG_PARAM(name, description)				\
	rw_attribute(name);
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
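
/*
 * X-macro pattern: BCH_DEBUG_PARAM(), BCH_OPT() and BCH_TIME_STAT() are
 * redefined before each expansion of the corresponding list macro and
 * undefined again afterwards, so one central list generates the
 * attribute declarations, the show/store code and the attribute tables
 * below. The lists themselves live in other headers.
 */
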
#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)		\
static struct attribute sysfs_opt_##_name = {				\
	.name = #_name,							\
	.mode = S_IRUGO|(_perm ? S_IWUSR : 0)				\
};
#define BCH_TIME_STAT(name, frequency_units, duration_units)		\
	sysfs_time_stats_attribute(name, frequency_units, duration_units);
static struct attribute sysfs_state_rw = {
	.name = "state",
	.mode = S_IRUGO|S_IWUSR
};
SHOW(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };

#define var(stat) (dc->stat)
	if (attr == &sysfs_cache_mode)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_cache_modes + 1,
					       BDEV_CACHE_MODE(dc->disk_sb.sb));
	var_printf(verify, "%i");
	var_printf(bypass_torture_test, "%i");
	var_printf(writeback_metadata, "%i");
	var_printf(writeback_running, "%i");
	var_print(writeback_percent);
	sysfs_pd_controller_show(writeback, &dc->writeback_pd);

	sysfs_hprint(dirty_data,
		     bcache_dev_sectors_dirty(&dc->disk) << 9);
	sysfs_print(dirty_bytes,
		    bcache_dev_sectors_dirty(&dc->disk) << 9);

	sysfs_hprint(stripe_size, dc->disk.stripe_size << 9);
	var_printf(partial_stripes_expensive, "%u");

	var_hprint(sequential_cutoff);
	var_hprint(readahead);

	sysfs_print(running, atomic_read(&dc->running));
	sysfs_print(state, states[BDEV_STATE(dc->disk_sb.sb)]);
	if (attr == &sysfs_label) {
		memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
		buf[BCH_SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}

STORE(__cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	ssize_t v;
	struct cache_set *c;
	struct kobj_uevent_env *env;

#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
	d_strtoul(bypass_torture_test);
	d_strtoul(writeback_metadata);
	d_strtoul(writeback_running);
	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent, 0, 40);
	sysfs_pd_controller_store(writeback, &dc->writeback_pd);

	d_strtoi_h(sequential_cutoff);
	d_strtoi_h(readahead);
	if (attr == &sysfs_clear_stats)
		bch_cache_accounting_clear(&dc->accounting);

	if (attr == &sysfs_running &&
	    strtoul_or_return(buf))
		bch_cached_dev_run(dc);
	if (attr == &sysfs_cache_mode) {
		ssize_t v = bch_read_string_list(buf, bch_cache_modes + 1);

		if (v < 0)
			return v;

		if ((unsigned) v != BDEV_CACHE_MODE(dc->disk_sb.sb)) {
			SET_BDEV_CACHE_MODE(dc->disk_sb.sb, v);
			bch_write_bdev_super(dc, NULL);
		}
	}
	if (attr == &sysfs_label) {
		u64 journal_seq = 0;
		int ret;

		if (size > BCH_SB_LABEL_SIZE)
			return -EINVAL;

		mutex_lock(&dc->disk.inode_lock);

		memcpy(dc->disk_sb.sb->label, buf, size);
		if (size < BCH_SB_LABEL_SIZE)
			dc->disk_sb.sb->label[size] = '\0';
		if (size && dc->disk_sb.sb->label[size - 1] == '\n')
			dc->disk_sb.sb->label[size - 1] = '\0';

		memcpy(dc->disk.inode.v.i_label,
		       dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);

		bch_write_bdev_super(dc, NULL);
		ret = bch_btree_update(dc->disk.c, BTREE_ID_INODES,
				       &dc->disk.inode.k_i, &journal_seq);

		mutex_unlock(&dc->disk.inode_lock);

		if (ret)
			return ret;

		ret = bch_journal_flush_seq(&dc->disk.c->journal,
					    journal_seq);
		if (ret)
			return ret;
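
		/*
		 * Announce the label change to userspace with a change
		 * uevent, so tools watching the device (e.g. udev rules
		 * that maintain by-label symlinks) see the new name.
		 */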
		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
		if (!env)
			return -ENOMEM;

		add_uevent_var(env, "DRIVER=bcache");
		add_uevent_var(env, "CACHED_UUID=%pU",
			       dc->disk_sb.sb->disk_uuid.b);
		add_uevent_var(env, "CACHED_LABEL=%s", buf);
		kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
				   KOBJ_CHANGE, env->envp);
		kfree(env);
	}
	if (attr == &sysfs_attach) {
		if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid))
			return -EINVAL;

		list_for_each_entry(c, &bch_cache_sets, list) {
			v = bch_cached_dev_attach(dc, c);
			if (!v)
				return size;
		}

		pr_err("Can't attach %s: cache set not found", buf);
		return -ENOENT;
	}

	if (attr == &sysfs_detach && dc->disk.c)
		bch_cached_dev_detach(dc);

	if (attr == &sysfs_stop)
		bch_blockdev_stop(&dc->disk);

	return size;
}

STORE(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);

	mutex_lock(&bch_register_lock);
	size = __cached_dev_store(kobj, attr, buf, size);

	if (attr == &sysfs_writeback_running)
		bch_writeback_queue(dc);

	if (attr == &sysfs_writeback_percent)
		schedule_delayed_work(&dc->writeback_pd_update,
				      dc->writeback_pd_update_seconds * HZ);

	mutex_unlock(&bch_register_lock);

	return size;
}

static struct attribute *bch_cached_dev_files[] = {
	&sysfs_writeback_metadata,
	&sysfs_writeback_running,
	&sysfs_writeback_percent,
	sysfs_pd_controller_files(writeback),

	&sysfs_partial_stripes_expensive,
	&sysfs_sequential_cutoff,

#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_bypass_torture_test,
#endif
	NULL
};
KTYPE(bch_cached_dev);
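
/*
 * Block-device volumes: virtual block devices backed entirely by the
 * cache set. Size and label live in the volume's inode in
 * BTREE_ID_INODES, so the store paths below update the inode and then
 * flush the journal.
 */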
SHOW(bch_blockdev_volume)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);

	sysfs_hprint(size, le64_to_cpu(d->inode.v.i_size));

	if (attr == &sysfs_label) {
		memcpy(buf, d->inode.v.i_label, BCH_SB_LABEL_SIZE);
		buf[BCH_SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}

STORE(__bch_blockdev_volume)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);

	if (attr == &sysfs_size) {
		u64 journal_seq = 0;
		u64 v = strtoi_h_or_return(buf);
		int ret;

		mutex_lock(&d->inode_lock);

		if (v < le64_to_cpu(d->inode.v.i_size)) {
			ret = bch_inode_truncate(d->c, d->inode.k.p.inode,
			if (ret) {
				mutex_unlock(&d->inode_lock);
				return ret;
			}
		}

		d->inode.v.i_size = cpu_to_le64(v);
		ret = bch_btree_update(d->c, BTREE_ID_INODES,
				       &d->inode.k_i, &journal_seq);

		mutex_unlock(&d->inode_lock);

		if (ret)
			return ret;

		ret = bch_journal_flush_seq(&d->c->journal, journal_seq);
		if (ret)
			return ret;

		set_capacity(d->disk, v >> 9);
	}
	if (attr == &sysfs_label) {
		u64 journal_seq = 0;
		int ret;

		mutex_lock(&d->inode_lock);

		memcpy(d->inode.v.i_label, buf, BCH_SB_LABEL_SIZE);
		ret = bch_btree_update(d->c, BTREE_ID_INODES,
				       &d->inode.k_i, &journal_seq);

		mutex_unlock(&d->inode_lock);

		return ret ?: bch_journal_flush_seq(&d->c->journal, journal_seq);
	}
	if (attr == &sysfs_unregister) {
		set_bit(BCACHE_DEV_DETACHING, &d->flags);
		bch_blockdev_stop(d);
	}

	return size;
}
STORE_LOCKED(bch_blockdev_volume)

static struct attribute *bch_blockdev_volume_files[] = {
	&sysfs_unregister,
	&sysfs_size,
	&sysfs_label,
	NULL
};
KTYPE(bch_blockdev_volume);
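
/*
 * Walks every btree node currently in the in-memory btree node cache
 * (via its rhashtable) and accumulates per-bset statistics: how many
 * bsets use the read-only vs. read-write auxiliary search trees, and
 * how often key packing failed.
 */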
static int bch_bset_print_stats(struct cache_set *c, char *buf)
{
	struct bset_stats stats;
	struct btree *b;
	struct bucket_table *tbl;
	struct rhash_head *pos;
	unsigned iter;

	memset(&stats, 0, sizeof(stats));

	for_each_cached_btree(b, c, tbl, iter, pos)
		bch_btree_keys_stats(b, &stats);

	return snprintf(buf, PAGE_SIZE,
			"written sets: %zu\n"
			"written key bytes: %zu\n"
			"unwritten sets: %zu\n"
			"unwritten key bytes: %zu\n"
			"no table sets: %zu\n"
			"no table key bytes: %zu\n"
			"failed unpacked: %zu\n"
			"failed overflow: %zu\n",
			stats.sets[BSET_RO_AUX_TREE].nr,
			stats.sets[BSET_RO_AUX_TREE].bytes,
			stats.sets[BSET_RW_AUX_TREE].nr,
			stats.sets[BSET_RW_AUX_TREE].bytes,
			stats.sets[BSET_NO_AUX_TREE].nr,
			stats.sets[BSET_NO_AUX_TREE].bytes,
			stats.failed_unpacked,
			stats.failed_overflow);
}
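
/*
 * Estimates how full the root node of the extents btree is: walks the
 * root's keys and reports key bytes as a percentage of the node size.
 * The lock/retry dance re-takes the read lock if the root was replaced
 * while we were waiting on it.
 */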
static unsigned bch_root_usage(struct cache_set *c)
{
	unsigned bytes = 0;
	struct bkey_packed *k;
	struct btree *b;
	struct btree_node_iter iter;

	goto lock_root;

	do {
		six_unlock_read(&b->lock);
lock_root:
		b = c->btree_roots[BTREE_ID_EXTENTS].b;
		six_lock_read(&b->lock);
	} while (b != c->btree_roots[BTREE_ID_EXTENTS].b);

	for_each_btree_node_key(b, k, &iter, btree_node_is_extents(b))
		bytes += bkey_bytes(k);

	six_unlock_read(&b->lock);

	return (bytes * 100) / btree_bytes(c);
}

static size_t bch_cache_size(struct cache_set *c)
{
	struct btree *b;
	size_t ret = 0;

	mutex_lock(&c->btree_cache_lock);
	list_for_each_entry(b, &c->btree_cache, list)
		ret += btree_bytes(c);
	mutex_unlock(&c->btree_cache_lock);

	return ret;
}

static unsigned bch_cache_available_percent(struct cache_set *c)
{
	return div64_u64((u64) sectors_available(c) * 100,
			 c->capacity);
}

static unsigned bch_btree_used(struct cache_set *c)
{
	return div64_u64(c->gc_stats.key_bytes * 100,
			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
}

static unsigned bch_average_key_size(struct cache_set *c)
{
	return c->gc_stats.nkeys
		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
		: 0;
}

static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf)
{
	struct bucket_stats_cache_set stats = bch_bucket_stats_read_cache_set(c);

	return scnprintf(buf, PAGE_SIZE,
			 "capacity:\t\t%llu\n"
			 "compressed sectors:\n"
			 "\tmeta:\t\t%llu\n"
			 "\tdirty:\t\t%llu\n"
			 "\tcached:\t\t%llu\n"
			 "uncompressed sectors:\n"
			 "\tmeta:\t\t%llu\n"
			 "\tdirty:\t\t%llu\n"
			 "\tcached:\t\t%llu\n"
			 "persistent reserved sectors:\t%llu\n"
			 "online reserved sectors:\t%llu\n",
			 c->capacity,
			 stats.s[S_COMPRESSED][S_META],
			 stats.s[S_COMPRESSED][S_DIRTY],
			 stats.s[S_COMPRESSED][S_CACHED],
			 stats.s[S_UNCOMPRESSED][S_META],
			 stats.s[S_UNCOMPRESSED][S_DIRTY],
			 stats.s[S_UNCOMPRESSED][S_CACHED],
			 stats.persistent_reserved,
			 stats.online_reserved);
}
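
/*
 * Scans the entire extents btree and tallies, per extent, whether its
 * first pointer is compressed, plus the compressed vs. uncompressed
 * sector counts. This is a full-tree walk, so reading this file on a
 * large cache set is expensive.
 */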
static ssize_t bch_compression_stats(struct cache_set *c, char *buf)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
	    nr_compressed_extents = 0,
	    compressed_sectors_compressed = 0,
	    compressed_sectors_uncompressed = 0;

	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
		if (k.k->type == BCH_EXTENT) {
			struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
			const struct bch_extent_ptr *ptr;
			const union bch_extent_crc *crc;

			extent_for_each_ptr_crc(e, ptr, crc) {
				if (crc_compression_type(crc) == BCH_COMPRESSION_NONE) {
					nr_uncompressed_extents++;
					uncompressed_sectors += e.k->size;
				} else {
					nr_compressed_extents++;
					compressed_sectors_compressed +=
						crc_compressed_size(e.k, crc);
					compressed_sectors_uncompressed +=
						crc_uncompressed_size(e.k, crc);
				}

				/* only looking at the first ptr */
				break;
			}
		}

	bch_btree_iter_unlock(&iter);
	return snprintf(buf, PAGE_SIZE,
			"uncompressed data:\n"
			" nr extents: %llu\n"
			" size (bytes): %llu\n"
			"compressed data:\n"
			" nr extents: %llu\n"
			" compressed size (bytes): %llu\n"
			" uncompressed size (bytes): %llu\n",
			nr_uncompressed_extents,
			uncompressed_sectors << 9,
			nr_compressed_extents,
			compressed_sectors_compressed << 9,
			compressed_sectors_uncompressed << 9);
}

SHOW(bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	sysfs_print(minor, c->minor);

	sysfs_print(journal_write_delay_ms, c->journal.write_delay_ms);
	sysfs_print(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
	sysfs_hprint(journal_entry_size_max, c->journal.entry_size_max);

	sysfs_hprint(block_size, block_bytes(c));
	sysfs_print(block_size_bytes, block_bytes(c));
	sysfs_hprint(btree_node_size, c->sb.btree_node_size << 9);
	sysfs_print(btree_node_size_bytes, c->sb.btree_node_size << 9);

	sysfs_hprint(btree_cache_size, bch_cache_size(c));
	sysfs_print(cache_available_percent, bch_cache_available_percent(c));

	sysfs_print(btree_gc_running, c->gc_pos.phase != GC_PHASE_DONE);

	/* XXX: reimplement */
	sysfs_print(btree_used_percent, bch_btree_used(c));
	sysfs_print(btree_nodes, c->gc_stats.nodes);
	sysfs_hprint(average_key_size, bch_average_key_size(c));

	sysfs_print(cache_read_races,
		    atomic_long_read(&c->cache_read_races));

	sysfs_print(writeback_keys_done,
		    atomic_long_read(&c->writeback_keys_done));
	sysfs_print(writeback_keys_failed,
		    atomic_long_read(&c->writeback_keys_failed));

	/* See count_io_errors for why 88 */
	sysfs_print(io_error_halflife, c->error_decay * 88);
	sysfs_print(io_error_limit, c->error_limit >> IO_ERROR_SHIFT);

	sysfs_hprint(congested,
		     ((uint64_t) bch_get_congested(c)) << 9);
	sysfs_print(congested_read_threshold_us,
		    c->congested_read_threshold_us);
	sysfs_print(congested_write_threshold_us,
		    c->congested_write_threshold_us);

	sysfs_printf(foreground_write_ratelimit_enabled, "%i",
		     c->foreground_write_ratelimit_enabled);
	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
	sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);

	sysfs_print(pd_controllers_update_seconds,
		    c->pd_controllers_update_seconds);
	sysfs_print(foreground_target_percent, c->foreground_target_percent);

	sysfs_printf(tiering_enabled, "%i", c->tiering_enabled);
	sysfs_print(tiering_percent, c->tiering_percent);
	sysfs_pd_controller_show(tiering, &c->tiering_pd);

	sysfs_printf(meta_replicas_have, "%u", c->sb.meta_replicas_have);
	sysfs_printf(data_replicas_have, "%u", c->sb.data_replicas_have);
	if (attr == &sysfs_journal_debug)
		return bch_journal_print_debug(&c->journal, buf);

#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

	if (!test_bit(CACHE_SET_RUNNING, &c->flags))
		return -EPERM;

	if (attr == &sysfs_bset_tree_stats)
		return bch_bset_print_stats(c, buf);
	if (attr == &sysfs_alloc_debug)
		return show_cache_set_alloc_debug(c, buf);

	sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
	sysfs_print(root_usage_percent, bch_root_usage(c));

	if (attr == &sysfs_compression_stats)
		return bch_compression_stats(c, buf);

	sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);

	return 0;
}

STORE(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	if (attr == &sysfs_unregister) {
		bch_cache_set_unregister(c);
		return size;
	}

	if (attr == &sysfs_stop) {
		bch_cache_set_stop(c);
		return size;
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&c->writeback_keys_done, 0);
		atomic_long_set(&c->writeback_keys_failed, 0);
		bch_cache_accounting_clear(&c->accounting);
		return size;
	}

	sysfs_strtoul(congested_read_threshold_us,
		      c->congested_read_threshold_us);
	sysfs_strtoul(congested_write_threshold_us,
		      c->congested_write_threshold_us);

	if (attr == &sysfs_io_error_limit) {
		c->error_limit = strtoul_or_return(buf) << IO_ERROR_SHIFT;
		return size;
	}

	/* See count_io_errors() for why 88 */
	if (attr == &sysfs_io_error_halflife) {
		c->error_decay = strtoul_or_return(buf) / 88;
		return size;
	}
	sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
	sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);

	sysfs_strtoul(foreground_write_ratelimit_enabled,
		      c->foreground_write_ratelimit_enabled);

	if (attr == &sysfs_copy_gc_enabled) {
		struct cache *ca;
		unsigned i;
		ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
			?: (ssize_t) size;

		for_each_cache(ca, c, i)
			if (ca->moving_gc_read)
				wake_up_process(ca->moving_gc_read);

		return ret;
	}

	if (attr == &sysfs_tiering_enabled) {
		ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
			?: (ssize_t) size;

		if (c->tiering_read)
			wake_up_process(c->tiering_read);

		return ret;
	}

	sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);

	if (attr == &sysfs_journal_flush) {
		bch_journal_meta_async(&c->journal, NULL);
		return size;
	}

	sysfs_strtoul(pd_controllers_update_seconds,
		      c->pd_controllers_update_seconds);
	sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);

	sysfs_strtoul(tiering_percent, c->tiering_percent);
	sysfs_pd_controller_store(tiering, &c->tiering_pd);
#define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

	if (!test_bit(CACHE_SET_RUNNING, &c->flags))
		return -EPERM;

	if (test_bit(CACHE_SET_STOPPING, &c->flags))
		return -EINTR;
	if (attr == &sysfs_blockdev_volume_create) {
		u64 v = strtoi_h_or_return(buf);
		int r = bch_blockdev_volume_create(c, v);

		if (r)
			return r;
	}
	if (attr == &sysfs_trigger_btree_coalesce)
		bch_coalesce(c);

	if (attr == &sysfs_trigger_gc)
		bch_gc(c);
	if (attr == &sysfs_prune_cache) {
		struct shrink_control sc;

		sc.gfp_mask = GFP_KERNEL;
		sc.nr_to_scan = strtoul_or_return(buf);
		c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
	}

	return size;
}

STORE(bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	mutex_lock(&bch_register_lock);
	size = __bch_cache_set_store(kobj, attr, buf, size);
	mutex_unlock(&bch_register_lock);

	if (attr == &sysfs_add_device) {
		char *path = kstrdup(buf, GFP_KERNEL);
		int r;

		if (!path)
			return -ENOMEM;

		r = bch_cache_set_add_cache(c, strim(path));
		kfree(path);
		if (r)
			return r;
	}

	return size;
}

static struct attribute *bch_cache_set_files[] = {
	&sysfs_journal_write_delay_ms,
	&sysfs_journal_reclaim_delay_ms,
	&sysfs_journal_entry_size_max,
	&sysfs_blockdev_volume_create,

	&sysfs_block_size_bytes,
	&sysfs_btree_node_size,
	&sysfs_btree_node_size_bytes,

	&sysfs_root_usage_percent,
	&sysfs_btree_cache_size,
	&sysfs_cache_available_percent,
	&sysfs_compression_stats,

	&sysfs_average_key_size,

	&sysfs_io_error_limit,
	&sysfs_io_error_halflife,

	&sysfs_congested_read_threshold_us,
	&sysfs_congested_write_threshold_us,

	&sysfs_meta_replicas_have,
	&sysfs_data_replicas_have,

	&sysfs_foreground_target_percent,
	&sysfs_tiering_percent,

	&sysfs_journal_flush,
	NULL
};
KTYPE(bch_cache_set);

/* internal dir - just a wrapper */

SHOW(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);
	return bch_cache_set_show(&c->kobj, attr, buf);
}

STORE(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);
	return bch_cache_set_store(&c->kobj, attr, buf, size);
}

static void bch_cache_set_internal_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_internal_files[] = {
	&sysfs_journal_debug,

	&sysfs_btree_gc_running,

	&sysfs_btree_used_percent,

	&sysfs_bset_tree_stats,
	&sysfs_cache_read_races,
	&sysfs_writeback_keys_done,
	&sysfs_writeback_keys_failed,

	&sysfs_trigger_btree_coalesce,

	&sysfs_foreground_write_ratelimit_enabled,
	&sysfs_copy_gc_enabled,
	&sysfs_tiering_enabled,
	sysfs_pd_controller_files(tiering),
	sysfs_pd_controller_files(foreground_write),
	&sysfs_internal_uuid,

#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

	NULL
};
KTYPE(bch_cache_set_internal);
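
/*
 * The options directory exposes the cache set's options (c->opts)
 * through sysfs; writable ones are also persisted to the superblock
 * when changed. Illustrative usage (path depends on registration):
 *
 *	cat /sys/fs/bcache/<set-uuid>/options/<option>
 */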
SHOW(bch_cache_set_opts_dir)
{
	struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);

#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)		\
	if (attr == &sysfs_opt_##_name)					\
		return _choices == bch_bool_opt || _choices == bch_uint_opt\
			? snprintf(buf, PAGE_SIZE, "%i\n", c->opts._name)\
			: bch_snprint_string_list(buf, PAGE_SIZE,	\
						  _choices, c->opts._name);\

	BCH_VISIBLE_OPTS()
#undef BCH_OPT

	return 0;
}

STORE(bch_cache_set_opts_dir)
{
	struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);

#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)		\
	if (attr == &sysfs_opt_##_name) {				\
		ssize_t v = (_choices == bch_bool_opt ||		\
			     _choices == bch_uint_opt)			\
			? strtoul_restrict_or_return(buf, _min, _max - 1)\
			: bch_read_string_list(buf, _choices);		\
									\
		if (v < 0)						\
			return v;					\
									\
		mutex_lock(&c->sb_lock);				\
		if (attr == &sysfs_opt_compression) {			\
			int ret = bch_check_set_has_compressed_data(c, v);\
			if (ret) {					\
				mutex_unlock(&c->sb_lock);		\
				return ret;				\
			}						\
		}							\
									\
		if (_sb_opt##_BITS && v != _sb_opt(c->disk_sb)) {	\
			SET_##_sb_opt(c->disk_sb, v);			\
			bch_write_super(c);				\
		}							\
									\
		mutex_unlock(&c->sb_lock);				\

	BCH_VISIBLE_OPTS()
#undef BCH_OPT

	return size;
}

static void bch_cache_set_opts_dir_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_opts_dir_files[] = {
#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)		\
	&sysfs_opt_##_name,

	BCH_VISIBLE_OPTS()
#undef BCH_OPT

	NULL
};
KTYPE(bch_cache_set_opts_dir);
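
/*
 * time_stats directory: latency/frequency statistics for various
 * internal operations, collected with the bch time_stats machinery.
 * Reads pretty-print the stats; any write clears them.
 */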
SHOW(bch_cache_set_time_stats)
{
	struct cache_set *c = container_of(kobj, struct cache_set, time_stats);

#define BCH_TIME_STAT(name, frequency_units, duration_units)		\
	sysfs_print_time_stats(&c->name##_time, name,			\
			       frequency_units, duration_units);
	BCH_TIME_STATS()
#undef BCH_TIME_STAT

	return 0;
}

STORE(bch_cache_set_time_stats)
{
	struct cache_set *c = container_of(kobj, struct cache_set, time_stats);

#define BCH_TIME_STAT(name, frequency_units, duration_units)		\
	sysfs_clear_time_stats(&c->name##_time, name);
	BCH_TIME_STATS()
#undef BCH_TIME_STAT

	return size;
}

static void bch_cache_set_time_stats_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_time_stats_files[] = {
#define BCH_TIME_STAT(name, frequency_units, duration_units)		\
	sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
	BCH_TIME_STATS()
#undef BCH_TIME_STAT

	NULL
};
KTYPE(bch_cache_set_time_stats);
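
/*
 * Per-device quantile statistics: a bucket_map_fn maps each bucket to a
 * single number (read/write priority, sectors used, or bucket "age" in
 * GC generations), and show_quantiles() summarizes the distribution of
 * that number across all of the device's buckets.
 */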
typedef unsigned (bucket_map_fn)(struct cache *, struct bucket *, void *);

static unsigned bucket_priority_fn(struct cache *ca, struct bucket *g,
				   void *private)
{
	int rw = (private ? 1 : 0);

	return ca->set->prio_clock[rw].hand - g->prio[rw];
}

static unsigned bucket_sectors_used_fn(struct cache *ca, struct bucket *g,
				       void *private)
{
	return bucket_sectors_used(g);
}

static unsigned bucket_oldest_gen_fn(struct cache *ca, struct bucket *g,
				     void *private)
{
	return bucket_gc_gen(ca, g);
}
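
/*
 * Builds an array with one value per bucket, sorts it (descending) and
 * samples 31 evenly spaced quantiles, i.e. q[i] = p[n * (i + 1) / 32],
 * so the output is a compact picture of the whole distribution.
 */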
static ssize_t show_quantiles(struct cache *ca, char *buf,
			      bucket_map_fn *fn, void *private)
{
	int cmp(const void *l, const void *r)
	{
		unsigned a = *(const unsigned *) l;
		unsigned b = *(const unsigned *) r;

		/* descending; plain subtraction could overflow int */
		return a < b ? 1 : a > b ? -1 : 0;
	}

	size_t n = ca->mi.nbuckets, i;
	/* Compute 31 quantiles */
	unsigned q[31], *p;
	ssize_t ret = 0;

	p = vzalloc(ca->mi.nbuckets * sizeof(unsigned));
	if (!p)
		return -ENOMEM;

	for (i = ca->mi.first_bucket; i < n; i++)
		p[i] = fn(ca, &ca->buckets[i], private);

	sort(p, n, sizeof(unsigned), cmp, NULL);

	for (i = 0; i < ARRAY_SIZE(q); i++)
		q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];

	vfree(p);

	for (i = 0; i < ARRAY_SIZE(q); i++)
		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
				 "%u ", q[i]);
	buf[ret - 1] = '\n';

	return ret;
}
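
/*
 * Reserve freelists: each allocation reserve (prio, btree, moving GC,
 * none) has its own FIFO of pre-allocated buckets; this prints how full
 * each one is as "used<tab>size".
 */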
static ssize_t show_reserve_stats(struct cache *ca, char *buf)
{
	enum alloc_reserve i;
	ssize_t ret;

	spin_lock(&ca->freelist_lock);

	ret = scnprintf(buf, PAGE_SIZE,
			"free_inc:\t%zu\t%zu\n",
			fifo_used(&ca->free_inc),
			ca->free_inc.size);

	for (i = 0; i < RESERVE_NR; i++)
		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
				 "free[%u]:\t%zu\t%zu\n", i,
				 fifo_used(&ca->free[i]),
				 ca->free[i].size);

	spin_unlock(&ca->freelist_lock);

	return ret;
}

static ssize_t show_cache_alloc_debug(struct cache *ca, char *buf)
{
	struct cache_set *c = ca->set;
	struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);

	return scnprintf(buf, PAGE_SIZE,
			 "free_inc: %zu/%zu\n"
			 "free[RESERVE_PRIO]: %zu/%zu\n"
			 "free[RESERVE_BTREE]: %zu/%zu\n"
			 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
			 "free[RESERVE_NONE]: %zu/%zu\n"
			 "alloc: %llu/%llu\n"
			 "meta: %llu/%llu\n"
			 "dirty: %llu/%llu\n"
			 "available: %llu/%llu\n"
			 "freelist_wait: %s\n"
			 "open buckets: %u/%u (reserved %u)\n"
			 "open_buckets_wait: %s\n",
			 fifo_used(&ca->free_inc), ca->free_inc.size,
			 fifo_used(&ca->free[RESERVE_PRIO]), ca->free[RESERVE_PRIO].size,
			 fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size,
			 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
			 fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
			 stats.buckets_alloc, ca->mi.nbuckets - ca->mi.first_bucket,
			 stats.buckets_meta, ca->mi.nbuckets - ca->mi.first_bucket,
			 stats.buckets_dirty, ca->mi.nbuckets - ca->mi.first_bucket,
			 __buckets_available_cache(ca, stats), ca->mi.nbuckets - ca->mi.first_bucket,
			 c->freelist_wait.list.first ? "waiting" : "empty",
			 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
			 c->open_buckets_wait.list.first ? "waiting" : "empty");
}

static u64 sectors_written(struct cache *ca)
{
	u64 ret = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		ret += *per_cpu_ptr(ca->sectors_written, cpu);

	return ret;
}

SHOW(bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
	struct cache_set *c = ca->set;
	struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);

	sysfs_printf(uuid, "%pU\n", ca->uuid.b);

	sysfs_hprint(bucket_size, bucket_bytes(ca));
	sysfs_print(bucket_size_bytes, bucket_bytes(ca));
	sysfs_hprint(block_size, block_bytes(c));
	sysfs_print(block_size_bytes, block_bytes(c));
	sysfs_print(first_bucket, ca->mi.first_bucket);
	sysfs_print(nbuckets, ca->mi.nbuckets);
	sysfs_print(discard, ca->mi.discard);
	sysfs_hprint(written, sectors_written(ca) << 9);
	sysfs_hprint(btree_written,
		     atomic64_read(&ca->btree_sectors_written) << 9);
	sysfs_hprint(metadata_written,
		     (atomic64_read(&ca->meta_sectors_written) +
		      atomic64_read(&ca->btree_sectors_written)) << 9);

	sysfs_print(io_errors,
		    atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);

	sysfs_hprint(dirty_data, stats.sectors_dirty << 9);
	sysfs_print(dirty_bytes, stats.sectors_dirty << 9);
	sysfs_print(dirty_buckets, stats.buckets_dirty);
	sysfs_hprint(cached_data, stats.sectors_cached << 9);
	sysfs_print(cached_bytes, stats.sectors_cached << 9);
	sysfs_print(cached_buckets, stats.buckets_cached);
	sysfs_print(meta_buckets, stats.buckets_meta);
	sysfs_print(alloc_buckets, stats.buckets_alloc);
	sysfs_print(available_buckets, buckets_available_cache(ca));
	sysfs_print(free_buckets, buckets_free_cache(ca));
	sysfs_print(has_data, ca->mi.has_data);
	sysfs_print(has_metadata, ca->mi.has_metadata);
	sysfs_pd_controller_show(copy_gc, &ca->moving_gc_pd);

	if (attr == &sysfs_cache_replacement_policy)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_cache_replacement_policies,
					       ca->mi.replacement);

	sysfs_print(tier, ca->mi.tier);

	if (attr == &sysfs_state_rw)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_cache_state,
					       ca->mi.state);

	if (attr == &sysfs_read_priority_stats)
		return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
	if (attr == &sysfs_write_priority_stats)
		return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
	if (attr == &sysfs_fragmentation_stats)
		return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
	if (attr == &sysfs_oldest_gen_stats)
		return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
	if (attr == &sysfs_reserve_stats)
		return show_reserve_stats(ca, buf);
	if (attr == &sysfs_alloc_debug)
		return show_cache_alloc_debug(ca, buf);

	return 0;
}

STORE(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
	struct cache_set *c = ca->set;
	struct bch_member *mi;

	sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
	if (attr == &sysfs_discard) {
		bool v = strtoul_or_return(buf);

		mutex_lock(&c->sb_lock);
		mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];

		if (v != BCH_MEMBER_DISCARD(mi)) {
			SET_BCH_MEMBER_DISCARD(mi, v);
			bch_write_super(c);
		}
		mutex_unlock(&c->sb_lock);
	}
	if (attr == &sysfs_cache_replacement_policy) {
		ssize_t v = bch_read_string_list(buf, bch_cache_replacement_policies);

		if (v < 0)
			return v;

		mutex_lock(&c->sb_lock);
		mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];

		if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
			SET_BCH_MEMBER_REPLACEMENT(mi, v);
			bch_write_super(c);
		}
		mutex_unlock(&c->sb_lock);
	}
	if (attr == &sysfs_state_rw) {
		char name[BDEVNAME_SIZE];
		const char *err = NULL;
		ssize_t v = bch_read_string_list(buf, bch_cache_state);

		if (v < 0)
			return v;

		if (v == ca->mi.state)
			return size;

		switch (v) {
		case BCH_MEMBER_STATE_ACTIVE:
			err = bch_cache_read_write(ca);
			break;
		case BCH_MEMBER_STATE_RO:
			bch_cache_read_only(ca);
			break;
		case BCH_MEMBER_STATE_FAILED:
		case BCH_MEMBER_STATE_SPARE:
			/*
			 * XXX: need to migrate data off and set correct state
			 */
			pr_err("can't set %s %s: not supported",
			       bdevname(ca->disk_sb.bdev, name),
			       bch_cache_state[v]);
			return -EINVAL;
		}

		if (err) {
			pr_err("can't set %s %s: %s",
			       bdevname(ca->disk_sb.bdev, name),
			       bch_cache_state[v], err);
			return -EINVAL;
		}
	}
	if (attr == &sysfs_unregister) {
		bool force = false;

		if (!strncmp(buf, "force", 5) &&
		    (buf[5] == '\0' || buf[5] == '\n'))
			force = true;

		bch_cache_remove(ca, force);
	}
	if (attr == &sysfs_clear_stats) {
		int cpu;

		for_each_possible_cpu(cpu)
			*per_cpu_ptr(ca->sectors_written, cpu) = 0;

		atomic64_set(&ca->btree_sectors_written, 0);
		atomic64_set(&ca->meta_sectors_written, 0);
		atomic_set(&ca->io_count, 0);
		atomic_set(&ca->io_errors, 0);
	}

	return size;
}
STORE_LOCKED(bch_cache)

static struct attribute *bch_cache_files[] = {
	&sysfs_bucket_size_bytes,
	&sysfs_block_size_bytes,
	&sysfs_first_bucket,

	&sysfs_read_priority_stats,
	&sysfs_write_priority_stats,
	&sysfs_fragmentation_stats,
	&sysfs_oldest_gen_stats,
	&sysfs_reserve_stats,
	&sysfs_available_buckets,
	&sysfs_free_buckets,
	&sysfs_dirty_buckets,
	&sysfs_cached_bytes,
	&sysfs_cached_buckets,
	&sysfs_meta_buckets,
	&sysfs_alloc_buckets,
	&sysfs_has_metadata,

	&sysfs_btree_written,
	&sysfs_metadata_written,

	&sysfs_cache_replacement_policy,

	sysfs_pd_controller_files(copy_gc),
	NULL
};
KTYPE(bch_cache);