]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/sysfs.c
bcachefs-in-userspace improvements
[bcachefs-tools-debian] / libbcachefs / sysfs.c
1 /*
2  * bcache sysfs interfaces
3  *
4  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
5  * Copyright 2012 Google, Inc.
6  */
7
8 #include "bcachefs.h"
9 #include "alloc.h"
10 #include "compress.h"
11 #include "sysfs.h"
12 #include "btree_cache.h"
13 #include "btree_iter.h"
14 #include "btree_update.h"
15 #include "btree_gc.h"
16 #include "buckets.h"
17 #include "inode.h"
18 #include "journal.h"
19 #include "keylist.h"
20 #include "move.h"
21 #include "opts.h"
22 #include "super-io.h"
23 #include "tier.h"
24
25 #include <linux/blkdev.h>
26 #include <linux/sort.h>
27
28 #include "util.h"
29
/*
 * Define the sysfs_ops table <_prefix>_sysfs_ops, wiring it to the handlers
 * <_prefix>_show and <_prefix>_store (see SHOW()/STORE() below).
 */
#define SYSFS_OPS(_prefix)						\
struct sysfs_ops _prefix ## _sysfs_ops = {				\
	.show	= _prefix ## _show,					\
	.store	= _prefix ## _store,					\
}
35
/*
 * Open the definition of a sysfs show handler called <_fn>_show; the function
 * body follows the macro invocation.  The parameter names (kobj, attr, buf)
 * are fixed because the sysfs_* helper macros in this file refer to them.
 */
#define SHOW(_fn)							\
static ssize_t _fn ## _show(struct kobject *kobj,			\
			    struct attribute *attr, char *buf)

/*
 * As SHOW(), but for a store handler called <_fn>_store, which additionally
 * receives the written data (buf) and its length (size).
 */
#define STORE(_fn)							\
static ssize_t _fn ## _store(struct kobject *kobj,			\
			     struct attribute *attr,			\
			     const char *buf, size_t size)
43
/*
 * Declare a file-local attribute object sysfs_<_attr> whose file name is the
 * stringified identifier and whose permission bits are _perm.
 */
#define __sysfs_attribute(_attr, _perm)					\
	static struct attribute sysfs_##_attr =				\
		{ .name = #_attr, .mode = _perm }

/* Shorthands for the three permission sets used in this file. */
#define write_attribute(_attr)	__sysfs_attribute(_attr, S_IWUSR)
#define read_attribute(_attr)	__sysfs_attribute(_attr, S_IRUGO)
#define rw_attribute(_attr)	__sysfs_attribute(_attr, S_IRUGO|S_IWUSR)
51
/*
 * Show-handler helper: if the attribute being read is sysfs_<file>, format
 * the arguments into buf (with a newline appended to the format) and return
 * from the enclosing handler.
 *
 * scnprintf() is used so that the value handed back to sysfs is the number of
 * bytes actually stored in buf; snprintf() returns the length the output
 * would have had without truncation, which can exceed PAGE_SIZE.
 */
#define sysfs_printf(file, fmt, ...)					\
do {									\
	if (attr == &sysfs_ ## file)					\
		return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
} while (0)
57
/*
 * Show-handler helper: if the attribute being read is sysfs_<_file>, print
 * _val into buf with snprint() and return from the enclosing handler.
 */
#define sysfs_print(_file, _val)					\
do {									\
	if (attr == &sysfs_ ## _file)					\
		return snprint(buf, PAGE_SIZE, _val);			\
} while (0)

/*
 * Like sysfs_print(), but formats through bch2_hprint() and then appends a
 * newline; the returned length is bch2_hprint()'s result plus one for that
 * newline.
 */
#define sysfs_hprint(_file, _val)					\
do {									\
	if (attr == &sysfs_ ## _file) {					\
		ssize_t _len = bch2_hprint(buf, _val);			\
									\
		strcat(buf, "\n");					\
		return _len + 1;					\
	}								\
} while (0)

/*
 * Variants that obtain the value through a var() accessor macro, which the
 * code using them is expected to define.
 */
#define var_printf(_name, _fmt)	sysfs_printf(_name, _fmt, var(_name))
#define var_print(_name)	sysfs_print(_name, var(_name))
#define var_hprint(_name)	sysfs_hprint(_name, var(_name))
76
/*
 * Store-handler helper: if the attribute being written is sysfs_<_file>,
 * parse buf into _dst with strtoul_safe() and return from the enclosing
 * handler -- the parser's non-zero result if there is one, otherwise size.
 */
#define sysfs_strtoul(_file, _dst)					\
do {									\
	if (attr == &sysfs_ ## _file)					\
		return strtoul_safe(buf, _dst) ?: (ssize_t) size;	\
} while (0)

/* As sysfs_strtoul(), parsing with strtoul_safe_clamp() and [_lo, _hi]. */
#define sysfs_strtoul_clamp(_file, _dst, _lo, _hi)			\
do {									\
	if (attr == &sysfs_ ## _file)					\
		return strtoul_safe_clamp(buf, _dst, _lo, _hi)		\
			?: (ssize_t) size;				\
} while (0)
89
/*
 * Statement expression yielding cp parsed as a base-10 unsigned long.  If
 * kstrtoul() reports an error, the *enclosing function* returns that error.
 */
#define strtoul_or_return(cp)						\
({									\
	unsigned long _val;						\
	int _err = kstrtoul(cp, 10, &_val);				\
									\
	if (_err != 0)							\
		return _err;						\
	_val;								\
})

/*
 * As strtoul_or_return(), but parses via strtoul_safe_restrict() with the
 * given min/max; a non-zero result from it is returned from the enclosing
 * function.
 */
#define strtoul_restrict_or_return(cp, min, max)			\
({									\
	unsigned long __v = 0;						\
	int _r = strtoul_safe_restrict(cp, __v, min, max);		\
									\
	if (_r != 0)							\
		return _r;						\
	__v;								\
})

/*
 * As strtoul_or_return(), but parses via strtoi_h() into a u64.
 */
#define strtoi_h_or_return(cp)						\
({									\
	u64 _val;							\
	int _err = strtoi_h(cp, &_val);					\
									\
	if (_err != 0)							\
		return _err;						\
	_val;								\
})
116
/*
 * Store-handler helper: if the attribute being written is sysfs_<_file>,
 * parse buf into _dst with strtoi_h() and return from the enclosing handler
 * (the parser's non-zero result if any, otherwise size).
 */
#define sysfs_hatoi(_file, _dst)					\
do {									\
	if (attr == &sysfs_ ## _file)					\
		return strtoi_h(buf, &_dst) ?: (ssize_t) size;		\
} while (0)
122
/*
 * Attribute objects.  Each line below declares a static
 * `struct attribute sysfs_<name>`; the show/store handlers further down
 * dispatch by comparing the incoming `attr` pointer against these objects.
 */

/* Write-only triggers */
write_attribute(trigger_btree_coalesce);
write_attribute(trigger_gc);
write_attribute(prune_cache);

/* Read-only geometry, statistics and debug output */
read_attribute(uuid);
read_attribute(minor);
read_attribute(bucket_size);
read_attribute(bucket_size_bytes);
read_attribute(block_size);
read_attribute(block_size_bytes);
read_attribute(btree_node_size);
read_attribute(btree_node_size_bytes);
read_attribute(first_bucket);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(read_priority_stats);
read_attribute(write_priority_stats);
read_attribute(fragmentation_stats);
read_attribute(oldest_gen_stats);
read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(cache_available_percent);
read_attribute(compression_stats);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(journal_debug);
write_attribute(journal_flush);
read_attribute(internal_uuid);

read_attribute(btree_gc_running);

read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(available_buckets);
read_attribute(free_buckets);
read_attribute(dirty_data);
read_attribute(dirty_bytes);
read_attribute(dirty_buckets);
read_attribute(cached_data);
read_attribute(cached_bytes);
read_attribute(cached_buckets);
read_attribute(meta_buckets);
read_attribute(alloc_buckets);
read_attribute(has_data);
read_attribute(has_metadata);
read_attribute(bset_tree_stats);
read_attribute(alloc_debug);

read_attribute(cache_read_races);

/* Journal tunables */
rw_attribute(journal_write_delay_ms);
rw_attribute(journal_reclaim_delay_ms);
read_attribute(journal_entry_size_max);

rw_attribute(discard);
rw_attribute(cache_replacement_policy);

/*
 * Background-work knobs.  sysfs_pd_controller_attribute() is defined outside
 * this file; presumably it declares the attributes of one pd controller.
 */
rw_attribute(foreground_write_ratelimit_enabled);
rw_attribute(copy_gc_enabled);
sysfs_pd_controller_attribute(copy_gc);

rw_attribute(tier);
rw_attribute(tiering_enabled);
rw_attribute(tiering_percent);
sysfs_pd_controller_attribute(tiering);

sysfs_pd_controller_attribute(foreground_write);

rw_attribute(pd_controllers_update_seconds);

rw_attribute(foreground_target_percent);

read_attribute(meta_replicas_have);
read_attribute(data_replicas_have);

/* One read/write attribute per entry of BCH_DEBUG_PARAMS() */
#define BCH_DEBUG_PARAM(name, description)				\
	rw_attribute(name);

	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

/*
 * One attribute per entry of BCH_VISIBLE_OPTS(), named sysfs_opt_<name> and
 * using the mode supplied by the option table.
 */
#define BCH_OPT(_name, _mode, ...)					\
	static struct attribute sysfs_opt_##_name = {			\
		.name = #_name, .mode = _mode,				\
	};

	BCH_VISIBLE_OPTS()
#undef BCH_OPT

/* Attributes for every entry of BCH_TIME_STATS() */
#define BCH_TIME_STAT(name, frequency_units, duration_units)		\
	sysfs_time_stats_attribute(name, frequency_units, duration_units);
	BCH_TIME_STATS()
#undef BCH_TIME_STAT
219
220 static struct attribute sysfs_state_rw = {
221         .name = "state",
222         .mode = S_IRUGO
223 };
224
225 static int bch2_bset_print_stats(struct bch_fs *c, char *buf)
226 {
227         struct bset_stats stats;
228         size_t nodes = 0;
229         struct btree *b;
230         struct bucket_table *tbl;
231         struct rhash_head *pos;
232         unsigned iter;
233
234         memset(&stats, 0, sizeof(stats));
235
236         rcu_read_lock();
237         for_each_cached_btree(b, c, tbl, iter, pos) {
238                 bch2_btree_keys_stats(b, &stats);
239                 nodes++;
240         }
241         rcu_read_unlock();
242
243         return snprintf(buf, PAGE_SIZE,
244                         "btree nodes:           %zu\n"
245                         "written sets:          %zu\n"
246                         "written key bytes:     %zu\n"
247                         "unwritten sets:                %zu\n"
248                         "unwritten key bytes:   %zu\n"
249                         "no table sets:         %zu\n"
250                         "no table key bytes:    %zu\n"
251                         "floats:                        %zu\n"
252                         "failed unpacked:       %zu\n"
253                         "failed prev:           %zu\n"
254                         "failed overflow:       %zu\n",
255                         nodes,
256                         stats.sets[BSET_RO_AUX_TREE].nr,
257                         stats.sets[BSET_RO_AUX_TREE].bytes,
258                         stats.sets[BSET_RW_AUX_TREE].nr,
259                         stats.sets[BSET_RW_AUX_TREE].bytes,
260                         stats.sets[BSET_NO_AUX_TREE].nr,
261                         stats.sets[BSET_NO_AUX_TREE].bytes,
262                         stats.floats,
263                         stats.failed_unpacked,
264                         stats.failed_prev,
265                         stats.failed_overflow);
266 }
267
268 static unsigned bch2_root_usage(struct bch_fs *c)
269 {
270         unsigned bytes = 0;
271         struct bkey_packed *k;
272         struct btree *b;
273         struct btree_node_iter iter;
274
275         goto lock_root;
276
277         do {
278                 six_unlock_read(&b->lock);
279 lock_root:
280                 b = c->btree_roots[BTREE_ID_EXTENTS].b;
281                 six_lock_read(&b->lock);
282         } while (b != c->btree_roots[BTREE_ID_EXTENTS].b);
283
284         for_each_btree_node_key(b, k, &iter, btree_node_is_extents(b))
285                 bytes += bkey_bytes(k);
286
287         six_unlock_read(&b->lock);
288
289         return (bytes * 100) / btree_bytes(c);
290 }
291
292 static size_t bch2_btree_cache_size(struct bch_fs *c)
293 {
294         size_t ret = 0;
295         struct btree *b;
296
297         mutex_lock(&c->btree_cache_lock);
298         list_for_each_entry(b, &c->btree_cache, list)
299                 ret += btree_bytes(c);
300
301         mutex_unlock(&c->btree_cache_lock);
302         return ret;
303 }
304
305 static unsigned bch2_fs_available_percent(struct bch_fs *c)
306 {
307         return div64_u64((u64) sectors_available(c) * 100,
308                          c->capacity ?: 1);
309 }
310
/*
 * Compiled out: both helpers read c->gc_stats.  Their only callers, in the
 * bch2_fs show handler, are disabled under a matching "#if 0" marked
 * "XXX: reimplement".
 */
#if 0
/* key_bytes as a percentage of (nodes * btree_bytes(c)); nodes == 0 counts as 1 */
static unsigned bch2_btree_used(struct bch_fs *c)
{
	return div64_u64(c->gc_stats.key_bytes * 100,
			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
}

/* data / nkeys, or 0 when there are no keys */
static unsigned bch2_average_key_size(struct bch_fs *c)
{
	return c->gc_stats.nkeys
		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
		: 0;
}
#endif
325
326 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
327 {
328         struct bch_fs_usage stats = bch2_fs_usage_read(c);
329
330         return scnprintf(buf, PAGE_SIZE,
331                          "capacity:\t\t%llu\n"
332                          "compressed:\n"
333                          "\tmeta:\t\t%llu\n"
334                          "\tdirty:\t\t%llu\n"
335                          "\tcached:\t\t%llu\n"
336                          "uncompressed:\n"
337                          "\tmeta:\t\t%llu\n"
338                          "\tdirty:\t\t%llu\n"
339                          "\tcached:\t\t%llu\n"
340                          "persistent reserved sectors:\t%llu\n"
341                          "online reserved sectors:\t%llu\n",
342                          c->capacity,
343                          stats.s[S_COMPRESSED][S_META],
344                          stats.s[S_COMPRESSED][S_DIRTY],
345                          stats.s[S_COMPRESSED][S_CACHED],
346                          stats.s[S_UNCOMPRESSED][S_META],
347                          stats.s[S_UNCOMPRESSED][S_DIRTY],
348                          stats.s[S_UNCOMPRESSED][S_CACHED],
349                          stats.persistent_reserved,
350                          stats.online_reserved);
351 }
352
353 static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
354 {
355         struct btree_iter iter;
356         struct bkey_s_c k;
357         u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
358             nr_compressed_extents = 0,
359             compressed_sectors_compressed = 0,
360             compressed_sectors_uncompressed = 0;
361
362         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, k)
363                 if (k.k->type == BCH_EXTENT) {
364                         struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
365                         const struct bch_extent_ptr *ptr;
366                         const union bch_extent_crc *crc;
367
368                         extent_for_each_ptr_crc(e, ptr, crc) {
369                                 if (crc_compression_type(crc) == BCH_COMPRESSION_NONE) {
370                                         nr_uncompressed_extents++;
371                                         uncompressed_sectors += e.k->size;
372                                 } else {
373                                         nr_compressed_extents++;
374                                         compressed_sectors_compressed +=
375                                                 crc_compressed_size(e.k, crc);
376                                         compressed_sectors_uncompressed +=
377                                                 crc_uncompressed_size(e.k, crc);
378                                 }
379
380                                 /* only looking at the first ptr */
381                                 break;
382                         }
383                 }
384         bch2_btree_iter_unlock(&iter);
385
386         return snprintf(buf, PAGE_SIZE,
387                         "uncompressed data:\n"
388                         "       nr extents:                     %llu\n"
389                         "       size (bytes):                   %llu\n"
390                         "compressed data:\n"
391                         "       nr extents:                     %llu\n"
392                         "       compressed size (bytes):        %llu\n"
393                         "       uncompressed size (bytes):      %llu\n",
394                         nr_uncompressed_extents,
395                         uncompressed_sectors << 9,
396                         nr_compressed_extents,
397                         compressed_sectors_compressed << 9,
398                         compressed_sectors_uncompressed << 9);
399 }
400
/*
 * Show handler for the filesystem kobject (also reached through the
 * "internal" kobject wrapper).
 *
 * Each sysfs_print*()/sysfs_hprint() line returns from this function when
 * `attr` is the matching attribute, so the body reads as a dispatch chain.
 * Attributes handled before the bch2_fs_running() check are readable in any
 * state; those after it yield -EPERM unless the filesystem is running.
 * An attribute with no handler falls through to the final `return 0`.
 */
SHOW(bch2_fs)
{
	struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);

	sysfs_print(minor,			c->minor);

	sysfs_print(journal_write_delay_ms,	c->journal.write_delay_ms);
	sysfs_print(journal_reclaim_delay_ms,	c->journal.reclaim_delay_ms);
	sysfs_hprint(journal_entry_size_max,	c->journal.entry_size_max);

	/* Sizes: human-readable form and raw bytes (sectors << 9) */
	sysfs_hprint(block_size,		block_bytes(c));
	sysfs_print(block_size_bytes,		block_bytes(c));
	sysfs_hprint(btree_node_size,		c->sb.btree_node_size << 9);
	sysfs_print(btree_node_size_bytes,	c->sb.btree_node_size << 9);

	sysfs_hprint(btree_cache_size,		bch2_btree_cache_size(c));
	sysfs_print(cache_available_percent,	bch2_fs_available_percent(c));

	sysfs_print(btree_gc_running,		c->gc_pos.phase != GC_PHASE_DONE);

#if 0
	/* XXX: reimplement */
	sysfs_print(btree_used_percent,	bch2_btree_used(c));
	sysfs_print(btree_nodes,	c->gc_stats.nodes);
	sysfs_hprint(average_key_size,	bch2_average_key_size(c));
#endif

	sysfs_print(cache_read_races,
		    atomic_long_read(&c->cache_read_races));

	sysfs_printf(foreground_write_ratelimit_enabled, "%i",
		     c->foreground_write_ratelimit_enabled);
	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
	sysfs_pd_controller_show(foreground_write, &c->foreground_write_pd);

	sysfs_print(pd_controllers_update_seconds,
		    c->pd_controllers_update_seconds);
	sysfs_print(foreground_target_percent, c->foreground_target_percent);

	sysfs_printf(tiering_enabled,		"%i", c->tiering_enabled);
	sysfs_print(tiering_percent,		c->tiering_percent);

	/* The tiering controller shown is hard-wired to tier index 1 */
	sysfs_pd_controller_show(tiering,	&c->tiers[1].pd); /* XXX */

	sysfs_printf(meta_replicas_have, "%u",	c->sb.meta_replicas_have);
	sysfs_printf(data_replicas_have, "%u",	c->sb.data_replicas_have);

	/* Debugging: */

	if (attr == &sysfs_journal_debug)
		return bch2_journal_print_debug(&c->journal, buf);

	/* One sysfs_print() per debug parameter, reading the same-named field of c */
#define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

	/* Everything below is only served while the filesystem is running */
	if (!bch2_fs_running(c))
		return -EPERM;

	if (attr == &sysfs_bset_tree_stats)
		return bch2_bset_print_stats(c, buf);
	if (attr == &sysfs_alloc_debug)
		return show_fs_alloc_debug(c, buf);

	sysfs_print(tree_depth, c->btree_roots[BTREE_ID_EXTENTS].b->level);
	sysfs_print(root_usage_percent,		bch2_root_usage(c));

	if (attr == &sysfs_compression_stats)
		return bch2_compression_stats(c, buf);

	sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);

	return 0;
}
475
476 STORE(__bch2_fs)
477 {
478         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
479
480         sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
481         sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
482
483         sysfs_strtoul(foreground_write_ratelimit_enabled,
484                       c->foreground_write_ratelimit_enabled);
485
486         if (attr == &sysfs_copy_gc_enabled) {
487                 struct bch_dev *ca;
488                 unsigned i;
489                 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
490                         ?: (ssize_t) size;
491
492                 for_each_member_device(ca, c, i)
493                         if (ca->moving_gc_read)
494                                 wake_up_process(ca->moving_gc_read);
495                 return ret;
496         }
497
498         if (attr == &sysfs_tiering_enabled) {
499                 ssize_t ret = strtoul_safe(buf, c->tiering_enabled)
500                         ?: (ssize_t) size;
501
502                 bch2_tiering_start(c); /* issue wakeups */
503                 return ret;
504         }
505
506         sysfs_pd_controller_store(foreground_write, &c->foreground_write_pd);
507
508         sysfs_strtoul(pd_controllers_update_seconds,
509                       c->pd_controllers_update_seconds);
510         sysfs_strtoul(foreground_target_percent, c->foreground_target_percent);
511
512         sysfs_strtoul(tiering_percent,          c->tiering_percent);
513         sysfs_pd_controller_store(tiering,      &c->tiers[1].pd); /* XXX */
514
515         /* Debugging: */
516
517 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
518         BCH_DEBUG_PARAMS()
519 #undef BCH_DEBUG_PARAM
520
521         if (!bch2_fs_running(c))
522                 return -EPERM;
523
524         if (attr == &sysfs_journal_flush) {
525                 bch2_journal_meta_async(&c->journal, NULL);
526
527                 return size;
528         }
529
530         if (attr == &sysfs_trigger_btree_coalesce)
531                 bch2_coalesce(c);
532
533         /* Debugging: */
534
535         if (attr == &sysfs_trigger_gc)
536                 bch2_gc(c);
537
538         if (attr == &sysfs_prune_cache) {
539                 struct shrink_control sc;
540
541                 sc.gfp_mask = GFP_KERNEL;
542                 sc.nr_to_scan = strtoul_or_return(buf);
543                 c->btree_cache_shrink.scan_objects(&c->btree_cache_shrink, &sc);
544         }
545
546         return size;
547 }
548
549 STORE(bch2_fs)
550 {
551         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
552
553         mutex_lock(&c->state_lock);
554         size = __bch2_fs_store(kobj, attr, buf, size);
555         mutex_unlock(&c->state_lock);
556
557         return size;
558 }
559 SYSFS_OPS(bch2_fs);
560
/*
 * NULL-terminated list of the attributes for the filesystem kobject, served
 * by the bch2_fs show/store handlers.
 */
struct attribute *bch2_fs_files[] = {
	&sysfs_journal_write_delay_ms,
	&sysfs_journal_reclaim_delay_ms,
	&sysfs_journal_entry_size_max,

	&sysfs_block_size,
	&sysfs_block_size_bytes,
	&sysfs_btree_node_size,
	&sysfs_btree_node_size_bytes,
	&sysfs_tree_depth,
	&sysfs_root_usage_percent,
	&sysfs_btree_cache_size,
	&sysfs_cache_available_percent,
	&sysfs_compression_stats,

	/*
	 * NOTE(review): the show code for this attribute is currently under
	 * "#if 0" in the bch2_fs show handler, so a read falls through to
	 * its final `return 0` (or -EPERM when the fs is not running).
	 */
	&sysfs_average_key_size,

	&sysfs_meta_replicas_have,
	&sysfs_data_replicas_have,

	&sysfs_foreground_target_percent,
	&sysfs_tiering_percent,

	/* Write-only: handled in the store path */
	&sysfs_journal_flush,
	NULL
};
587
588 /* internal dir - just a wrapper */
589
590 SHOW(bch2_fs_internal)
591 {
592         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
593         return bch2_fs_show(&c->kobj, attr, buf);
594 }
595
596 STORE(bch2_fs_internal)
597 {
598         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
599         return bch2_fs_store(&c->kobj, attr, buf, size);
600 }
601 SYSFS_OPS(bch2_fs_internal);
602
/*
 * NULL-terminated list of the attributes for the "internal" kobject.  Reads
 * and writes are forwarded to the bch2_fs handlers.
 */
struct attribute *bch2_fs_internal_files[] = {
	&sysfs_journal_debug,

	&sysfs_alloc_debug,

	&sysfs_btree_gc_running,

	/*
	 * NOTE(review): the show code for these two is currently under
	 * "#if 0" in the bch2_fs show handler, so reads fall through to its
	 * final `return 0` (or -EPERM when the fs is not running).
	 */
	&sysfs_btree_nodes,
	&sysfs_btree_used_percent,

	&sysfs_bset_tree_stats,
	&sysfs_cache_read_races,

	&sysfs_trigger_btree_coalesce,
	&sysfs_trigger_gc,
	&sysfs_prune_cache,
	&sysfs_foreground_write_ratelimit_enabled,
	&sysfs_copy_gc_enabled,
	&sysfs_tiering_enabled,
	sysfs_pd_controller_files(tiering),
	sysfs_pd_controller_files(foreground_write),
	&sysfs_internal_uuid,

	/* One entry per debug parameter */
#define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
	BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM

	NULL
};
632
633 /* options */
634
635 SHOW(bch2_fs_opts_dir)
636 {
637         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
638
639         return bch2_opt_show(&c->opts, attr->name, buf, PAGE_SIZE);
640 }
641
642 STORE(bch2_fs_opts_dir)
643 {
644         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
645         const struct bch_option *opt;
646         enum bch_opt_id id;
647         u64 v;
648
649         id = bch2_parse_sysfs_opt(attr->name, buf, &v);
650         if (id < 0)
651                 return id;
652
653         opt = &bch2_opt_table[id];
654
655         mutex_lock(&c->sb_lock);
656
657         if (id == Opt_compression) {
658                 int ret = bch2_check_set_has_compressed_data(c, v);
659                 if (ret) {
660                         mutex_unlock(&c->sb_lock);
661                         return ret;
662                 }
663         }
664
665         if (opt->set_sb != SET_NO_SB_OPT) {
666                 opt->set_sb(c->disk_sb, v);
667                 bch2_write_super(c);
668         }
669
670         bch2_opt_set(&c->opts, id, v);
671
672         mutex_unlock(&c->sb_lock);
673
674         return size;
675 }
676 SYSFS_OPS(bch2_fs_opts_dir);
677
678 struct attribute *bch2_fs_opts_dir_files[] = {
679 #define BCH_OPT(_name, ...)                                             \
680         &sysfs_opt_##_name,
681
682         BCH_VISIBLE_OPTS()
683 #undef BCH_OPT
684
685         NULL
686 };
687
688 /* time stats */
689
690 SHOW(bch2_fs_time_stats)
691 {
692         struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
693
694 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
695         sysfs_print_time_stats(&c->name##_time, name,                   \
696                                frequency_units, duration_units);
697         BCH_TIME_STATS()
698 #undef BCH_TIME_STAT
699
700         return 0;
701 }
702
703 STORE(bch2_fs_time_stats)
704 {
705         struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
706
707 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
708         sysfs_clear_time_stats(&c->name##_time, name);
709         BCH_TIME_STATS()
710 #undef BCH_TIME_STAT
711
712         return size;
713 }
714 SYSFS_OPS(bch2_fs_time_stats);
715
716 struct attribute *bch2_fs_time_stats_files[] = {
717 #define BCH_TIME_STAT(name, frequency_units, duration_units)            \
718         sysfs_time_stats_attribute_list(name, frequency_units, duration_units)
719         BCH_TIME_STATS()
720 #undef BCH_TIME_STAT
721
722         NULL
723 };
724
725 typedef unsigned (bucket_map_fn)(struct bch_dev *, struct bucket *, void *);
726
727 static unsigned bucket_priority_fn(struct bch_dev *ca, struct bucket *g,
728                                    void *private)
729 {
730         int rw = (private ? 1 : 0);
731
732         return ca->fs->prio_clock[rw].hand - g->prio[rw];
733 }
734
735 static unsigned bucket_sectors_used_fn(struct bch_dev *ca, struct bucket *g,
736                                        void *private)
737 {
738         return bucket_sectors_used(g);
739 }
740
741 static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, struct bucket *g,
742                                      void *private)
743 {
744         return bucket_gc_gen(ca, g);
745 }
746
747 static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
748                               bucket_map_fn *fn, void *private)
749 {
750         int cmp(const void *l, const void *r)
751         {       return *((unsigned *) r) - *((unsigned *) l); }
752
753         size_t n = ca->mi.nbuckets, i;
754         /* Compute 31 quantiles */
755         unsigned q[31], *p;
756         ssize_t ret = 0;
757
758         p = vzalloc(ca->mi.nbuckets * sizeof(unsigned));
759         if (!p)
760                 return -ENOMEM;
761
762         for (i = ca->mi.first_bucket; i < n; i++)
763                 p[i] = fn(ca, &ca->buckets[i], private);
764
765         sort(p, n, sizeof(unsigned), cmp, NULL);
766
767         while (n &&
768                !p[n - 1])
769                 --n;
770
771         for (i = 0; i < ARRAY_SIZE(q); i++)
772                 q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
773
774         vfree(p);
775
776         for (i = 0; i < ARRAY_SIZE(q); i++)
777                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
778                                  "%u ", q[i]);
779         buf[ret - 1] = '\n';
780
781         return ret;
782
783 }
784
785 static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
786 {
787         enum alloc_reserve i;
788         ssize_t ret;
789
790         spin_lock(&ca->freelist_lock);
791
792         ret = scnprintf(buf, PAGE_SIZE,
793                         "free_inc:\t%zu\t%zu\n",
794                         fifo_used(&ca->free_inc),
795                         ca->free_inc.size);
796
797         for (i = 0; i < RESERVE_NR; i++)
798                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
799                                  "free[%u]:\t%zu\t%zu\n", i,
800                                  fifo_used(&ca->free[i]),
801                                  ca->free[i].size);
802
803         spin_unlock(&ca->freelist_lock);
804
805         return ret;
806 }
807
808 static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
809 {
810         struct bch_fs *c = ca->fs;
811         struct bch_dev_usage stats = bch2_dev_usage_read(ca);
812
813         return scnprintf(buf, PAGE_SIZE,
814                 "free_inc:               %zu/%zu\n"
815                 "free[RESERVE_PRIO]:     %zu/%zu\n"
816                 "free[RESERVE_BTREE]:    %zu/%zu\n"
817                 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
818                 "free[RESERVE_NONE]:     %zu/%zu\n"
819                 "alloc:                  %llu/%llu\n"
820                 "meta:                   %llu/%llu\n"
821                 "dirty:                  %llu/%llu\n"
822                 "available:              %llu/%llu\n"
823                 "freelist_wait:          %s\n"
824                 "open buckets:           %u/%u (reserved %u)\n"
825                 "open_buckets_wait:      %s\n",
826                 fifo_used(&ca->free_inc),               ca->free_inc.size,
827                 fifo_used(&ca->free[RESERVE_PRIO]),     ca->free[RESERVE_PRIO].size,
828                 fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
829                 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
830                 fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
831                 stats.buckets_alloc,                    ca->mi.nbuckets - ca->mi.first_bucket,
832                 stats.buckets_meta,                     ca->mi.nbuckets - ca->mi.first_bucket,
833                 stats.buckets_dirty,                    ca->mi.nbuckets - ca->mi.first_bucket,
834                 __dev_buckets_available(ca, stats),     ca->mi.nbuckets - ca->mi.first_bucket,
835                 c->freelist_wait.list.first             ? "waiting" : "empty",
836                 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
837                 c->open_buckets_wait.list.first         ? "waiting" : "empty");
838 }
839
840 static u64 sectors_written(struct bch_dev *ca)
841 {
842         u64 ret = 0;
843         int cpu;
844
845         for_each_possible_cpu(cpu)
846                 ret += *per_cpu_ptr(ca->sectors_written, cpu);
847
848         return ret;
849 }
850
851 SHOW(bch2_dev)
852 {
853         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
854         struct bch_fs *c = ca->fs;
855         struct bch_dev_usage stats = bch2_dev_usage_read(ca);
856
857         sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
858
859         sysfs_hprint(bucket_size,       bucket_bytes(ca));
860         sysfs_print(bucket_size_bytes,  bucket_bytes(ca));
861         sysfs_hprint(block_size,        block_bytes(c));
862         sysfs_print(block_size_bytes,   block_bytes(c));
863         sysfs_print(first_bucket,       ca->mi.first_bucket);
864         sysfs_print(nbuckets,           ca->mi.nbuckets);
865         sysfs_print(discard,            ca->mi.discard);
866         sysfs_hprint(written, sectors_written(ca) << 9);
867         sysfs_hprint(btree_written,
868                      atomic64_read(&ca->btree_sectors_written) << 9);
869         sysfs_hprint(metadata_written,
870                      (atomic64_read(&ca->meta_sectors_written) +
871                       atomic64_read(&ca->btree_sectors_written)) << 9);
872
873         sysfs_hprint(dirty_data,        stats.sectors[S_DIRTY] << 9);
874         sysfs_print(dirty_bytes,        stats.sectors[S_DIRTY] << 9);
875         sysfs_print(dirty_buckets,      stats.buckets_dirty);
876         sysfs_hprint(cached_data,       stats.sectors[S_CACHED] << 9);
877         sysfs_print(cached_bytes,       stats.sectors[S_CACHED] << 9);
878         sysfs_print(cached_buckets,     stats.buckets_cached);
879         sysfs_print(meta_buckets,       stats.buckets_meta);
880         sysfs_print(alloc_buckets,      stats.buckets_alloc);
881         sysfs_print(available_buckets,  dev_buckets_available(ca));
882         sysfs_print(free_buckets,       dev_buckets_free(ca));
883         sysfs_print(has_data,           ca->mi.has_data);
884         sysfs_print(has_metadata,       ca->mi.has_metadata);
885
886         sysfs_pd_controller_show(copy_gc, &ca->moving_gc_pd);
887
888         if (attr == &sysfs_cache_replacement_policy)
889                 return bch2_snprint_string_list(buf, PAGE_SIZE,
890                                                 bch2_cache_replacement_policies,
891                                                 ca->mi.replacement);
892
893         sysfs_print(tier,               ca->mi.tier);
894
895         if (attr == &sysfs_state_rw)
896                 return bch2_snprint_string_list(buf, PAGE_SIZE,
897                                                 bch2_dev_state,
898                                                 ca->mi.state);
899
900         if (attr == &sysfs_read_priority_stats)
901                 return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
902         if (attr == &sysfs_write_priority_stats)
903                 return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
904         if (attr == &sysfs_fragmentation_stats)
905                 return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
906         if (attr == &sysfs_oldest_gen_stats)
907                 return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
908         if (attr == &sysfs_reserve_stats)
909                 return show_reserve_stats(ca, buf);
910         if (attr == &sysfs_alloc_debug)
911                 return show_dev_alloc_debug(ca, buf);
912
913         return 0;
914 }
915
916 STORE(bch2_dev)
917 {
918         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
919         struct bch_fs *c = ca->fs;
920         struct bch_member *mi;
921
922         sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
923
924         if (attr == &sysfs_discard) {
925                 bool v = strtoul_or_return(buf);
926
927                 mutex_lock(&c->sb_lock);
928                 mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
929
930                 if (v != BCH_MEMBER_DISCARD(mi)) {
931                         SET_BCH_MEMBER_DISCARD(mi, v);
932                         bch2_write_super(c);
933                 }
934                 mutex_unlock(&c->sb_lock);
935         }
936
937         if (attr == &sysfs_cache_replacement_policy) {
938                 ssize_t v = bch2_read_string_list(buf, bch2_cache_replacement_policies);
939
940                 if (v < 0)
941                         return v;
942
943                 mutex_lock(&c->sb_lock);
944                 mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
945
946                 if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
947                         SET_BCH_MEMBER_REPLACEMENT(mi, v);
948                         bch2_write_super(c);
949                 }
950                 mutex_unlock(&c->sb_lock);
951         }
952
953         if (attr == &sysfs_tier) {
954                 unsigned prev_tier;
955                 unsigned v = strtoul_restrict_or_return(buf,
956                                         0, BCH_TIER_MAX - 1);
957
958                 mutex_lock(&c->sb_lock);
959                 prev_tier = ca->mi.tier;
960
961                 if (v == ca->mi.tier) {
962                         mutex_unlock(&c->sb_lock);
963                         return size;
964                 }
965
966                 mi = &bch2_sb_get_members(c->disk_sb)->members[ca->dev_idx];
967                 SET_BCH_MEMBER_TIER(mi, v);
968                 bch2_write_super(c);
969
970                 bch2_dev_group_remove(&c->tiers[prev_tier].devs, ca);
971                 bch2_dev_group_add(&c->tiers[ca->mi.tier].devs, ca);
972                 mutex_unlock(&c->sb_lock);
973
974                 bch2_recalc_capacity(c);
975                 bch2_tiering_start(c);
976         }
977
978         return size;
979 }
980 SYSFS_OPS(bch2_dev);
981
/*
 * NULL-terminated table of the per-device sysfs attributes. The entries are
 * the attributes that SHOW(bch2_dev) and STORE(bch2_dev) compare @attr
 * against.
 */
struct attribute *bch2_dev_files[] = {
        &sysfs_uuid,
        &sysfs_bucket_size,
        &sysfs_bucket_size_bytes,
        &sysfs_block_size,
        &sysfs_block_size_bytes,
        &sysfs_first_bucket,
        &sysfs_nbuckets,
        &sysfs_read_priority_stats,
        &sysfs_write_priority_stats,
        &sysfs_fragmentation_stats,
        &sysfs_oldest_gen_stats,
        &sysfs_reserve_stats,
        &sysfs_available_buckets,
        &sysfs_free_buckets,
        &sysfs_dirty_data,
        &sysfs_dirty_bytes,
        &sysfs_dirty_buckets,
        &sysfs_cached_data,
        &sysfs_cached_bytes,
        &sysfs_cached_buckets,
        &sysfs_meta_buckets,
        &sysfs_alloc_buckets,
        &sysfs_has_data,
        &sysfs_has_metadata,
        &sysfs_discard,
        &sysfs_written,
        &sysfs_btree_written,
        &sysfs_metadata_written,
        &sysfs_cache_replacement_policy,
        &sysfs_tier,
        &sysfs_state_rw,
        &sysfs_alloc_debug,

        /* Presumably expands to the copy_gc PD controller attributes. */
        sysfs_pd_controller_files(copy_gc),
        NULL
};