1 /*
2  * bcachefs sysfs interfaces
3  *
4  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
5  * Copyright 2012 Google, Inc.
6  */
7
8 #ifndef NO_BCACHEFS_SYSFS
9
10 #include "bcachefs.h"
11 #include "alloc_background.h"
12 #include "compress.h"
13 #include "sysfs.h"
14 #include "btree_cache.h"
15 #include "btree_io.h"
16 #include "btree_iter.h"
17 #include "btree_update.h"
18 #include "btree_update_interior.h"
19 #include "btree_gc.h"
20 #include "buckets.h"
21 #include "disk_groups.h"
22 #include "inode.h"
23 #include "journal.h"
24 #include "keylist.h"
25 #include "move.h"
26 #include "opts.h"
27 #include "rebalance.h"
28 #include "replicas.h"
29 #include "super-io.h"
30 #include "tests.h"
31
32 #include <linux/blkdev.h>
33 #include <linux/sort.h>
34 #include <linux/sched/clock.h>
35
36 #include "util.h"
37
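/*
 * Boilerplate generators for the sysfs glue in this file: SYSFS_OPS(type)
 * emits a struct sysfs_ops wired to type##_show()/type##_store(), and
 * SHOW(fn)/STORE(fn) open the definitions of those two callbacks (the
 * function body follows the macro invocation).  For example,
 * SYSFS_OPS(bch2_fs) pairs the bch2_fs_show()/bch2_fs_store() callbacks
 * defined via SHOW(bch2_fs)/STORE(bch2_fs) further down.
 */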
38 #define SYSFS_OPS(type)                                                 \
39 struct sysfs_ops type ## _sysfs_ops = {                                 \
40         .show   = type ## _show,                                        \
41         .store  = type ## _store                                        \
42 }
43
44 #define SHOW(fn)                                                        \
45 static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
46                            char *buf)                                   \
47
48 #define STORE(fn)                                                       \
49 static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
50                             const char *buf, size_t size)               \
51
52 #define __sysfs_attribute(_name, _mode)                                 \
53         static struct attribute sysfs_##_name =                         \
54                 { .name = #_name, .mode = _mode }
55
56 #define write_attribute(n)      __sysfs_attribute(n, S_IWUSR)
57 #define read_attribute(n)       __sysfs_attribute(n, S_IRUGO)
58 #define rw_attribute(n)         __sysfs_attribute(n, S_IRUGO|S_IWUSR)
59
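/*
 * Show-side helpers: each of these checks whether the attribute being read
 * is sysfs_<file> and, if so, returns the formatted value from the enclosing
 * *_show() callback.  They are only meaningful inside a SHOW() body, where
 * "attr" and "buf" are the callback parameters; sysfs_hprint() additionally
 * formats the value in human-readable units via bch2_hprint().
 */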
60 #define sysfs_printf(file, fmt, ...)                                    \
61 do {                                                                    \
62         if (attr == &sysfs_ ## file)                                    \
63                 return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
64 } while (0)
65
66 #define sysfs_print(file, var)                                          \
67 do {                                                                    \
68         if (attr == &sysfs_ ## file)                                    \
69                 return snprint(buf, PAGE_SIZE, var);                    \
70 } while (0)
71
72 #define sysfs_hprint(file, val)                                         \
73 do {                                                                    \
74         if (attr == &sysfs_ ## file) {                                  \
75                 ssize_t ret = bch2_hprint(buf, val);                    \
76                 strcat(buf, "\n");                                      \
77                 return ret + 1;                                         \
78         }                                                               \
79 } while (0)
80
81 #define var_printf(_var, fmt)   sysfs_printf(_var, fmt, var(_var))
82 #define var_print(_var)         sysfs_print(_var, var(_var))
83 #define var_hprint(_var)        sysfs_hprint(_var, var(_var))
84
85 #define sysfs_strtoul(file, var)                                        \
86 do {                                                                    \
87         if (attr == &sysfs_ ## file)                                    \
88                 return strtoul_safe(buf, var) ?: (ssize_t) size;        \
89 } while (0)
90
91 #define sysfs_strtoul_clamp(file, var, min, max)                        \
92 do {                                                                    \
93         if (attr == &sysfs_ ## file)                                    \
94                 return strtoul_safe_clamp(buf, var, min, max)           \
95                         ?: (ssize_t) size;                              \
96 } while (0)
97
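/*
 * Store-side helpers: the ({ ... }) statement expressions parse the user
 * supplied string and return the error from the enclosing *_store() callback
 * on failure, otherwise they evaluate to the parsed value.  sysfs_strtoul(),
 * sysfs_strtoul_clamp() and sysfs_hatoi() additionally match on the attribute
 * and return "size" (the number of bytes consumed) on success, as sysfs
 * expects from a store method.
 */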
98 #define strtoul_or_return(cp)                                           \
99 ({                                                                      \
100         unsigned long _v;                                               \
101         int _r = kstrtoul(cp, 10, &_v);                                 \
102         if (_r)                                                         \
103                 return _r;                                              \
104         _v;                                                             \
105 })
106
107 #define strtoul_restrict_or_return(cp, min, max)                        \
108 ({                                                                      \
109         unsigned long __v = 0;                                          \
110         int _r = strtoul_safe_restrict(cp, __v, min, max);              \
111         if (_r)                                                         \
112                 return _r;                                              \
113         __v;                                                            \
114 })
115
116 #define strtoi_h_or_return(cp)                                          \
117 ({                                                                      \
118         u64 _v;                                                         \
119         int _r = strtoi_h(cp, &_v);                                     \
120         if (_r)                                                         \
121                 return _r;                                              \
122         _v;                                                             \
123 })
124
125 #define sysfs_hatoi(file, var)                                          \
126 do {                                                                    \
127         if (attr == &sysfs_ ## file)                                    \
128                 return strtoi_h(buf, &var) ?: (ssize_t) size;           \
129 } while (0)
130
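/*
 * Attribute declarations: each *_attribute() invocation creates a
 * "struct attribute sysfs_<name>" with the given mode.  The SHOW()/STORE()
 * switchboards below compare "attr" against these objects, and the
 * bch2_*_files[] arrays at the end of the file decide which of them are
 * actually registered.  The resulting files appear under the filesystem's
 * kobject, e.g. (assuming the usual bcachefs sysfs layout)
 * /sys/fs/bcachefs/<fs-uuid>/btree_cache_size.
 */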
131 write_attribute(trigger_journal_flush);
132 write_attribute(trigger_btree_coalesce);
133 write_attribute(trigger_gc);
134 write_attribute(prune_cache);
135 rw_attribute(btree_gc_periodic);
136
137 read_attribute(uuid);
138 read_attribute(minor);
139 read_attribute(bucket_size);
140 read_attribute(block_size);
141 read_attribute(btree_node_size);
142 read_attribute(first_bucket);
143 read_attribute(nbuckets);
144 read_attribute(durability);
145 read_attribute(iodone);
146
147 read_attribute(io_latency_read);
148 read_attribute(io_latency_write);
149 read_attribute(io_latency_stats_read);
150 read_attribute(io_latency_stats_write);
151 read_attribute(congested);
152
153 read_attribute(bucket_quantiles_last_read);
154 read_attribute(bucket_quantiles_last_write);
155 read_attribute(bucket_quantiles_fragmentation);
156 read_attribute(bucket_quantiles_oldest_gen);
157
158 read_attribute(reserve_stats);
159 read_attribute(btree_cache_size);
160 read_attribute(compression_stats);
161 read_attribute(journal_debug);
162 read_attribute(journal_pins);
163 read_attribute(btree_updates);
164 read_attribute(dirty_btree_nodes);
165
166 read_attribute(internal_uuid);
167
168 read_attribute(has_data);
169 read_attribute(alloc_debug);
170 write_attribute(wake_allocator);
171
172 read_attribute(read_realloc_races);
173 read_attribute(extent_migrate_done);
174 read_attribute(extent_migrate_raced);
175
176 rw_attribute(journal_write_delay_ms);
177 rw_attribute(journal_reclaim_delay_ms);
178
179 rw_attribute(discard);
180 rw_attribute(cache_replacement_policy);
181 rw_attribute(label);
182
183 rw_attribute(copy_gc_enabled);
184 sysfs_pd_controller_attribute(copy_gc);
185
186 rw_attribute(rebalance_enabled);
187 sysfs_pd_controller_attribute(rebalance);
188 read_attribute(rebalance_work);
189 rw_attribute(promote_whole_extents);
190
191 rw_attribute(pd_controllers_update_seconds);
192
193 read_attribute(meta_replicas_have);
194 read_attribute(data_replicas_have);
195
196 #ifdef CONFIG_BCACHEFS_TESTS
197 write_attribute(perf_test);
198 #endif /* CONFIG_BCACHEFS_TESTS */
199
200 #define BCH_DEBUG_PARAM(name, description)                              \
201         rw_attribute(name);
202
203         BCH_DEBUG_PARAMS()
204 #undef BCH_DEBUG_PARAM
205
206 #define x(_name)                                                \
207         static struct attribute sysfs_time_stat_##_name =               \
208                 { .name = #_name, .mode = S_IRUGO };
209         BCH_TIME_STATS()
210 #undef x
211
212 static struct attribute sysfs_state_rw = {
213         .name = "state",
214         .mode = S_IRUGO
215 };
216
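/*
 * Approximate size of the in-memory btree node cache: each node on the live
 * list is counted at the full btree_bytes(c) node size, i.e. number of
 * cached nodes times the node size.
 */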
217 static size_t bch2_btree_cache_size(struct bch_fs *c)
218 {
219         size_t ret = 0;
220         struct btree *b;
221
222         mutex_lock(&c->btree_cache.lock);
223         list_for_each_entry(b, &c->btree_cache.live, list)
224                 ret += btree_bytes(c);
225
226         mutex_unlock(&c->btree_cache.lock);
227         return ret;
228 }
229
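/*
 * Backs the fs-level "alloc_debug" file: prints a snapshot of
 * bch2_fs_usage_read(), broken down by replication level and data type,
 * followed by per-type bucket counts and the online reserved total.
 */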
230 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
231 {
232         char *out = buf, *end = buf + PAGE_SIZE;
233         struct bch_fs_usage stats = bch2_fs_usage_read(c);
234         unsigned replicas, type;
235
236         out += scnprintf(out, end - out,
237                          "capacity:\t\t%llu\n",
238                          c->capacity);
239
240         for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) {
241                 out += scnprintf(out, end - out,
242                                  "%u replicas:\n",
243                                  replicas + 1);
244
245                 for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
246                         out += scnprintf(out, end - out,
247                                          "\t%s:\t\t%llu\n",
248                                          bch2_data_types[type],
249                                          stats.replicas[replicas].data[type]);
250                 out += scnprintf(out, end - out,
251                                  "\treserved:\t%llu\n",
252                                  stats.replicas[replicas].persistent_reserved);
253         }
254
255         out += scnprintf(out, end - out, "bucket usage\n");
256
257         for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
258                 out += scnprintf(out, end - out,
259                                  "\t%s:\t\t%llu\n",
260                                  bch2_data_types[type],
261                                  stats.buckets[type]);
262
263         out += scnprintf(out, end - out,
264                          "online reserved:\t%llu\n",
265                          stats.online_reserved);
266
267         return out - buf;
268 }
269
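/*
 * Backs the "compression_stats" file: walks every key in the extents btree
 * and tallies compressed vs. uncompressed extents.  Only the first pointer
 * of each extent is examined, so replicated copies are not double counted;
 * sector counts are shifted left by 9 to report bytes.
 */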
270 static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
271 {
272         struct btree_iter iter;
273         struct bkey_s_c k;
274         u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
275             nr_compressed_extents = 0,
276             compressed_sectors_compressed = 0,
277             compressed_sectors_uncompressed = 0;
278
279         if (!bch2_fs_running(c))
280                 return -EPERM;
281
282         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
283                 if (k.k->type == BCH_EXTENT) {
284                         struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
285                         const struct bch_extent_ptr *ptr;
286                         struct bch_extent_crc_unpacked crc;
287
288                         extent_for_each_ptr_crc(e, ptr, crc) {
289                                 if (crc.compression_type == BCH_COMPRESSION_NONE) {
290                                         nr_uncompressed_extents++;
291                                         uncompressed_sectors += e.k->size;
292                                 } else {
293                                         nr_compressed_extents++;
294                                         compressed_sectors_compressed +=
295                                                 crc.compressed_size;
296                                         compressed_sectors_uncompressed +=
297                                                 crc.uncompressed_size;
298                                 }
299
300                                 /* only looking at the first ptr */
301                                 break;
302                         }
303                 }
304         bch2_btree_iter_unlock(&iter);
305
306         return scnprintf(buf, PAGE_SIZE,
307                         "uncompressed data:\n"
308                         "       nr extents:                     %llu\n"
309                         "       size (bytes):                   %llu\n"
310                         "compressed data:\n"
311                         "       nr extents:                     %llu\n"
312                         "       compressed size (bytes):        %llu\n"
313                         "       uncompressed size (bytes):      %llu\n",
314                         nr_uncompressed_extents,
315                         uncompressed_sectors << 9,
316                         nr_compressed_extents,
317                         compressed_sectors_compressed << 9,
318                         compressed_sectors_uncompressed << 9);
319 }
320
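/*
 * Top-level per-filesystem show callback: reads of files in bch2_fs_files[]
 * and bch2_fs_internal_files[] land here with "attr" identifying which file
 * was opened, e.g. (path illustrative)
 *
 *	cat /sys/fs/bcachefs/<fs-uuid>/block_size
 */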
321 SHOW(bch2_fs)
322 {
323         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
324
325         sysfs_print(minor,                      c->minor);
326         sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
327
328         sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
329         sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
330
331         sysfs_print(block_size,                 block_bytes(c));
332         sysfs_print(btree_node_size,            btree_bytes(c));
333         sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
334
335         sysfs_print(read_realloc_races,
336                     atomic_long_read(&c->read_realloc_races));
337         sysfs_print(extent_migrate_done,
338                     atomic_long_read(&c->extent_migrate_done));
339         sysfs_print(extent_migrate_raced,
340                     atomic_long_read(&c->extent_migrate_raced));
341
342         sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);
343
344         sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
345
346         sysfs_print(pd_controllers_update_seconds,
347                     c->pd_controllers_update_seconds);
348
349         sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
350         sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
351
352         if (attr == &sysfs_rebalance_work)
353                 return bch2_rebalance_work_show(c, buf);
354
355         sysfs_print(promote_whole_extents,      c->promote_whole_extents);
356
357         sysfs_printf(meta_replicas_have, "%u",  bch2_replicas_online(c, true));
358         sysfs_printf(data_replicas_have, "%u",  bch2_replicas_online(c, false));
359
360         /* Debugging: */
361
362         if (attr == &sysfs_alloc_debug)
363                 return show_fs_alloc_debug(c, buf);
364
365         if (attr == &sysfs_journal_debug)
366                 return bch2_journal_print_debug(&c->journal, buf);
367
368         if (attr == &sysfs_journal_pins)
369                 return bch2_journal_print_pins(&c->journal, buf);
370
371         if (attr == &sysfs_btree_updates)
372                 return bch2_btree_updates_print(c, buf);
373
374         if (attr == &sysfs_dirty_btree_nodes)
375                 return bch2_dirty_btree_nodes_print(c, buf);
376
377         if (attr == &sysfs_compression_stats)
378                 return bch2_compression_stats(c, buf);
379
380 #define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
381         BCH_DEBUG_PARAMS()
382 #undef BCH_DEBUG_PARAM
383
384         return 0;
385 }
386
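/*
 * __bch2_fs_store() does the real work for writes to the filesystem kobject;
 * bch2_fs_store() below wraps it in c->state_lock.  The write-only trigger_*
 * attributes act as knobs rather than settings, e.g. writing anything to
 * trigger_gc (path illustrative) starts a btree gc:
 *
 *	echo 1 > /sys/fs/bcachefs/<fs-uuid>/internal/trigger_gc
 */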
387 STORE(__bch2_fs)
388 {
389         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
390
391         sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
392         sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
393
394         if (attr == &sysfs_btree_gc_periodic) {
395                 ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
396                         ?: (ssize_t) size;
397
398                 wake_up_process(c->gc_thread);
399                 return ret;
400         }
401
402         if (attr == &sysfs_copy_gc_enabled) {
403                 struct bch_dev *ca;
404                 unsigned i;
405                 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
406                         ?: (ssize_t) size;
407
408                 for_each_member_device(ca, c, i)
409                         if (ca->copygc_thread)
410                                 wake_up_process(ca->copygc_thread);
411                 return ret;
412         }
413
414         if (attr == &sysfs_rebalance_enabled) {
415                 ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
416                         ?: (ssize_t) size;
417
418                 rebalance_wakeup(c);
419                 return ret;
420         }
421
422         sysfs_strtoul(pd_controllers_update_seconds,
423                       c->pd_controllers_update_seconds);
424         sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
425
426         sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
427
428         /* Debugging: */
429
430 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
431         BCH_DEBUG_PARAMS()
432 #undef BCH_DEBUG_PARAM
433
434         if (!bch2_fs_running(c))
435                 return -EPERM;
436
437         /* Debugging: */
438
439         if (attr == &sysfs_trigger_journal_flush)
440                 bch2_journal_meta_async(&c->journal, NULL);
441
442         if (attr == &sysfs_trigger_btree_coalesce)
443                 bch2_coalesce(c);
444
445         if (attr == &sysfs_trigger_gc)
446                 bch2_gc(c);
447
448         if (attr == &sysfs_prune_cache) {
449                 struct shrink_control sc;
450
451                 sc.gfp_mask = GFP_KERNEL;
452                 sc.nr_to_scan = strtoul_or_return(buf);
453                 c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
454         }
455 #ifdef CONFIG_BCACHEFS_TESTS
456         if (attr == &sysfs_perf_test) {
457                 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
458                 char *test              = strsep(&p, " \t\n");
459                 char *nr_str            = strsep(&p, " \t\n");
460                 char *threads_str       = strsep(&p, " \t\n");
461                 unsigned threads;
462                 u64 nr;
463                 int ret = -EINVAL;
464
465                 if (threads_str &&
466                     !(ret = kstrtouint(threads_str, 10, &threads)) &&
467                     !(ret = bch2_strtoull_h(nr_str, &nr)))
468                         bch2_btree_perf_test(c, test, nr, threads);
469                 else
470                         size = ret;
471                 kfree(tmp);
472         }
473 #endif
474         return size;
475 }
476
477 STORE(bch2_fs)
478 {
479         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
480
481         mutex_lock(&c->state_lock);
482         size = __bch2_fs_store(kobj, attr, buf, size);
483         mutex_unlock(&c->state_lock);
484
485         return size;
486 }
487 SYSFS_OPS(bch2_fs);
488
489 struct attribute *bch2_fs_files[] = {
490         &sysfs_minor,
491         &sysfs_block_size,
492         &sysfs_btree_node_size,
493         &sysfs_btree_cache_size,
494
495         &sysfs_meta_replicas_have,
496         &sysfs_data_replicas_have,
497
498         &sysfs_journal_write_delay_ms,
499         &sysfs_journal_reclaim_delay_ms,
500
501         &sysfs_promote_whole_extents,
502
503         &sysfs_compression_stats,
504
505 #ifdef CONFIG_BCACHEFS_TESTS
506         &sysfs_perf_test,
507 #endif
508         NULL
509 };
510
511 /* internal dir - just a wrapper */
512
513 SHOW(bch2_fs_internal)
514 {
515         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
516         return bch2_fs_show(&c->kobj, attr, buf);
517 }
518
519 STORE(bch2_fs_internal)
520 {
521         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
522         return bch2_fs_store(&c->kobj, attr, buf, size);
523 }
524 SYSFS_OPS(bch2_fs_internal);
525
526 struct attribute *bch2_fs_internal_files[] = {
527         &sysfs_alloc_debug,
528         &sysfs_journal_debug,
529         &sysfs_journal_pins,
530         &sysfs_btree_updates,
531         &sysfs_dirty_btree_nodes,
532
533         &sysfs_read_realloc_races,
534         &sysfs_extent_migrate_done,
535         &sysfs_extent_migrate_raced,
536
537         &sysfs_trigger_journal_flush,
538         &sysfs_trigger_btree_coalesce,
539         &sysfs_trigger_gc,
540         &sysfs_prune_cache,
541
542         &sysfs_copy_gc_enabled,
543
544         &sysfs_rebalance_enabled,
545         &sysfs_rebalance_work,
546         sysfs_pd_controller_files(rebalance),
547
548         &sysfs_internal_uuid,
549
550 #define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
551         BCH_DEBUG_PARAMS()
552 #undef BCH_DEBUG_PARAM
553
554         NULL
555 };
556
557 /* options */
558
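/*
 * Every bch_option in bch2_opt_table embeds its own struct attribute, so the
 * option being accessed is recovered by pointer arithmetic on "attr".  A
 * store parses the new value, persists it to the superblock via opt->set_sb()
 * where applicable, and updates the in-memory options, e.g. (illustrative)
 *
 *	echo lz4 > /sys/fs/bcachefs/<fs-uuid>/options/compression
 */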
559 SHOW(bch2_fs_opts_dir)
560 {
561         char *out = buf, *end = buf + PAGE_SIZE;
562         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
563         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
564         int id = opt - bch2_opt_table;
565         u64 v = bch2_opt_get_by_id(&c->opts, id);
566
567         out += bch2_opt_to_text(c, out, end - out, opt, v, OPT_SHOW_FULL_LIST);
568         out += scnprintf(out, end - out, "\n");
569
570         return out - buf;
571 }
572
573 STORE(bch2_fs_opts_dir)
574 {
575         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
576         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
577         int ret, id = opt - bch2_opt_table;
578         char *tmp;
579         u64 v;
580
581         tmp = kstrdup(buf, GFP_KERNEL);
582         if (!tmp)
583                 return -ENOMEM;
584
585         ret = bch2_opt_parse(c, opt, strim(tmp), &v);
586         kfree(tmp);
587
588         if (ret < 0)
589                 return ret;
590
591         if (id == Opt_compression ||
592             id == Opt_background_compression) {
593                 int ret = bch2_check_set_has_compressed_data(c, v);
594                 if (ret) {
596                         return ret;
597                 }
598         }
599
600         if (opt->set_sb != SET_NO_SB_OPT) {
601                 mutex_lock(&c->sb_lock);
602                 opt->set_sb(c->disk_sb.sb, v);
603                 bch2_write_super(c);
604                 mutex_unlock(&c->sb_lock);
605         }
606
607         bch2_opt_set_by_id(&c->opts, id, v);
608
609         if ((id == Opt_background_target ||
610              id == Opt_background_compression) && v) {
611                 bch2_rebalance_add_work(c, S64_MAX);
612                 rebalance_wakeup(c);
613         }
614
615         return size;
616 }
617 SYSFS_OPS(bch2_fs_opts_dir);
618
619 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
620
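/*
 * Registers one sysfs file per user-visible option; options marked
 * OPT_INTERNAL are skipped.
 */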
621 int bch2_opts_create_sysfs_files(struct kobject *kobj)
622 {
623         const struct bch_option *i;
624         int ret;
625
626         for (i = bch2_opt_table;
627              i < bch2_opt_table + bch2_opts_nr;
628              i++) {
629                 if (i->mode == OPT_INTERNAL)
630                         continue;
631
632                 ret = sysfs_create_file(kobj, &i->attr);
633                 if (ret)
634                         return ret;
635         }
636
637         return 0;
638 }
639
640 /* time stats */
641
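/*
 * One read-only file per entry in BCH_TIME_STATS(), each printing the
 * corresponding accumulator from c->times[].
 */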
642 SHOW(bch2_fs_time_stats)
643 {
644         struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
645
646 #define x(name)                                         \
647         if (attr == &sysfs_time_stat_##name)                            \
648                 return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
649                                              buf, PAGE_SIZE);
650         BCH_TIME_STATS()
651 #undef x
652
653         return 0;
654 }
655
656 STORE(bch2_fs_time_stats)
657 {
658         return size;
659 }
660 SYSFS_OPS(bch2_fs_time_stats);
661
662 struct attribute *bch2_fs_time_stats_files[] = {
663 #define x(name)                                         \
664         &sysfs_time_stat_##name,
665         BCH_TIME_STATS()
666 #undef x
667         NULL
668 };
669
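/*
 * Bucket quantile files: a bucket_map_fn maps each bucket of a device to a
 * single unsigned value (last I/O time, sectors used, or gc generation age).
 * show_quantiles() evaluates it for every bucket, sorts the results, drops
 * trailing zeroes, and prints 31 evenly spaced quantiles of the distribution
 * on a single line.
 */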
670 typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
671                                  size_t, void *);
672
673 static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
674                                   size_t b, void *private)
675 {
676         int rw = (private ? 1 : 0);
677
678         return bucket_last_io(c, bucket(ca, b), rw);
679 }
680
681 static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
682                                        size_t b, void *private)
683 {
684         struct bucket *g = bucket(ca, b);
685         return bucket_sectors_used(g->mark);
686 }
687
688 static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
689                                      size_t b, void *private)
690 {
691         return bucket_gc_gen(ca, b);
692 }
693
694 static int unsigned_cmp(const void *_l, const void *_r)
695 {
696         unsigned l = *((unsigned *) _l);
697         unsigned r = *((unsigned *) _r);
698
699         return (l > r) - (l < r);
700 }
701
702 static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
703                               char *buf, bucket_map_fn *fn, void *private)
704 {
705         size_t i, n;
706         /* Compute 31 quantiles */
707         unsigned q[31], *p;
708         ssize_t ret = 0;
709
710         down_read(&ca->bucket_lock);
711         n = ca->mi.nbuckets;
712
713         p = vzalloc(n * sizeof(unsigned));
714         if (!p) {
715                 up_read(&ca->bucket_lock);
716                 return -ENOMEM;
717         }
718
719         for (i = ca->mi.first_bucket; i < n; i++)
720                 p[i] = fn(c, ca, i, private);
721
722         sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
723         up_read(&ca->bucket_lock);
724
725         while (n &&
726                !p[n - 1])
727                 --n;
728
729         for (i = 0; i < ARRAY_SIZE(q); i++)
730                 q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
731
732         vfree(p);
733
734         for (i = 0; i < ARRAY_SIZE(q); i++)
735                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
736                                  "%u ", q[i]);
737         buf[ret - 1] = '\n';
738
739         return ret;
740 }
741
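/*
 * Free-list occupancy: prints used vs. total slots for free_inc and for each
 * allocation reserve fifo, under the device's freelist_lock.
 */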
742 static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
743 {
744         enum alloc_reserve i;
745         ssize_t ret;
746
747         spin_lock(&ca->freelist_lock);
748
749         ret = scnprintf(buf, PAGE_SIZE,
750                         "free_inc:\t%zu\t%zu\n",
751                         fifo_used(&ca->free_inc),
752                         ca->free_inc.size);
753
754         for (i = 0; i < RESERVE_NR; i++)
755                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
756                                  "free[%u]:\t%zu\t%zu\n", i,
757                                  fifo_used(&ca->free[i]),
758                                  ca->free[i].size);
759
760         spin_unlock(&ca->freelist_lock);
761
762         return ret;
763 }
764
765 static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
766 {
767         struct bch_fs *c = ca->fs;
768         struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
769
770         return scnprintf(buf, PAGE_SIZE,
771                 "free_inc:               %zu/%zu\n"
772                 "free[RESERVE_BTREE]:    %zu/%zu\n"
773                 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
774                 "free[RESERVE_NONE]:     %zu/%zu\n"
775                 "buckets:\n"
776                 "    capacity:           %llu\n"
777                 "    alloc:              %llu\n"
778                 "    sb:                 %llu\n"
779                 "    journal:            %llu\n"
780                 "    meta:               %llu\n"
781                 "    user:               %llu\n"
782                 "    cached:             %llu\n"
783                 "    available:          %lli\n"
784                 "sectors:\n"
785                 "    sb:                 %llu\n"
786                 "    journal:            %llu\n"
787                 "    meta:               %llu\n"
788                 "    user:               %llu\n"
789                 "    cached:             %llu\n"
790                 "    fragmented:         %llu\n"
791                 "    copygc threshold:   %llu\n"
792                 "freelist_wait:          %s\n"
793                 "open buckets:           %u/%u (reserved %u)\n"
794                 "open_buckets_wait:      %s\n",
795                 fifo_used(&ca->free_inc),               ca->free_inc.size,
796                 fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
797                 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
798                 fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
799                 ca->mi.nbuckets - ca->mi.first_bucket,
800                 stats.buckets_alloc,
801                 stats.buckets[BCH_DATA_SB],
802                 stats.buckets[BCH_DATA_JOURNAL],
803                 stats.buckets[BCH_DATA_BTREE],
804                 stats.buckets[BCH_DATA_USER],
805                 stats.buckets[BCH_DATA_CACHED],
806                 ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
807                 stats.sectors[BCH_DATA_SB],
808                 stats.sectors[BCH_DATA_JOURNAL],
809                 stats.sectors[BCH_DATA_BTREE],
810                 stats.sectors[BCH_DATA_USER],
811                 stats.sectors[BCH_DATA_CACHED],
812                 stats.sectors_fragmented,
813                 ca->copygc_threshold,
814                 c->freelist_wait.list.first             ? "waiting" : "empty",
815                 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
816                 c->open_buckets_wait.list.first         ? "waiting" : "empty");
817 }
818
819 static const char * const bch2_rw[] = {
820         "read",
821         "write",
822         NULL
823 };
824
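/*
 * Per-device I/O accounting: sums the per-cpu sector counters for each
 * (read/write, data type) pair and reports them in bytes (sectors << 9).
 */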
825 static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
826 {
827         char *out = buf, *end = buf + PAGE_SIZE;
828         int rw, i, cpu;
829
830         for (rw = 0; rw < 2; rw++) {
831                 out += scnprintf(out, end - out, "%s:\n", bch2_rw[rw]);
832
833                 for (i = 1; i < BCH_DATA_NR; i++) {
834                         u64 n = 0;
835
836                         for_each_possible_cpu(cpu)
837                                 n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i];
838
839                         out += scnprintf(out, end - out, "%-12s:%12llu\n",
840                                          bch2_data_types[i], n << 9);
841                 }
842         }
843
844         return out - buf;
845 }
846
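/*
 * Per-device show callback, backing the files listed in bch2_dev_files[] at
 * the bottom of the file; these appear under the device's subdirectory of
 * the filesystem kobject (e.g., assuming the usual layout,
 * /sys/fs/bcachefs/<fs-uuid>/dev-0/io_latency_read).
 */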
847 SHOW(bch2_dev)
848 {
849         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
850         struct bch_fs *c = ca->fs;
851         char *out = buf, *end = buf + PAGE_SIZE;
852
853         sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
854
855         sysfs_print(bucket_size,        bucket_bytes(ca));
856         sysfs_print(block_size,         block_bytes(c));
857         sysfs_print(first_bucket,       ca->mi.first_bucket);
858         sysfs_print(nbuckets,           ca->mi.nbuckets);
859         sysfs_print(durability,         ca->mi.durability);
860         sysfs_print(discard,            ca->mi.discard);
861
862         if (attr == &sysfs_label) {
863                 if (ca->mi.group) {
864                         mutex_lock(&c->sb_lock);
865                         out += bch2_disk_path_print(&c->disk_sb, out, end - out,
866                                                     ca->mi.group - 1);
867                         mutex_unlock(&c->sb_lock);
868                 } else {
869                         out += scnprintf(out, end - out, "none");
870                 }
871
872                 out += scnprintf(out, end - out, "\n");
873                 return out - buf;
874         }
875
876         if (attr == &sysfs_has_data) {
877                 out += bch2_scnprint_flag_list(out, end - out,
878                                                bch2_data_types,
879                                                bch2_dev_has_data(c, ca));
880                 out += scnprintf(out, end - out, "\n");
881                 return out - buf;
882         }
883
884         sysfs_pd_controller_show(copy_gc, &ca->copygc_pd);
885
886         if (attr == &sysfs_cache_replacement_policy) {
887                 out += bch2_scnprint_string_list(out, end - out,
888                                                  bch2_cache_replacement_policies,
889                                                  ca->mi.replacement);
890                 out += scnprintf(out, end - out, "\n");
891                 return out - buf;
892         }
893
894         if (attr == &sysfs_state_rw) {
895                 out += bch2_scnprint_string_list(out, end - out,
896                                                  bch2_dev_state,
897                                                  ca->mi.state);
898                 out += scnprintf(out, end - out, "\n");
899                 return out - buf;
900         }
901
902         if (attr == &sysfs_iodone)
903                 return show_dev_iodone(ca, buf);
904
905         sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
906         sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
907
908         if (attr == &sysfs_io_latency_stats_read)
909                 return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
910         if (attr == &sysfs_io_latency_stats_write)
911                 return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
912
913         sysfs_printf(congested,                 "%u%%",
914                      clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
915                      * 100 / CONGESTED_MAX);
916
917         if (attr == &sysfs_bucket_quantiles_last_read)
918                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
919         if (attr == &sysfs_bucket_quantiles_last_write)
920                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
921         if (attr == &sysfs_bucket_quantiles_fragmentation)
922                 return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
923         if (attr == &sysfs_bucket_quantiles_oldest_gen)
924                 return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
925
926         if (attr == &sysfs_reserve_stats)
927                 return show_reserve_stats(ca, buf);
928         if (attr == &sysfs_alloc_debug)
929                 return show_dev_alloc_debug(ca, buf);
930
931         return 0;
932 }
933
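/*
 * Per-device store callback: changes to discard and cache_replacement_policy
 * update the device's member entry in the superblock (under sb_lock) and are
 * persisted with bch2_write_super() only if the value actually changed;
 * label writes go through bch2_dev_group_set(), and wake_allocator pokes the
 * device's allocator thread.
 */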
934 STORE(bch2_dev)
935 {
936         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
937         struct bch_fs *c = ca->fs;
938         struct bch_member *mi;
939
940         sysfs_pd_controller_store(copy_gc, &ca->copygc_pd);
941
942         if (attr == &sysfs_discard) {
943                 bool v = strtoul_or_return(buf);
944
945                 mutex_lock(&c->sb_lock);
946                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
947
948                 if (v != BCH_MEMBER_DISCARD(mi)) {
949                         SET_BCH_MEMBER_DISCARD(mi, v);
950                         bch2_write_super(c);
951                 }
952                 mutex_unlock(&c->sb_lock);
953         }
954
955         if (attr == &sysfs_cache_replacement_policy) {
956                 ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
957
958                 if (v < 0)
959                         return v;
960
961                 mutex_lock(&c->sb_lock);
962                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
963
964                 if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
965                         SET_BCH_MEMBER_REPLACEMENT(mi, v);
966                         bch2_write_super(c);
967                 }
968                 mutex_unlock(&c->sb_lock);
969         }
970
971         if (attr == &sysfs_label) {
972                 char *tmp;
973                 int ret;
974
975                 tmp = kstrdup(buf, GFP_KERNEL);
976                 if (!tmp)
977                         return -ENOMEM;
978
979                 ret = bch2_dev_group_set(c, ca, strim(tmp));
980                 kfree(tmp);
981                 if (ret)
982                         return ret;
983         }
984
985         if (attr == &sysfs_wake_allocator)
986                 bch2_wake_allocator(ca);
987
988         return size;
989 }
990 SYSFS_OPS(bch2_dev);
991
992 struct attribute *bch2_dev_files[] = {
993         &sysfs_uuid,
994         &sysfs_bucket_size,
995         &sysfs_block_size,
996         &sysfs_first_bucket,
997         &sysfs_nbuckets,
998         &sysfs_durability,
999
1000         /* settings: */
1001         &sysfs_discard,
1002         &sysfs_cache_replacement_policy,
1003         &sysfs_state_rw,
1004         &sysfs_label,
1005
1006         &sysfs_has_data,
1007         &sysfs_iodone,
1008
1009         &sysfs_io_latency_read,
1010         &sysfs_io_latency_write,
1011         &sysfs_io_latency_stats_read,
1012         &sysfs_io_latency_stats_write,
1013         &sysfs_congested,
1014
1015         /* alloc info - other stats: */
1016         &sysfs_bucket_quantiles_last_read,
1017         &sysfs_bucket_quantiles_last_write,
1018         &sysfs_bucket_quantiles_fragmentation,
1019         &sysfs_bucket_quantiles_oldest_gen,
1020
1021         &sysfs_reserve_stats,
1022
1023         /* debug: */
1024         &sysfs_alloc_debug,
1025         &sysfs_wake_allocator,
1026
1027         sysfs_pd_controller_files(copy_gc),
1028         NULL
1029 };
1030
1030 #endif  /* NO_BCACHEFS_SYSFS */