]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/sysfs.c
Update bcachefs sources to d83b992f65 bcachefs: Rewrite journal_seq_blacklist machinery
[bcachefs-tools-debian] / libbcachefs / sysfs.c
1 /*
2  * bcache sysfs interfaces
3  *
4  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
5  * Copyright 2012 Google, Inc.
6  */
7
8 #ifndef NO_BCACHEFS_SYSFS
9
10 #include "bcachefs.h"
11 #include "alloc_background.h"
12 #include "sysfs.h"
13 #include "btree_cache.h"
14 #include "btree_io.h"
15 #include "btree_iter.h"
16 #include "btree_update.h"
17 #include "btree_update_interior.h"
18 #include "btree_gc.h"
19 #include "buckets.h"
20 #include "disk_groups.h"
21 #include "ec.h"
22 #include "inode.h"
23 #include "journal.h"
24 #include "keylist.h"
25 #include "move.h"
26 #include "opts.h"
27 #include "rebalance.h"
28 #include "replicas.h"
29 #include "super-io.h"
30 #include "tests.h"
31
32 #include <linux/blkdev.h>
33 #include <linux/sort.h>
34 #include <linux/sched/clock.h>
35
36 #include "util.h"
37
/*
 * Define the sysfs_ops table <type>_sysfs_ops, wired to the handlers
 * <type>_show and <type>_store (see SHOW() and STORE()).
 */
#define SYSFS_OPS(type)							\
struct sysfs_ops type ## _sysfs_ops = {					\
	.show	= type ## _show,					\
	.store	= type ## _store					\
}

/*
 * Emit the prototype of a show handler named <fn>_show; the function body
 * follows the macro invocation.  The parameter names kobj, attr and buf are
 * referenced by name from the sysfs_print*() helper macros.
 */
#define SHOW(fn)							\
static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
			   char *buf)

/*
 * Emit the prototype of a store handler named <fn>_store; the function body
 * follows the macro invocation.  The parameter names attr, buf and size are
 * referenced by name from the sysfs_strtoul*() helper macros.
 */
#define STORE(fn)							\
static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
			    const char *buf, size_t size)
51
/*
 * Declare a file-local attribute object sysfs_<_name> whose sysfs file name
 * is the stringified _name and whose permission bits are _mode.
 */
#define __sysfs_attribute(_name, _mode)					\
	static struct attribute sysfs_##_name =				\
		{ .name = #_name, .mode = _mode }

/* Convenience wrappers: write-only, read-only and read/write attributes. */
#define write_attribute(n)	__sysfs_attribute(n, S_IWUSR)
#define read_attribute(n)	__sysfs_attribute(n, S_IRUGO)
#define rw_attribute(n)		__sysfs_attribute(n, S_IRUGO|S_IWUSR)
59
/*
 * Show helpers.
 *
 * Each of these tests whether the attribute being shown is sysfs_<file> and,
 * if so, formats the value into buf and returns from the *enclosing*
 * function.  They therefore only work inside a handler that has locals
 * named attr and buf, such as one declared with SHOW().
 */

/* printf-style formatting; a trailing newline is appended to fmt. */
#define sysfs_printf(file, fmt, ...)					\
do {									\
	if (attr == &sysfs_ ## file)					\
		return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
} while (0)

/* Format var via snprint(). */
#define sysfs_print(file, var)						\
do {									\
	if (attr == &sysfs_ ## file)					\
		return snprint(buf, PAGE_SIZE, var);			\
} while (0)

/* Format val via bch2_hprint(), followed by a newline. */
#define sysfs_hprint(file, val)						\
do {									\
	if (attr == &sysfs_ ## file) {					\
		struct printbuf out = _PBUF(buf, PAGE_SIZE);		\
		bch2_hprint(&out, val);					\
		pr_buf(&out, "\n");					\
		return out.pos - buf;					\
	}								\
} while (0)

/*
 * Shorthands for the case where the attribute name and the variable name
 * coincide; the caller must provide a var() macro mapping name -> lvalue.
 */
#define var_printf(_var, fmt)	sysfs_printf(_var, fmt, var(_var))
#define var_print(_var)		sysfs_print(_var, var(_var))
#define var_hprint(_var)	sysfs_hprint(_var, var(_var))
85
/*
 * Store helpers.
 *
 * The sysfs_*() variants test whether the attribute being written is
 * sysfs_<file>; if so they parse buf into var and return from the
 * *enclosing* function: the parser's non-zero result when there is one,
 * otherwise size.  They rely on locals named attr, buf and size, such as
 * those of a handler declared with STORE().
 */
#define sysfs_strtoul(file, var)					\
do {									\
	if (attr == &sysfs_ ## file)					\
		return strtoul_safe(buf, var) ?: (ssize_t) size;	\
} while (0)

#define sysfs_strtoul_clamp(file, var, min, max)			\
do {									\
	if (attr == &sysfs_ ## file)					\
		return strtoul_safe_clamp(buf, var, min, max)		\
			?: (ssize_t) size;				\
} while (0)

/*
 * Expression macros: evaluate to the parsed value, or make the enclosing
 * function return the parser's error code.
 */

/* Base-10 unsigned long via kstrtoul(). */
#define strtoul_or_return(cp)						\
({									\
	unsigned long _v;						\
	int _r = kstrtoul(cp, 10, &_v);					\
	if (_r)								\
		return _r;						\
	_v;								\
})

/* As above, but parsed with strtoul_safe_restrict(cp, ..., min, max). */
#define strtoul_restrict_or_return(cp, min, max)			\
({									\
	unsigned long __v = 0;						\
	int _r = strtoul_safe_restrict(cp, __v, min, max);		\
	if (_r)								\
		return _r;						\
	__v;								\
})

/* u64 parsed with strtoi_h(). */
#define strtoi_h_or_return(cp)						\
({									\
	u64 _v;								\
	int _r = strtoi_h(cp, &_v);					\
	if (_r)								\
		return _r;						\
	_v;								\
})

/* Store counterpart of sysfs_hprint(): parse buf into var with strtoi_h(). */
#define sysfs_hatoi(file, var)						\
do {									\
	if (attr == &sysfs_ ## file)					\
		return strtoi_h(buf, &var) ?: (ssize_t) size;		\
} while (0)
131
132 write_attribute(trigger_journal_flush);
133 write_attribute(trigger_btree_coalesce);
134 write_attribute(trigger_gc);
135 write_attribute(trigger_alloc_write);
136 write_attribute(prune_cache);
137 rw_attribute(btree_gc_periodic);
138
139 read_attribute(uuid);
140 read_attribute(minor);
141 read_attribute(bucket_size);
142 read_attribute(block_size);
143 read_attribute(btree_node_size);
144 read_attribute(first_bucket);
145 read_attribute(nbuckets);
146 read_attribute(durability);
147 read_attribute(iodone);
148
149 read_attribute(io_latency_read);
150 read_attribute(io_latency_write);
151 read_attribute(io_latency_stats_read);
152 read_attribute(io_latency_stats_write);
153 read_attribute(congested);
154
155 read_attribute(bucket_quantiles_last_read);
156 read_attribute(bucket_quantiles_last_write);
157 read_attribute(bucket_quantiles_fragmentation);
158 read_attribute(bucket_quantiles_oldest_gen);
159
160 read_attribute(reserve_stats);
161 read_attribute(btree_cache_size);
162 read_attribute(compression_stats);
163 read_attribute(journal_debug);
164 read_attribute(journal_pins);
165 read_attribute(btree_updates);
166 read_attribute(dirty_btree_nodes);
167
168 read_attribute(internal_uuid);
169
170 read_attribute(has_data);
171 read_attribute(alloc_debug);
172 write_attribute(wake_allocator);
173
174 read_attribute(read_realloc_races);
175 read_attribute(extent_migrate_done);
176 read_attribute(extent_migrate_raced);
177
178 rw_attribute(journal_write_delay_ms);
179 rw_attribute(journal_reclaim_delay_ms);
180
181 rw_attribute(discard);
182 rw_attribute(cache_replacement_policy);
183 rw_attribute(label);
184
185 rw_attribute(copy_gc_enabled);
186 sysfs_pd_controller_attribute(copy_gc);
187
188 rw_attribute(rebalance_enabled);
189 sysfs_pd_controller_attribute(rebalance);
190 read_attribute(rebalance_work);
191 rw_attribute(promote_whole_extents);
192
193 read_attribute(new_stripes);
194
195 rw_attribute(pd_controllers_update_seconds);
196
197 read_attribute(meta_replicas_have);
198 read_attribute(data_replicas_have);
199
200 #ifdef CONFIG_BCACHEFS_TESTS
201 write_attribute(perf_test);
202 #endif /* CONFIG_BCACHEFS_TESTS */
203
204 #define BCH_DEBUG_PARAM(name, description)                              \
205         rw_attribute(name);
206
207         BCH_DEBUG_PARAMS()
208 #undef BCH_DEBUG_PARAM
209
210 #define x(_name)                                                \
211         static struct attribute sysfs_time_stat_##_name =               \
212                 { .name = #_name, .mode = S_IRUGO };
213         BCH_TIME_STATS()
214 #undef x
215
216 static struct attribute sysfs_state_rw = {
217         .name = "state",
218         .mode = S_IRUGO
219 };
220
221 static size_t bch2_btree_cache_size(struct bch_fs *c)
222 {
223         size_t ret = 0;
224         struct btree *b;
225
226         mutex_lock(&c->btree_cache.lock);
227         list_for_each_entry(b, &c->btree_cache.live, list)
228                 ret += btree_bytes(c);
229
230         mutex_unlock(&c->btree_cache.lock);
231         return ret;
232 }
233
234 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
235 {
236         struct printbuf out = _PBUF(buf, PAGE_SIZE);
237         struct bch_fs_usage *fs_usage = bch2_fs_usage_read(c);
238         unsigned i;
239
240         if (!fs_usage)
241                 return -ENOMEM;
242
243         pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
244
245         pr_buf(&out, "hidden:\t\t\t\t%llu\n",
246                fs_usage->hidden);
247         pr_buf(&out, "data:\t\t\t\t%llu\n",
248                fs_usage->data);
249         pr_buf(&out, "cached:\t\t\t\t%llu\n",
250                fs_usage->cached);
251         pr_buf(&out, "reserved:\t\t\t%llu\n",
252                fs_usage->reserved);
253         pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
254                fs_usage->nr_inodes);
255         pr_buf(&out, "online reserved:\t\t%llu\n",
256                fs_usage->online_reserved);
257
258         for (i = 0;
259              i < ARRAY_SIZE(fs_usage->persistent_reserved);
260              i++) {
261                 pr_buf(&out, "%u replicas:\n", i + 1);
262                 pr_buf(&out, "\treserved:\t\t%llu\n",
263                        fs_usage->persistent_reserved[i]);
264         }
265
266         for (i = 0; i < c->replicas.nr; i++) {
267                 struct bch_replicas_entry *e =
268                         cpu_replicas_entry(&c->replicas, i);
269
270                 pr_buf(&out, "\t");
271                 bch2_replicas_entry_to_text(&out, e);
272                 pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
273         }
274
275         percpu_up_read_preempt_enable(&c->mark_lock);
276
277         kfree(fs_usage);
278
279         return out.pos - buf;
280 }
281
282 static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
283 {
284         struct btree_trans trans;
285         struct btree_iter *iter;
286         struct bkey_s_c k;
287         u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
288             nr_compressed_extents = 0,
289             compressed_sectors_compressed = 0,
290             compressed_sectors_uncompressed = 0;
291
292         if (!test_bit(BCH_FS_STARTED, &c->flags))
293                 return -EPERM;
294
295         bch2_trans_init(&trans, c);
296
297         for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS_MIN, 0, k)
298                 if (k.k->type == KEY_TYPE_extent) {
299                         struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
300                         const union bch_extent_entry *entry;
301                         struct extent_ptr_decoded p;
302
303                         extent_for_each_ptr_decode(e, p, entry) {
304                                 if (p.crc.compression_type == BCH_COMPRESSION_NONE) {
305                                         nr_uncompressed_extents++;
306                                         uncompressed_sectors += e.k->size;
307                                 } else {
308                                         nr_compressed_extents++;
309                                         compressed_sectors_compressed +=
310                                                 p.crc.compressed_size;
311                                         compressed_sectors_uncompressed +=
312                                                 p.crc.uncompressed_size;
313                                 }
314
315                                 /* only looking at the first ptr */
316                                 break;
317                         }
318                 }
319         bch2_trans_exit(&trans);
320
321         return scnprintf(buf, PAGE_SIZE,
322                         "uncompressed data:\n"
323                         "       nr extents:                     %llu\n"
324                         "       size (bytes):                   %llu\n"
325                         "compressed data:\n"
326                         "       nr extents:                     %llu\n"
327                         "       compressed size (bytes):        %llu\n"
328                         "       uncompressed size (bytes):      %llu\n",
329                         nr_uncompressed_extents,
330                         uncompressed_sectors << 9,
331                         nr_compressed_extents,
332                         compressed_sectors_compressed << 9,
333                         compressed_sectors_uncompressed << 9);
334 }
335
336 static ssize_t bch2_new_stripes(struct bch_fs *c, char *buf)
337 {
338         char *out = buf, *end = buf + PAGE_SIZE;
339         struct ec_stripe_head *h;
340         struct ec_stripe_new *s;
341
342         mutex_lock(&c->ec_new_stripe_lock);
343         list_for_each_entry(h, &c->ec_new_stripe_list, list) {
344                 out += scnprintf(out, end - out,
345                                  "target %u algo %u redundancy %u:\n",
346                                  h->target, h->algo, h->redundancy);
347
348                 if (h->s)
349                         out += scnprintf(out, end - out,
350                                          "\tpending: blocks %u allocated %u\n",
351                                          h->s->blocks.nr,
352                                          bitmap_weight(h->s->blocks_allocated,
353                                                        h->s->blocks.nr));
354
355                 mutex_lock(&h->lock);
356                 list_for_each_entry(s, &h->stripes, list)
357                         out += scnprintf(out, end - out,
358                                          "\tin flight: blocks %u allocated %u pin %u\n",
359                                          s->blocks.nr,
360                                          bitmap_weight(s->blocks_allocated,
361                                                        s->blocks.nr),
362                                          atomic_read(&s->pin));
363                 mutex_unlock(&h->lock);
364
365         }
366         mutex_unlock(&c->ec_new_stripe_lock);
367
368         return out - buf;
369 }
370
371 SHOW(bch2_fs)
372 {
373         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
374
375         sysfs_print(minor,                      c->minor);
376         sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
377
378         sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
379         sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
380
381         sysfs_print(block_size,                 block_bytes(c));
382         sysfs_print(btree_node_size,            btree_bytes(c));
383         sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
384
385         sysfs_print(read_realloc_races,
386                     atomic_long_read(&c->read_realloc_races));
387         sysfs_print(extent_migrate_done,
388                     atomic_long_read(&c->extent_migrate_done));
389         sysfs_print(extent_migrate_raced,
390                     atomic_long_read(&c->extent_migrate_raced));
391
392         sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);
393
394         sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
395
396         sysfs_print(pd_controllers_update_seconds,
397                     c->pd_controllers_update_seconds);
398
399         sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
400         sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
401
402         if (attr == &sysfs_rebalance_work)
403                 return bch2_rebalance_work_show(c, buf);
404
405         sysfs_print(promote_whole_extents,      c->promote_whole_extents);
406
407         sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
408         sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
409
410         /* Debugging: */
411
412         if (attr == &sysfs_alloc_debug)
413                 return show_fs_alloc_debug(c, buf);
414
415         if (attr == &sysfs_journal_debug)
416                 return bch2_journal_print_debug(&c->journal, buf);
417
418         if (attr == &sysfs_journal_pins)
419                 return bch2_journal_print_pins(&c->journal, buf);
420
421         if (attr == &sysfs_btree_updates)
422                 return bch2_btree_updates_print(c, buf);
423
424         if (attr == &sysfs_dirty_btree_nodes)
425                 return bch2_dirty_btree_nodes_print(c, buf);
426
427         if (attr == &sysfs_compression_stats)
428                 return bch2_compression_stats(c, buf);
429
430         if (attr == &sysfs_new_stripes)
431                 return bch2_new_stripes(c, buf);
432
433 #define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
434         BCH_DEBUG_PARAMS()
435 #undef BCH_DEBUG_PARAM
436
437         return 0;
438 }
439
440 STORE(__bch2_fs)
441 {
442         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
443
444         sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
445         sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
446
447         if (attr == &sysfs_btree_gc_periodic) {
448                 ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
449                         ?: (ssize_t) size;
450
451                 wake_up_process(c->gc_thread);
452                 return ret;
453         }
454
455         if (attr == &sysfs_copy_gc_enabled) {
456                 struct bch_dev *ca;
457                 unsigned i;
458                 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
459                         ?: (ssize_t) size;
460
461                 for_each_member_device(ca, c, i)
462                         if (ca->copygc_thread)
463                                 wake_up_process(ca->copygc_thread);
464                 return ret;
465         }
466
467         if (attr == &sysfs_rebalance_enabled) {
468                 ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
469                         ?: (ssize_t) size;
470
471                 rebalance_wakeup(c);
472                 return ret;
473         }
474
475         sysfs_strtoul(pd_controllers_update_seconds,
476                       c->pd_controllers_update_seconds);
477         sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
478
479         sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
480
481         /* Debugging: */
482
483 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
484         BCH_DEBUG_PARAMS()
485 #undef BCH_DEBUG_PARAM
486
487         if (!test_bit(BCH_FS_STARTED, &c->flags))
488                 return -EPERM;
489
490         /* Debugging: */
491
492         if (attr == &sysfs_trigger_journal_flush)
493                 bch2_journal_meta_async(&c->journal, NULL);
494
495         if (attr == &sysfs_trigger_btree_coalesce)
496                 bch2_coalesce(c);
497
498         if (attr == &sysfs_trigger_gc)
499                 bch2_gc(c, NULL, false, false);
500
501         if (attr == &sysfs_trigger_alloc_write) {
502                 bool wrote;
503
504                 bch2_alloc_write(c, false, &wrote);
505         }
506
507         if (attr == &sysfs_prune_cache) {
508                 struct shrink_control sc;
509
510                 sc.gfp_mask = GFP_KERNEL;
511                 sc.nr_to_scan = strtoul_or_return(buf);
512                 c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
513         }
514 #ifdef CONFIG_BCACHEFS_TESTS
515         if (attr == &sysfs_perf_test) {
516                 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
517                 char *test              = strsep(&p, " \t\n");
518                 char *nr_str            = strsep(&p, " \t\n");
519                 char *threads_str       = strsep(&p, " \t\n");
520                 unsigned threads;
521                 u64 nr;
522                 int ret = -EINVAL;
523
524                 if (threads_str &&
525                     !(ret = kstrtouint(threads_str, 10, &threads)) &&
526                     !(ret = bch2_strtoull_h(nr_str, &nr)))
527                         bch2_btree_perf_test(c, test, nr, threads);
528                 else
529                         size = ret;
530                 kfree(tmp);
531         }
532 #endif
533         return size;
534 }
535
536 STORE(bch2_fs)
537 {
538         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
539
540         mutex_lock(&c->state_lock);
541         size = __bch2_fs_store(kobj, attr, buf, size);
542         mutex_unlock(&c->state_lock);
543
544         return size;
545 }
546 SYSFS_OPS(bch2_fs);
547
548 struct attribute *bch2_fs_files[] = {
549         &sysfs_minor,
550         &sysfs_block_size,
551         &sysfs_btree_node_size,
552         &sysfs_btree_cache_size,
553
554         &sysfs_meta_replicas_have,
555         &sysfs_data_replicas_have,
556
557         &sysfs_journal_write_delay_ms,
558         &sysfs_journal_reclaim_delay_ms,
559
560         &sysfs_promote_whole_extents,
561
562         &sysfs_compression_stats,
563
564 #ifdef CONFIG_BCACHEFS_TESTS
565         &sysfs_perf_test,
566 #endif
567         NULL
568 };
569
570 /* internal dir - just a wrapper */
571
572 SHOW(bch2_fs_internal)
573 {
574         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
575         return bch2_fs_show(&c->kobj, attr, buf);
576 }
577
578 STORE(bch2_fs_internal)
579 {
580         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
581         return bch2_fs_store(&c->kobj, attr, buf, size);
582 }
583 SYSFS_OPS(bch2_fs_internal);
584
585 struct attribute *bch2_fs_internal_files[] = {
586         &sysfs_alloc_debug,
587         &sysfs_journal_debug,
588         &sysfs_journal_pins,
589         &sysfs_btree_updates,
590         &sysfs_dirty_btree_nodes,
591
592         &sysfs_read_realloc_races,
593         &sysfs_extent_migrate_done,
594         &sysfs_extent_migrate_raced,
595
596         &sysfs_trigger_journal_flush,
597         &sysfs_trigger_btree_coalesce,
598         &sysfs_trigger_gc,
599         &sysfs_trigger_alloc_write,
600         &sysfs_prune_cache,
601
602         &sysfs_copy_gc_enabled,
603
604         &sysfs_rebalance_enabled,
605         &sysfs_rebalance_work,
606         sysfs_pd_controller_files(rebalance),
607
608         &sysfs_new_stripes,
609
610         &sysfs_internal_uuid,
611
612 #define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
613         BCH_DEBUG_PARAMS()
614 #undef BCH_DEBUG_PARAM
615
616         NULL
617 };
618
619 /* options */
620
621 SHOW(bch2_fs_opts_dir)
622 {
623         struct printbuf out = _PBUF(buf, PAGE_SIZE);
624         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
625         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
626         int id = opt - bch2_opt_table;
627         u64 v = bch2_opt_get_by_id(&c->opts, id);
628
629         bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST);
630         pr_buf(&out, "\n");
631
632         return out.pos - buf;
633 }
634
635 STORE(bch2_fs_opts_dir)
636 {
637         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
638         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
639         int ret, id = opt - bch2_opt_table;
640         char *tmp;
641         u64 v;
642
643         tmp = kstrdup(buf, GFP_KERNEL);
644         if (!tmp)
645                 return -ENOMEM;
646
647         ret = bch2_opt_parse(c, opt, strim(tmp), &v);
648         kfree(tmp);
649
650         if (ret < 0)
651                 return ret;
652
653         ret = bch2_opt_check_may_set(c, id, v);
654         if (ret < 0)
655                 return ret;
656
657         if (opt->set_sb != SET_NO_SB_OPT) {
658                 mutex_lock(&c->sb_lock);
659                 opt->set_sb(c->disk_sb.sb, v);
660                 bch2_write_super(c);
661                 mutex_unlock(&c->sb_lock);
662         }
663
664         bch2_opt_set_by_id(&c->opts, id, v);
665
666         if ((id == Opt_background_target ||
667              id == Opt_background_compression) && v) {
668                 bch2_rebalance_add_work(c, S64_MAX);
669                 rebalance_wakeup(c);
670         }
671
672         return size;
673 }
674 SYSFS_OPS(bch2_fs_opts_dir);
675
676 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
677
678 int bch2_opts_create_sysfs_files(struct kobject *kobj)
679 {
680         const struct bch_option *i;
681         int ret;
682
683         for (i = bch2_opt_table;
684              i < bch2_opt_table + bch2_opts_nr;
685              i++) {
686                 if (!(i->mode & (OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME)))
687                         continue;
688
689                 ret = sysfs_create_file(kobj, &i->attr);
690                 if (ret)
691                         return ret;
692         }
693
694         return 0;
695 }
696
697 /* time stats */
698
699 SHOW(bch2_fs_time_stats)
700 {
701         struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
702
703 #define x(name)                                         \
704         if (attr == &sysfs_time_stat_##name)                            \
705                 return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
706                                              buf, PAGE_SIZE);
707         BCH_TIME_STATS()
708 #undef x
709
710         return 0;
711 }
712
713 STORE(bch2_fs_time_stats)
714 {
715         return size;
716 }
717 SYSFS_OPS(bch2_fs_time_stats);
718
719 struct attribute *bch2_fs_time_stats_files[] = {
720 #define x(name)                                         \
721         &sysfs_time_stat_##name,
722         BCH_TIME_STATS()
723 #undef x
724         NULL
725 };
726
727 typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
728                                  size_t, void *);
729
730 static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
731                                   size_t b, void *private)
732 {
733         int rw = (private ? 1 : 0);
734
735         return bucket_last_io(c, bucket(ca, b), rw);
736 }
737
738 static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
739                                        size_t b, void *private)
740 {
741         struct bucket *g = bucket(ca, b);
742         return bucket_sectors_used(g->mark);
743 }
744
745 static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
746                                      size_t b, void *private)
747 {
748         return bucket_gc_gen(ca, b);
749 }
750
/*
 * sort() comparator for arrays of unsigned: returns -1, 0 or 1 so that the
 * array ends up in ascending order.
 */
static int unsigned_cmp(const void *_l, const void *_r)
{
	const unsigned *lhs = _l;
	const unsigned *rhs = _r;

	if (*lhs < *rhs)
		return -1;

	return *lhs > *rhs;
}
758
759 static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
760                               char *buf, bucket_map_fn *fn, void *private)
761 {
762         size_t i, n;
763         /* Compute 31 quantiles */
764         unsigned q[31], *p;
765         ssize_t ret = 0;
766
767         down_read(&ca->bucket_lock);
768         n = ca->mi.nbuckets;
769
770         p = vzalloc(n * sizeof(unsigned));
771         if (!p) {
772                 up_read(&ca->bucket_lock);
773                 return -ENOMEM;
774         }
775
776         for (i = ca->mi.first_bucket; i < n; i++)
777                 p[i] = fn(c, ca, i, private);
778
779         sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
780         up_read(&ca->bucket_lock);
781
782         while (n &&
783                !p[n - 1])
784                 --n;
785
786         for (i = 0; i < ARRAY_SIZE(q); i++)
787                 q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
788
789         vfree(p);
790
791         for (i = 0; i < ARRAY_SIZE(q); i++)
792                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
793                                  "%u ", q[i]);
794         buf[ret - 1] = '\n';
795
796         return ret;
797 }
798
799 static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
800 {
801         struct printbuf out = _PBUF(buf, PAGE_SIZE);
802         enum alloc_reserve i;
803
804         spin_lock(&ca->freelist_lock);
805
806         pr_buf(&out, "free_inc:\t%zu\t%zu\n",
807                fifo_used(&ca->free_inc),
808                ca->free_inc.size);
809
810         for (i = 0; i < RESERVE_NR; i++)
811                 pr_buf(&out, "free[%u]:\t%zu\t%zu\n", i,
812                        fifo_used(&ca->free[i]),
813                        ca->free[i].size);
814
815         spin_unlock(&ca->freelist_lock);
816
817         return out.pos - buf;
818 }
819
820 static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
821 {
822         struct bch_fs *c = ca->fs;
823         struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
824         unsigned i, nr[BCH_DATA_NR];
825
826         memset(nr, 0, sizeof(nr));
827
828         for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
829                 nr[c->open_buckets[i].type]++;
830
831         return scnprintf(buf, PAGE_SIZE,
832                 "free_inc:               %zu/%zu\n"
833                 "free[RESERVE_BTREE]:    %zu/%zu\n"
834                 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
835                 "free[RESERVE_NONE]:     %zu/%zu\n"
836                 "buckets:\n"
837                 "    capacity:           %llu\n"
838                 "    alloc:              %llu\n"
839                 "    sb:                 %llu\n"
840                 "    journal:            %llu\n"
841                 "    meta:               %llu\n"
842                 "    user:               %llu\n"
843                 "    cached:             %llu\n"
844                 "    erasure coded:      %llu\n"
845                 "    available:          %lli\n"
846                 "sectors:\n"
847                 "    sb:                 %llu\n"
848                 "    journal:            %llu\n"
849                 "    meta:               %llu\n"
850                 "    user:               %llu\n"
851                 "    cached:             %llu\n"
852                 "    fragmented:         %llu\n"
853                 "    copygc threshold:   %llu\n"
854                 "freelist_wait:          %s\n"
855                 "open buckets:           %u/%u (reserved %u)\n"
856                 "open_buckets_wait:      %s\n"
857                 "open_buckets_btree:     %u\n"
858                 "open_buckets_user:      %u\n"
859                 "btree reserve cache:    %u\n",
860                 fifo_used(&ca->free_inc),               ca->free_inc.size,
861                 fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
862                 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
863                 fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
864                 ca->mi.nbuckets - ca->mi.first_bucket,
865                 stats.buckets_alloc,
866                 stats.buckets[BCH_DATA_SB],
867                 stats.buckets[BCH_DATA_JOURNAL],
868                 stats.buckets[BCH_DATA_BTREE],
869                 stats.buckets[BCH_DATA_USER],
870                 stats.buckets[BCH_DATA_CACHED],
871                 stats.buckets_ec,
872                 ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
873                 stats.sectors[BCH_DATA_SB],
874                 stats.sectors[BCH_DATA_JOURNAL],
875                 stats.sectors[BCH_DATA_BTREE],
876                 stats.sectors[BCH_DATA_USER],
877                 stats.sectors[BCH_DATA_CACHED],
878                 stats.sectors_fragmented,
879                 ca->copygc_threshold,
880                 c->freelist_wait.list.first             ? "waiting" : "empty",
881                 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT,
882                 BTREE_NODE_OPEN_BUCKET_RESERVE,
883                 c->open_buckets_wait.list.first         ? "waiting" : "empty",
884                 nr[BCH_DATA_BTREE],
885                 nr[BCH_DATA_USER],
886                 c->btree_reserve_cache_nr);
887 }
888
/*
 * Printable names of the two I/O directions, indexed by direction
 * (0 = read, 1 = write).  The table is NULL-terminated.
 */
static const char * const bch2_rw[] = {
	[0] = "read",
	[1] = "write",
	[2] = NULL,
};
894
895 static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
896 {
897         struct printbuf out = _PBUF(buf, PAGE_SIZE);
898         int rw, i;
899
900         for (rw = 0; rw < 2; rw++) {
901                 pr_buf(&out, "%s:\n", bch2_rw[rw]);
902
903                 for (i = 1; i < BCH_DATA_NR; i++)
904                         pr_buf(&out, "%-12s:%12llu\n",
905                                bch2_data_types[i],
906                                percpu_u64_get(&ca->io_done->sectors[rw][i]) << 9);
907         }
908
909         return out.pos - buf;
910 }
911
912 SHOW(bch2_dev)
913 {
914         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
915         struct bch_fs *c = ca->fs;
916         struct printbuf out = _PBUF(buf, PAGE_SIZE);
917
918         sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
919
920         sysfs_print(bucket_size,        bucket_bytes(ca));
921         sysfs_print(block_size,         block_bytes(c));
922         sysfs_print(first_bucket,       ca->mi.first_bucket);
923         sysfs_print(nbuckets,           ca->mi.nbuckets);
924         sysfs_print(durability,         ca->mi.durability);
925         sysfs_print(discard,            ca->mi.discard);
926
927         if (attr == &sysfs_label) {
928                 if (ca->mi.group) {
929                         mutex_lock(&c->sb_lock);
930                         bch2_disk_path_to_text(&out, &c->disk_sb,
931                                                ca->mi.group - 1);
932                         mutex_unlock(&c->sb_lock);
933                 } else {
934                         pr_buf(&out, "none");
935                 }
936
937                 pr_buf(&out, "\n");
938                 return out.pos - buf;
939         }
940
941         if (attr == &sysfs_has_data) {
942                 bch2_flags_to_text(&out, bch2_data_types,
943                                    bch2_dev_has_data(c, ca));
944                 pr_buf(&out, "\n");
945                 return out.pos - buf;
946         }
947
948         sysfs_pd_controller_show(copy_gc, &ca->copygc_pd);
949
950         if (attr == &sysfs_cache_replacement_policy) {
951                 bch2_string_opt_to_text(&out,
952                                         bch2_cache_replacement_policies,
953                                         ca->mi.replacement);
954                 pr_buf(&out, "\n");
955                 return out.pos - buf;
956         }
957
958         if (attr == &sysfs_state_rw) {
959                 bch2_string_opt_to_text(&out, bch2_dev_state,
960                                         ca->mi.state);
961                 pr_buf(&out, "\n");
962                 return out.pos - buf;
963         }
964
965         if (attr == &sysfs_iodone)
966                 return show_dev_iodone(ca, buf);
967
968         sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
969         sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
970
971         if (attr == &sysfs_io_latency_stats_read)
972                 return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
973         if (attr == &sysfs_io_latency_stats_write)
974                 return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
975
976         sysfs_printf(congested,                 "%u%%",
977                      clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
978                      * 100 / CONGESTED_MAX);
979
980         if (attr == &sysfs_bucket_quantiles_last_read)
981                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
982         if (attr == &sysfs_bucket_quantiles_last_write)
983                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
984         if (attr == &sysfs_bucket_quantiles_fragmentation)
985                 return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
986         if (attr == &sysfs_bucket_quantiles_oldest_gen)
987                 return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
988
989         if (attr == &sysfs_reserve_stats)
990                 return show_reserve_stats(ca, buf);
991         if (attr == &sysfs_alloc_debug)
992                 return show_dev_alloc_debug(ca, buf);
993
994         return 0;
995 }
996
997 STORE(bch2_dev)
998 {
999         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
1000         struct bch_fs *c = ca->fs;
1001         struct bch_member *mi;
1002
1003         sysfs_pd_controller_store(copy_gc, &ca->copygc_pd);
1004
1005         if (attr == &sysfs_discard) {
1006                 bool v = strtoul_or_return(buf);
1007
1008                 mutex_lock(&c->sb_lock);
1009                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
1010
1011                 if (v != BCH_MEMBER_DISCARD(mi)) {
1012                         SET_BCH_MEMBER_DISCARD(mi, v);
1013                         bch2_write_super(c);
1014                 }
1015                 mutex_unlock(&c->sb_lock);
1016         }
1017
1018         if (attr == &sysfs_cache_replacement_policy) {
1019                 ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
1020
1021                 if (v < 0)
1022                         return v;
1023
1024                 mutex_lock(&c->sb_lock);
1025                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
1026
1027                 if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
1028                         SET_BCH_MEMBER_REPLACEMENT(mi, v);
1029                         bch2_write_super(c);
1030                 }
1031                 mutex_unlock(&c->sb_lock);
1032         }
1033
1034         if (attr == &sysfs_label) {
1035                 char *tmp;
1036                 int ret;
1037
1038                 tmp = kstrdup(buf, GFP_KERNEL);
1039                 if (!tmp)
1040                         return -ENOMEM;
1041
1042                 ret = bch2_dev_group_set(c, ca, strim(tmp));
1043                 kfree(tmp);
1044                 if (ret)
1045                         return ret;
1046         }
1047
1048         if (attr == &sysfs_wake_allocator)
1049                 bch2_wake_allocator(ca);
1050
1051         return size;
1052 }
1053 SYSFS_OPS(bch2_dev);
1054
1055 struct attribute *bch2_dev_files[] = {
1056         &sysfs_uuid,
1057         &sysfs_bucket_size,
1058         &sysfs_block_size,
1059         &sysfs_first_bucket,
1060         &sysfs_nbuckets,
1061         &sysfs_durability,
1062
1063         /* settings: */
1064         &sysfs_discard,
1065         &sysfs_cache_replacement_policy,
1066         &sysfs_state_rw,
1067         &sysfs_label,
1068
1069         &sysfs_has_data,
1070         &sysfs_iodone,
1071
1072         &sysfs_io_latency_read,
1073         &sysfs_io_latency_write,
1074         &sysfs_io_latency_stats_read,
1075         &sysfs_io_latency_stats_write,
1076         &sysfs_congested,
1077
1078         /* alloc info - other stats: */
1079         &sysfs_bucket_quantiles_last_read,
1080         &sysfs_bucket_quantiles_last_write,
1081         &sysfs_bucket_quantiles_fragmentation,
1082         &sysfs_bucket_quantiles_oldest_gen,
1083
1084         &sysfs_reserve_stats,
1085
1086         /* debug: */
1087         &sysfs_alloc_debug,
1088         &sysfs_wake_allocator,
1089
1090         sysfs_pd_controller_files(copy_gc),
1091         NULL
1092 };
1093
#endif  /* NO_BCACHEFS_SYSFS */