libbcache/error.c

#include "bcache.h"
#include "error.h"
#include "io.h"
#include "notify.h"
#include "super.h"

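/*
 * Called when we detect an inconsistency - what happens next is governed
 * by the "errors" mount option: continue, force the cache set read only,
 * or panic:
 */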
void bch_inconsistent_error(struct cache_set *c)
{
        set_bit(CACHE_SET_ERROR, &c->flags);

        switch (c->opts.errors) {
        case BCH_ON_ERROR_CONTINUE:
                break;
        case BCH_ON_ERROR_RO:
                if (!test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)) {
                        /* XXX do something better here? */
                        bch_cache_set_stop(c);
                        return;
                }

                if (bch_cache_set_emergency_read_only(c))
                        bch_err(c, "emergency read only");
                break;
        case BCH_ON_ERROR_PANIC:
                panic(bch_fmt(c, "panic after error"));
                break;
        }
}

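/*
 * A fatal error is one we can't continue from, regardless of the "errors"
 * option - just force the whole cache set read only:
 */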
void bch_fatal_error(struct cache_set *c)
{
        if (bch_cache_set_emergency_read_only(c))
                bch_err(c, "emergency read only");
}

/* Nonfatal IO errors, IO error/latency accounting: */

/* Just does IO error accounting: */
void bch_account_io_completion(struct cache *ca)
{
        /*
         * The halflife of an error is:
         * log2(1/2)/log2(127/128) * error_decay ~= 88 * error_decay
         */
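        /*
         * (Every error_decay IOs we multiply the error count by 127/128;
         * solving (127/128)^n = 1/2 gives n = log(1/2)/log(127/128) ~= 88,
         * so errors halve every ~88 * error_decay IOs.)
         */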

        if (ca->set->error_decay) {
                unsigned count = atomic_inc_return(&ca->io_count);

                while (count > ca->set->error_decay) {
                        unsigned errors;
                        unsigned old = count;
                        unsigned new = count - ca->set->error_decay;

                        /*
                         * First we subtract error_decay from count; each
                         * time we successfully do so, we rescale the
                         * errors once:
                         */

                        count = atomic_cmpxchg(&ca->io_count, old, new);

                        if (count == old) {
                                count = new;

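                                /*
                                 * Standard atomic_cmpxchg() loop: if we
                                 * race, retry with the value we lost to
                                 * until our 127/128 rescale sticks:
                                 */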
                                errors = atomic_read(&ca->io_errors);
                                do {
                                        old = errors;
                                        new = ((uint64_t) errors * 127) / 128;
                                        errors = atomic_cmpxchg(&ca->io_errors,
                                                                old, new);
                                } while (old != errors);
                        }
                }
        }
}

/* IO error accounting and latency accounting: */
void bch_account_io_completion_time(struct cache *ca,
                                    unsigned submit_time_us, int op)
{
        struct cache_set *c;
        unsigned threshold;

        if (!ca)
                return;

        c = ca->set;
        threshold = op_is_write(op)
                ? c->congested_write_threshold_us
                : c->congested_read_threshold_us;

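        /*
         * IOs completing slower than the threshold drive c->congested
         * negative (clamped at -CONGESTED_MAX by the min() below); IOs
         * completing in time let it drift back up towards zero. us / 1024
         * approximates microseconds-to-milliseconds with a shift instead
         * of a divide:
         */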
        if (threshold && submit_time_us) {
                unsigned t = local_clock_us();

                int us = t - submit_time_us;
                int congested = atomic_read(&c->congested);

                if (us > (int) threshold) {
                        int ms = us / 1024;
                        c->congested_last_us = t;

                        ms = min(ms, CONGESTED_MAX + congested);
                        atomic_sub(ms, &c->congested);
                } else if (congested < 0)
                        atomic_inc(&c->congested);
        }

        bch_account_io_completion(ca);
}

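/*
 * Queued by bch_nonfatal_io_error(), below: once a device has accumulated
 * too many IO errors, we set it - or, if it can't safely be removed from
 * the cache set, the whole cache set - read only:
 */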
void bch_nonfatal_io_error_work(struct work_struct *work)
{
        struct cache *ca = container_of(work, struct cache, io_error_work);
        struct cache_set *c = ca->set;
        unsigned errors = atomic_read(&ca->io_errors);
        char buf[BDEVNAME_SIZE];
        bool dev;

        if (errors < c->error_limit) {
                bch_notify_cache_error(ca, false);
        } else {
                bch_notify_cache_error(ca, true);

                mutex_lock(&bch_register_lock);
                dev = bch_cache_may_remove(ca);
                if (dev
                    ? bch_cache_read_only(ca)
                    : bch_cache_set_emergency_read_only(c))
                        bch_err(c,
                                "too many IO errors on %s, setting %s RO",
                                bdevname(ca->disk_sb.bdev, buf),
                                dev ? "device" : "filesystem");
                mutex_unlock(&bch_register_lock);
        }
}

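/*
 * Errors are counted in fixed point, shifted left by IO_ERROR_SHIFT -
 * presumably so the 127/128 decay above doesn't lose whole errors to
 * rounding. The work item runs off system_long_wq, as the error work may
 * block on bch_register_lock:
 */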
void bch_nonfatal_io_error(struct cache *ca)
{
        atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
        queue_work(system_long_wq, &ca->io_error_work);
}
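
/*
 * Sketch of a typical caller (illustrative only - struct example_io and
 * example_endio() are not from this codebase): an IO completion handler
 * accounts latency and, on failure, bumps the nonfatal error count:
 *
 *	static void example_endio(struct bio *bio)
 *	{
 *		struct example_io *io = bio->bi_private;
 *
 *		if (bio->bi_error)
 *			bch_nonfatal_io_error(io->ca);
 *
 *		bch_account_io_completion_time(io->ca, io->submit_time_us,
 *					       bio->bi_opf);
 *	}
 */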