]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/error.c
Disable pristine-tar option in gbp.conf, since there is no pristine-tar branch.
[bcachefs-tools-debian] / libbcachefs / error.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "error.h"
4 #include "recovery.h"
5 #include "super.h"
6 #include <linux/thread_with_file.h>
7
/*
 * Number of instances of a given fsck error (keyed by format string) that are
 * reported before ratelimiting kicks in, when ratelimiting is enabled.
 */
#define FSCK_ERR_RATELIMIT_NR   10
9
10 bool bch2_inconsistent_error(struct bch_fs *c)
11 {
12         set_bit(BCH_FS_error, &c->flags);
13
14         switch (c->opts.errors) {
15         case BCH_ON_ERROR_continue:
16                 return false;
17         case BCH_ON_ERROR_ro:
18                 if (bch2_fs_emergency_read_only(c))
19                         bch_err(c, "inconsistency detected - emergency read only");
20                 return true;
21         case BCH_ON_ERROR_panic:
22                 panic(bch2_fmt(c, "panic after error"));
23                 return true;
24         default:
25                 BUG();
26         }
27 }
28
29 int bch2_topology_error(struct bch_fs *c)
30 {
31         set_bit(BCH_FS_topology_error, &c->flags);
32         if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
33                 bch2_inconsistent_error(c);
34                 return -BCH_ERR_btree_need_topology_repair;
35         } else {
36                 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
37                         -BCH_ERR_btree_node_read_validate_error;
38         }
39 }
40
/*
 * Handle a fatal error: request emergency read-only, and log a message when
 * bch2_fs_emergency_read_only() reports true.
 */
void bch2_fatal_error(struct bch_fs *c)
{
        bool went_ro = bch2_fs_emergency_read_only(c);

        if (went_ro)
                bch_err(c, "fatal error - emergency read only");
}
46
47 void bch2_io_error_work(struct work_struct *work)
48 {
49         struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
50         struct bch_fs *c = ca->fs;
51         bool dev;
52
53         down_write(&c->state_lock);
54         dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro,
55                                     BCH_FORCE_IF_DEGRADED);
56         if (dev
57             ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro,
58                                   BCH_FORCE_IF_DEGRADED)
59             : bch2_fs_emergency_read_only(c))
60                 bch_err(ca,
61                         "too many IO errors, setting %s RO",
62                         dev ? "device" : "filesystem");
63         up_write(&c->state_lock);
64 }
65
66 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type)
67 {
68         atomic64_inc(&ca->errors[type]);
69         //queue_work(system_long_wq, &ca->io_error_work);
70 }
71
/*
 * Answers to the interactive "fix?" prompt.
 *
 * The numeric values are relied upon by the caller: bit 0 set means "yes",
 * and values >= YN_ALLNO mean the answer applies to all errors of this type.
 */
enum ask_yn {
        YN_NO           = 0,
        YN_YES          = 1,
        YN_ALLNO        = 2,
        YN_ALLYES       = 3,
};
78
79 static enum ask_yn parse_yn_response(char *buf)
80 {
81         buf = strim(buf);
82
83         if (strlen(buf) == 1)
84                 switch (buf[0]) {
85                 case 'n':
86                         return YN_NO;
87                 case 'y':
88                         return YN_YES;
89                 case 'N':
90                         return YN_ALLNO;
91                 case 'Y':
92                         return YN_ALLYES;
93                 }
94         return -1;
95 }
96
97 #ifdef __KERNEL__
98 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
99 {
100         struct stdio_redirect *stdio = c->stdio;
101
102         if (c->stdio_filter && c->stdio_filter != current)
103                 stdio = NULL;
104
105         if (!stdio)
106                 return YN_NO;
107
108         char buf[100];
109         int ret;
110
111         do {
112                 bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
113
114                 int r = stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
115                 if (r < 0)
116                         return YN_NO;
117                 buf[r] = '\0';
118         } while ((ret = parse_yn_response(buf)) < 0);
119
120         return ret;
121 }
122 #else
123
124 #include "tools-util.h"
125
126 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
127 {
128         char *buf = NULL;
129         size_t buflen = 0;
130         int ret;
131
132         do {
133                 fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
134                 fflush(stdout);
135
136                 if (getline(&buf, &buflen, stdin) < 0)
137                         die("error reading from standard input");
138         } while ((ret = parse_yn_response(buf)) < 0);
139
140         free(buf);
141         return ret;
142 }
143
144 #endif
145
146 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
147 {
148         struct fsck_err_state *s;
149
150         if (!test_bit(BCH_FS_fsck_running, &c->flags))
151                 return NULL;
152
153         list_for_each_entry(s, &c->fsck_error_msgs, list)
154                 if (s->fmt == fmt) {
155                         /*
156                          * move it to the head of the list: repeated fsck errors
157                          * are common
158                          */
159                         list_move(&s->list, &c->fsck_error_msgs);
160                         return s;
161                 }
162
163         s = kzalloc(sizeof(*s), GFP_NOFS);
164         if (!s) {
165                 if (!c->fsck_alloc_msgs_err)
166                         bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
167                 c->fsck_alloc_msgs_err = true;
168                 return NULL;
169         }
170
171         INIT_LIST_HEAD(&s->list);
172         s->fmt = fmt;
173         list_add(&s->list, &c->fsck_error_msgs);
174         return s;
175 }
176
177 int bch2_fsck_err(struct bch_fs *c,
178                   enum bch_fsck_flags flags,
179                   enum bch_sb_error_id err,
180                   const char *fmt, ...)
181 {
182         struct fsck_err_state *s = NULL;
183         va_list args;
184         bool print = true, suppressing = false, inconsistent = false;
185         struct printbuf buf = PRINTBUF, *out = &buf;
186         int ret = -BCH_ERR_fsck_ignore;
187
188         if ((flags & FSCK_CAN_FIX) &&
189             test_bit(err, c->sb.errors_silent))
190                 return -BCH_ERR_fsck_fix;
191
192         bch2_sb_error_count(c, err);
193
194         va_start(args, fmt);
195         prt_vprintf(out, fmt, args);
196         va_end(args);
197
198         mutex_lock(&c->fsck_error_msgs_lock);
199         s = fsck_err_get(c, fmt);
200         if (s) {
201                 /*
202                  * We may be called multiple times for the same error on
203                  * transaction restart - this memoizes instead of asking the user
204                  * multiple times for the same error:
205                  */
206                 if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
207                         ret = s->ret;
208                         mutex_unlock(&c->fsck_error_msgs_lock);
209                         printbuf_exit(&buf);
210                         return ret;
211                 }
212
213                 kfree(s->last_msg);
214                 s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
215
216                 if (c->opts.ratelimit_errors &&
217                     !(flags & FSCK_NO_RATELIMIT) &&
218                     s->nr >= FSCK_ERR_RATELIMIT_NR) {
219                         if (s->nr == FSCK_ERR_RATELIMIT_NR)
220                                 suppressing = true;
221                         else
222                                 print = false;
223                 }
224
225                 s->nr++;
226         }
227
228 #ifdef BCACHEFS_LOG_PREFIX
229         if (!strncmp(fmt, "bcachefs:", 9))
230                 prt_printf(out, bch2_log_msg(c, ""));
231 #endif
232
233         if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
234                 if (c->opts.errors != BCH_ON_ERROR_continue ||
235                     !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
236                         prt_str(out, ", shutting down");
237                         inconsistent = true;
238                         ret = -BCH_ERR_fsck_errors_not_fixed;
239                 } else if (flags & FSCK_CAN_FIX) {
240                         prt_str(out, ", fixing");
241                         ret = -BCH_ERR_fsck_fix;
242                 } else {
243                         prt_str(out, ", continuing");
244                         ret = -BCH_ERR_fsck_ignore;
245                 }
246         } else if (c->opts.fix_errors == FSCK_FIX_exit) {
247                 prt_str(out, ", exiting");
248                 ret = -BCH_ERR_fsck_errors_not_fixed;
249         } else if (flags & FSCK_CAN_FIX) {
250                 int fix = s && s->fix
251                         ? s->fix
252                         : c->opts.fix_errors;
253
254                 if (fix == FSCK_FIX_ask) {
255                         int ask;
256
257                         prt_str(out, ": fix?");
258                         if (bch2_fs_stdio_redirect(c))
259                                 bch2_print(c, "%s", out->buf);
260                         else
261                                 bch2_print_string_as_lines(KERN_ERR, out->buf);
262                         print = false;
263
264                         ask = bch2_fsck_ask_yn(c);
265
266                         if (ask >= YN_ALLNO && s)
267                                 s->fix = ask == YN_ALLNO
268                                         ? FSCK_FIX_no
269                                         : FSCK_FIX_yes;
270
271                         ret = ask & 1
272                                 ? -BCH_ERR_fsck_fix
273                                 : -BCH_ERR_fsck_ignore;
274                 } else if (fix == FSCK_FIX_yes ||
275                            (c->opts.nochanges &&
276                             !(flags & FSCK_CAN_IGNORE))) {
277                         prt_str(out, ", fixing");
278                         ret = -BCH_ERR_fsck_fix;
279                 } else {
280                         prt_str(out, ", not fixing");
281                 }
282         } else if (flags & FSCK_NEED_FSCK) {
283                 prt_str(out, " (run fsck to correct)");
284         } else {
285                 prt_str(out, " (repair unimplemented)");
286         }
287
288         if (ret == -BCH_ERR_fsck_ignore &&
289             (c->opts.fix_errors == FSCK_FIX_exit ||
290              !(flags & FSCK_CAN_IGNORE)))
291                 ret = -BCH_ERR_fsck_errors_not_fixed;
292
293         if (print) {
294                 if (bch2_fs_stdio_redirect(c))
295                         bch2_print(c, "%s\n", out->buf);
296                 else
297                         bch2_print_string_as_lines(KERN_ERR, out->buf);
298         }
299
300         if (test_bit(BCH_FS_fsck_running, &c->flags) &&
301             (ret != -BCH_ERR_fsck_fix &&
302              ret != -BCH_ERR_fsck_ignore))
303                 bch_err(c, "Unable to continue, halting");
304         else if (suppressing)
305                 bch_err(c, "Ratelimiting new instances of previous error");
306
307         if (s)
308                 s->ret = ret;
309
310         mutex_unlock(&c->fsck_error_msgs_lock);
311
312         printbuf_exit(&buf);
313
314         if (inconsistent)
315                 bch2_inconsistent_error(c);
316
317         if (ret == -BCH_ERR_fsck_fix) {
318                 set_bit(BCH_FS_errors_fixed, &c->flags);
319         } else {
320                 set_bit(BCH_FS_errors_not_fixed, &c->flags);
321                 set_bit(BCH_FS_error, &c->flags);
322         }
323
324         return ret;
325 }
326
327 void bch2_flush_fsck_errs(struct bch_fs *c)
328 {
329         struct fsck_err_state *s, *n;
330
331         mutex_lock(&c->fsck_error_msgs_lock);
332
333         list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
334                 if (s->ratelimited && s->last_msg)
335                         bch_err(c, "Saw %llu errors like:\n    %s", s->nr, s->last_msg);
336
337                 list_del(&s->list);
338                 kfree(s->last_msg);
339                 kfree(s);
340         }
341
342         mutex_unlock(&c->fsck_error_msgs_lock);
343 }