git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/error.c
Disable pristine-tar option in gbp.conf, since there is no pristine-tar branch.
[bcachefs-tools-debian] / libbcachefs / error.c
index 304ff92500be917022bad5e70e6e4c2a1458d8d4..8ae95b218e8b0c9c1f8ba3a0ab1c2a58cad2b66d 100644 (file)
@@ -1,23 +1,24 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "error.h"
-#include "io.h"
+#include "recovery.h"
 #include "super.h"
+#include <linux/thread_with_file.h>
 
 #define FSCK_ERR_RATELIMIT_NR  10
 
 bool bch2_inconsistent_error(struct bch_fs *c)
 {
-       set_bit(BCH_FS_ERROR, &c->flags);
+       set_bit(BCH_FS_error, &c->flags); /* always record the error flag first, whatever c->opts.errors says */
 
        switch (c->opts.errors) {
-       case BCH_ON_ERROR_CONTINUE:
+       case BCH_ON_ERROR_continue: /* policy "continue": nothing else is done, returns false */
                return false;
-       case BCH_ON_ERROR_RO:
+       case BCH_ON_ERROR_ro: /* policy "ro": go emergency read-only, returns true */
                if (bch2_fs_emergency_read_only(c))
-                       bch_err(c, "emergency read only");
+                       bch_err(c, "inconsistency detected - emergency read only"); /* logged only when the call above returned true */
                return true;
-       case BCH_ON_ERROR_PANIC:
+       case BCH_ON_ERROR_panic: /* policy "panic": panic() with a bch2_fmt()-formatted message */
                panic(bch2_fmt(c, "panic after error"));
                return true;
        default:
@@ -25,10 +26,22 @@ bool bch2_inconsistent_error(struct bch_fs *c)
        }
 }
 
+int bch2_topology_error(struct bch_fs *c) /* Flag a topology error on c and return a negative BCH_ERR_* code, or the nonzero result of the recovery-pass request */
+{
+       set_bit(BCH_FS_topology_error, &c->flags); /* set unconditionally, before checking whether fsck is running */
+       if (!test_bit(BCH_FS_fsck_running, &c->flags)) { /* fsck not running: handle as an inconsistency and report that topology repair is needed */
+               bch2_inconsistent_error(c);
+               return -BCH_ERR_btree_need_topology_repair;
+       } else { /* fsck running: explicitly request the check_topology recovery pass */
+               return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: /* GNU "?:": a nonzero result is returned unchanged */
+                       -BCH_ERR_btree_node_read_validate_error; /* used when the call above returned 0 */
+       }
+}
+
 void bch2_fatal_error(struct bch_fs *c)
 {
        if (bch2_fs_emergency_read_only(c))
-               bch_err(c, "emergency read only");
+               bch_err(c, "fatal error - emergency read only"); /* logged only when bch2_fs_emergency_read_only() returned true; the new prefix tells this path apart from the inconsistency path */
 }
 
 void bch2_io_error_work(struct work_struct *work)
@@ -37,131 +50,294 @@ void bch2_io_error_work(struct work_struct *work)
        struct bch_fs *c = ca->fs;
        bool dev;
 
-       mutex_lock(&c->state_lock);
-       dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO,
+       down_write(&c->state_lock); /* state_lock is now taken with down_write()/up_write() instead of mutex_lock()/mutex_unlock() */
+       dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, /* dev picks the action below: set just this device RO, or make the whole filesystem emergency read-only */
                                    BCH_FORCE_IF_DEGRADED);
        if (dev
-           ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_RO,
+           ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, /* the message is logged when whichever branch ran returns nonzero */
                                  BCH_FORCE_IF_DEGRADED)
            : bch2_fs_emergency_read_only(c))
                bch_err(ca,
                        "too many IO errors, setting %s RO",
                        dev ? "device" : "filesystem");
-       mutex_unlock(&c->state_lock);
+       up_write(&c->state_lock); /* pairs with the down_write() above */
 }
 
-void bch2_io_error(struct bch_dev *ca)
+void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) /* Count one IO error of the given type against device ca */
 {
+       atomic64_inc(&ca->errors[type]); /* type indexes ca->errors[] directly; no range check is done here */
        //queue_work(system_long_wq, &ca->io_error_work);
 }
 
+enum ask_yn { /* Parsed reply to the fsck "fix?" prompt. Values matter: bch2_fsck_err() tests (ask & 1) for "yes" and (ask >= YN_ALLNO) for "all" */
+       YN_NO, /* 'n': do not fix this error */
+       YN_YES, /* 'y': fix this error */
+       YN_ALLNO, /* 'N': do not fix any error of this type */
+       YN_ALLYES, /* 'Y': fix every error of this type */
+};
+
+static enum ask_yn parse_yn_response(char *buf) /* Map a reply line to an ask_yn value; returns -1 when the reply is not recognised */
+{
+       buf = strim(buf); /* strim() result replaces buf, so the length test below sees the trimmed reply */
+
+       if (strlen(buf) == 1) /* only a single-character reply is accepted */
+               switch (buf[0]) {
+               case 'n':
+                       return YN_NO;
+               case 'y':
+                       return YN_YES;
+               case 'N': /* upper case selects the "all errors of this type" variants */
+                       return YN_ALLNO;
+               case 'Y':
+                       return YN_ALLYES;
+               }
+       return -1; /* NOTE(review): -1 is not an ask_yn enumerator; callers store the result in an int and test "< 0" - confirm this round-trips on all supported compilers */
+}
+
 #ifdef __KERNEL__
-#define ask_yn()       false
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) /* Kernel build: prompt over c->stdio and parse the reply; answers YN_NO when there is no usable redirect or the read fails */
+{
+       struct stdio_redirect *stdio = c->stdio;
+
+       if (c->stdio_filter && c->stdio_filter != current) /* a stdio_filter is set and it is not the current task: do not use the redirect */
+               stdio = NULL;
+
+       if (!stdio) /* nobody to ask: answer "no" */
+               return YN_NO;
+
+       char buf[100];
+       int ret; /* int, not enum ask_yn, so the -1 "unrecognised" result can be tested with < 0 */
+
+       do {
+               bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
+
+               int r = stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); /* size - 1 keeps one byte free for the terminator written below */
+               if (r < 0) /* a read failure is treated as "no" */
+                       return YN_NO;
+               buf[r] = '\0';
+       } while ((ret = parse_yn_response(buf)) < 0); /* prompt again until the reply parses */
+
+       return ret;
+}
 #else
+
 #include "tools-util.h"
-#endif
 
-enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
-                               const char *fmt, ...)
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) /* Userspace build: prompt on stdout and read the reply from stdin; c is not used in this variant */
 {
-       struct fsck_err_state *s;
-       va_list args;
-       bool fix = false, print = true, suppressing = false;
-       char _buf[sizeof(s->buf)], *buf = _buf;
+       char *buf = NULL; /* getline() allocates and grows this buffer; freed before returning */
+       size_t buflen = 0;
+       int ret; /* int so the -1 "unrecognised" result can be tested with < 0 */
 
-       if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) {
-               va_start(args, fmt);
-               vprintk(fmt, args);
-               va_end(args);
+       do {
+               fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
+               fflush(stdout); /* the prompt has no newline, so flush to make it visible before blocking on stdin */
 
-               return bch2_inconsistent_error(c)
-                       ? FSCK_ERR_EXIT
-                       : FSCK_ERR_FIX;
-       }
+               if (getline(&buf, &buflen, stdin) < 0) /* getline() failure (including EOF) is fatal here */
+                       die("error reading from standard input");
+       } while ((ret = parse_yn_response(buf)) < 0); /* prompt again until the reply parses */
+
+       free(buf);
+       return ret;
+}
+
+#endif
+
+static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) /* Find or create the state entry for format string fmt; NULL if fsck is not running or allocation fails. bch2_fsck_err() calls this with fsck_error_msgs_lock held */
+{
+       struct fsck_err_state *s;
 
-       mutex_lock(&c->fsck_error_lock);
+       if (!test_bit(BCH_FS_fsck_running, &c->flags)) /* no per-message state is tracked unless fsck is running */
+               return NULL;
 
-       list_for_each_entry(s, &c->fsck_errors, list)
-               if (s->fmt == fmt)
-                       goto found;
+       list_for_each_entry(s, &c->fsck_error_msgs, list)
+               if (s->fmt == fmt) { /* entries are matched by the format string's address, not by its contents */
+                       /*
+                        * move it to the head of the list: repeated fsck errors
+                        * are common
+                        */
+                       list_move(&s->list, &c->fsck_error_msgs);
+                       return s;
+               }
 
-       s = kzalloc(sizeof(*s), GFP_KERNEL);
+       s = kzalloc(sizeof(*s), GFP_NOFS); /* zeroed allocation, so nr/fix/ret/last_msg start out as 0/NULL */
        if (!s) {
-               if (!c->fsck_alloc_err)
+               if (!c->fsck_alloc_msgs_err) /* log the allocation failure only the first time */
                        bch_err(c, "kmalloc err, cannot ratelimit fsck errs");
-               c->fsck_alloc_err = true;
-               buf = _buf;
-               goto print;
+               c->fsck_alloc_msgs_err = true;
+               return NULL; /* caller carries on without ratelimiting or memoization */
        }
 
        INIT_LIST_HEAD(&s->list);
        s->fmt = fmt;
-found:
-       list_move(&s->list, &c->fsck_errors);
-       s->nr++;
-       suppressing     = s->nr == FSCK_ERR_RATELIMIT_NR;
-       print           = s->nr <= FSCK_ERR_RATELIMIT_NR;
-       buf             = s->buf;
-print:
+       list_add(&s->list, &c->fsck_error_msgs); /* new entries go at the head of the list */
+       return s;
+}
+
+int bch2_fsck_err(struct bch_fs *c, /* Report one fsck error: format it, memoize/ratelimit it, decide fix vs ignore vs stop; returns a negative BCH_ERR_fsck_* code */
+                 enum bch_fsck_flags flags,
+                 enum bch_sb_error_id err,
+                 const char *fmt, ...)
+{
+       struct fsck_err_state *s = NULL;
+       va_list args;
+       bool print = true, suppressing = false, inconsistent = false;
+       struct printbuf buf = PRINTBUF, *out = &buf; /* the message is built up in buf; out is just a pointer to it */
+       int ret = -BCH_ERR_fsck_ignore; /* default outcome; branches below override it */
+
+       if ((flags & FSCK_CAN_FIX) && /* fixable errors whose id is set in sb.errors_silent are fixed without being counted or printed */
+           test_bit(err, c->sb.errors_silent))
+               return -BCH_ERR_fsck_fix;
+
+       bch2_sb_error_count(c, err);
+
        va_start(args, fmt);
-       vscnprintf(buf, sizeof(_buf), fmt, args);
+       prt_vprintf(out, fmt, args); /* format the caller's message into the printbuf */
        va_end(args);
 
-       if (c->opts.fix_errors == FSCK_OPT_EXIT) {
-               bch_err(c, "%s, exiting", buf);
+       mutex_lock(&c->fsck_error_msgs_lock); /* held until just before printbuf_exit(); covers s and the message list */
+       s = fsck_err_get(c, fmt); /* NULL when fsck is not running or the entry could not be allocated */
+       if (s) {
+               /*
+                * We may be called multiple times for the same error on
+                * transaction restart - this memoizes instead of asking the user
+                * multiple times for the same error:
+                */
+               if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { /* identical text to the previous instance: reuse the stored result */
+                       ret = s->ret;
+                       mutex_unlock(&c->fsck_error_msgs_lock);
+                       printbuf_exit(&buf);
+                       return ret;
+               }
+
+               kfree(s->last_msg);
+               s->last_msg = kstrdup(buf.buf, GFP_KERNEL); /* NOTE(review): result is unchecked; a NULL last_msg is tolerated by the checks here and in bch2_flush_fsck_errs() */
+
+               if (c->opts.ratelimit_errors &&
+                   !(flags & FSCK_NO_RATELIMIT) &&
+                   s->nr >= FSCK_ERR_RATELIMIT_NR) { /* s->nr counts earlier instances; it is incremented after this check */
+                       if (s->nr == FSCK_ERR_RATELIMIT_NR) /* exactly at the limit: still print, and announce that later ones are suppressed */
+                               suppressing = true;
+                       else /* NOTE(review): s->ratelimited, tested by bch2_flush_fsck_errs(), is not assigned in this function - confirm it is set somewhere */
+                               print = false;
+               }
+
+               s->nr++;
+       }
+
+#ifdef BCACHEFS_LOG_PREFIX
+       if (!strncmp(fmt, "bcachefs:", 9))
+               prt_printf(out, bch2_log_msg(c, ""));
+#endif
+
+       if (!test_bit(BCH_FS_fsck_running, &c->flags)) { /* error reported outside of fsck */
+               if (c->opts.errors != BCH_ON_ERROR_continue || /* shut down unless policy is "continue" AND the error is fixable or ignorable */
+                   !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
+                       prt_str(out, ", shutting down");
+                       inconsistent = true; /* bch2_inconsistent_error() is called later, after the lock is dropped */
+                       ret = -BCH_ERR_fsck_errors_not_fixed;
+               } else if (flags & FSCK_CAN_FIX) {
+                       prt_str(out, ", fixing");
+                       ret = -BCH_ERR_fsck_fix;
+               } else {
+                       prt_str(out, ", continuing");
+                       ret = -BCH_ERR_fsck_ignore;
+               }
+       } else if (c->opts.fix_errors == FSCK_FIX_exit) { /* fsck running with fix_errors=exit: stop at the first error */
+               prt_str(out, ", exiting");
+               ret = -BCH_ERR_fsck_errors_not_fixed;
        } else if (flags & FSCK_CAN_FIX) {
-               if (c->opts.fix_errors == FSCK_OPT_ASK) {
-                       printk(KERN_ERR "%s: fix?", buf);
-                       fix = ask_yn();
-               } else if (c->opts.fix_errors == FSCK_OPT_YES ||
+               int fix = s && s->fix /* a nonzero s->fix (stored by an earlier Y/N "all" answer below) overrides the global option */
+                       ? s->fix
+                       : c->opts.fix_errors;
+
+               if (fix == FSCK_FIX_ask) {
+                       int ask;
+
+                       prt_str(out, ": fix?");
+                       if (bch2_fs_stdio_redirect(c)) /* prompt is printed here, without a trailing newline in the redirect case */
+                               bch2_print(c, "%s", out->buf);
+                       else
+                               bch2_print_string_as_lines(KERN_ERR, out->buf);
+                       print = false; /* already printed above; skip the generic print further down */
+
+                       ask = bch2_fsck_ask_yn(c);
+
+                       if (ask >= YN_ALLNO && s) /* YN_ALLNO/YN_ALLYES: remember the answer for this error type */
+                               s->fix = ask == YN_ALLNO
+                                       ? FSCK_FIX_no
+                                       : FSCK_FIX_yes;
+
+                       ret = ask & 1 /* odd values are the "yes" answers: YN_YES and YN_ALLYES */
+                               ? -BCH_ERR_fsck_fix
+                               : -BCH_ERR_fsck_ignore;
+               } else if (fix == FSCK_FIX_yes || /* also taken when nochanges is set and the error cannot be ignored */
                           (c->opts.nochanges &&
                            !(flags & FSCK_CAN_IGNORE))) {
-                       if (print)
-                               bch_err(c, "%s, fixing", buf);
-                       fix = true;
+                       prt_str(out, ", fixing");
+                       ret = -BCH_ERR_fsck_fix;
                } else {
-                       if (print)
-                               bch_err(c, "%s, not fixing", buf);
-                       fix = false;
+                       prt_str(out, ", not fixing"); /* ret keeps its default of -BCH_ERR_fsck_ignore */
                }
        } else if (flags & FSCK_NEED_FSCK) {
-               if (print)
-                       bch_err(c, "%s (run fsck to correct)", buf);
+               prt_str(out, " (run fsck to correct)");
        } else {
-               if (print)
-                       bch_err(c, "%s (repair unimplemented)", buf);
+               prt_str(out, " (repair unimplemented)");
        }
 
-       if (suppressing)
+       if (ret == -BCH_ERR_fsck_ignore && /* "ignore" is upgraded to "not fixed" when fix_errors=exit or the error is not ignorable */
+           (c->opts.fix_errors == FSCK_FIX_exit ||
+            !(flags & FSCK_CAN_IGNORE)))
+               ret = -BCH_ERR_fsck_errors_not_fixed;
+
+       if (print) { /* false when ratelimited or when the prompt path already printed the message */
+               if (bch2_fs_stdio_redirect(c))
+                       bch2_print(c, "%s\n", out->buf);
+               else
+                       bch2_print_string_as_lines(KERN_ERR, out->buf);
+       }
+
+       if (test_bit(BCH_FS_fsck_running, &c->flags) && /* during fsck, any result other than fix/ignore means fsck stops */
+           (ret != -BCH_ERR_fsck_fix &&
+            ret != -BCH_ERR_fsck_ignore))
+               bch_err(c, "Unable to continue, halting");
+       else if (suppressing)
                bch_err(c, "Ratelimiting new instances of previous error");
 
-       mutex_unlock(&c->fsck_error_lock);
+       if (s) /* store the result so an identical repeat of this message returns it without re-asking */
+               s->ret = ret;
+
+       mutex_unlock(&c->fsck_error_msgs_lock);
 
-       if (fix) {
-               set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
-               return FSCK_ERR_FIX;
+       printbuf_exit(&buf);
+
+       if (inconsistent) /* done after fsck_error_msgs_lock has been released */
+               bch2_inconsistent_error(c);
+
+       if (ret == -BCH_ERR_fsck_fix) {
+               set_bit(BCH_FS_errors_fixed, &c->flags);
        } else {
-               set_bit(BCH_FS_ERROR, &c->flags);
-               return c->opts.fix_errors == FSCK_OPT_EXIT ||
-                       !(flags & FSCK_CAN_IGNORE)
-                       ? FSCK_ERR_EXIT
-                       : FSCK_ERR_IGNORE;
+               set_bit(BCH_FS_errors_not_fixed, &c->flags); /* every non-fix result, including ignore, sets both flags */
+               set_bit(BCH_FS_error, &c->flags);
        }
+
+       return ret;
 }
 
 void bch2_flush_fsck_errs(struct bch_fs *c)
 {
        struct fsck_err_state *s, *n;
 
-       mutex_lock(&c->fsck_error_lock);
+       mutex_lock(&c->fsck_error_msgs_lock); /* same lock bch2_fsck_err() holds while using the list */
 
-       list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
-               if (s->nr > FSCK_ERR_RATELIMIT_NR)
-                       bch_err(c, "Saw %llu errors like:\n    %s", s->nr, s->buf);
+       list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { /* _safe variant because each entry is deleted and freed inside the loop */
+               if (s->ratelimited && s->last_msg) /* NOTE(review): no assignment to s->ratelimited is visible in this file - confirm it is set, otherwise this summary never prints */
+                       bch_err(c, "Saw %llu errors like:\n    %s", s->nr, s->last_msg);
 
                list_del(&s->list);
+               kfree(s->last_msg); /* last_msg is a separate kstrdup() allocation made in bch2_fsck_err() */
                kfree(s);
        }
 
-       mutex_unlock(&c->fsck_error_lock);
+       mutex_unlock(&c->fsck_error_msgs_lock);
 }