X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libbcachefs%2Ferror.c;h=d32c8bebe46c32f7abc1a11ad49ee80752f2a623;hb=b5fd066153c40a70a29caa1ea7987723ab687763;hp=ca2a06e2e4eb3dbd751aa24ecb4c1622cc2bb594;hpb=1cf4d51dc4661f336f5318c176a3561ddf5bf04f;p=bcachefs-tools-debian diff --git a/libbcachefs/error.c b/libbcachefs/error.c index ca2a06e..d32c8be 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -1,29 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "error.h" -#include "io.h" #include "super.h" +#include "thread_with_file.h" -void bch2_inconsistent_error(struct bch_fs *c) +#define FSCK_ERR_RATELIMIT_NR 10 + +bool bch2_inconsistent_error(struct bch_fs *c) { - set_bit(BCH_FS_ERROR, &c->flags); + set_bit(BCH_FS_error, &c->flags); switch (c->opts.errors) { - case BCH_ON_ERROR_CONTINUE: - break; - case BCH_ON_ERROR_RO: + case BCH_ON_ERROR_continue: + return false; + case BCH_ON_ERROR_ro: if (bch2_fs_emergency_read_only(c)) - bch_err(c, "emergency read only"); - break; - case BCH_ON_ERROR_PANIC: + bch_err(c, "inconsistency detected - emergency read only"); + return true; + case BCH_ON_ERROR_panic: panic(bch2_fmt(c, "panic after error")); - break; + return true; + default: + BUG(); } } +void bch2_topology_error(struct bch_fs *c) +{ + set_bit(BCH_FS_topology_error, &c->flags); + if (!test_bit(BCH_FS_fsck_running, &c->flags)) + bch2_inconsistent_error(c); +} + void bch2_fatal_error(struct bch_fs *c) { if (bch2_fs_emergency_read_only(c)) - bch_err(c, "emergency read only"); + bch_err(c, "fatal error - emergency read only"); } void bch2_io_error_work(struct work_struct *work) @@ -32,125 +44,294 @@ void bch2_io_error_work(struct work_struct *work) struct bch_fs *c = ca->fs; bool dev; - mutex_lock(&c->state_lock); - dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_RO, + down_write(&c->state_lock); + dev = bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_ro, BCH_FORCE_IF_DEGRADED); if (dev - ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_RO, + ? __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, BCH_FORCE_IF_DEGRADED) : bch2_fs_emergency_read_only(c)) bch_err(ca, "too many IO errors, setting %s RO", dev ? "device" : "filesystem"); - mutex_unlock(&c->state_lock); + up_write(&c->state_lock); } -void bch2_io_error(struct bch_dev *ca) +void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) { + atomic64_inc(&ca->errors[type]); //queue_work(system_long_wq, &ca->io_error_work); } +enum ask_yn { + YN_NO, + YN_YES, + YN_ALLNO, + YN_ALLYES, +}; + +static enum ask_yn parse_yn_response(char *buf) +{ + buf = strim(buf); + + if (strlen(buf) == 1) + switch (buf[0]) { + case 'n': + return YN_NO; + case 'y': + return YN_YES; + case 'N': + return YN_ALLNO; + case 'Y': + return YN_ALLYES; + } + return -1; +} + #ifdef __KERNEL__ -#define ask_yn() false +static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) +{ + struct stdio_redirect *stdio = c->stdio; + + if (c->stdio_filter && c->stdio_filter != current) + stdio = NULL; + + if (!stdio) + return YN_NO; + + char buf[100]; + int ret; + + do { + bch2_print(c, " (y,n, or Y,N for all errors of this type) "); + + int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1); + if (r < 0) + return YN_NO; + buf[r] = '\0'; + } while ((ret = parse_yn_response(buf)) < 0); + + return ret; +} #else + #include "tools-util.h" + +static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c) +{ + char *buf = NULL; + size_t buflen = 0; + int ret; + + do { + fputs(" (y,n, or Y,N for all errors of this type) ", stdout); + fflush(stdout); + + if (getline(&buf, &buflen, stdin) < 0) + die("error reading from standard input"); + } while ((ret = parse_yn_response(buf)) < 0); + + free(buf); + return ret; +} + #endif -enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags, - const char *fmt, ...) +static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) { struct fsck_err_state *s; - va_list args; - bool fix = false, print = true, suppressing = false; - char _buf[sizeof(s->buf)], *buf = _buf; - - mutex_lock(&c->fsck_error_lock); - if (test_bit(BCH_FS_FSCK_DONE, &c->flags)) - goto print; + if (!test_bit(BCH_FS_fsck_running, &c->flags)) + return NULL; - list_for_each_entry(s, &c->fsck_errors, list) - if (s->fmt == fmt) - goto found; + list_for_each_entry(s, &c->fsck_error_msgs, list) + if (s->fmt == fmt) { + /* + * move it to the head of the list: repeated fsck errors + * are common + */ + list_move(&s->list, &c->fsck_error_msgs); + return s; + } - s = kzalloc(sizeof(*s), GFP_KERNEL); + s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) { - if (!c->fsck_alloc_err) + if (!c->fsck_alloc_msgs_err) bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); - c->fsck_alloc_err = true; - buf = _buf; - goto print; + c->fsck_alloc_msgs_err = true; + return NULL; } INIT_LIST_HEAD(&s->list); s->fmt = fmt; -found: - list_move(&s->list, &c->fsck_errors); - s->nr++; - suppressing = s->nr == 10; - print = s->nr <= 10; - buf = s->buf; -print: + list_add(&s->list, &c->fsck_error_msgs); + return s; +} + +int bch2_fsck_err(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct fsck_err_state *s = NULL; + va_list args; + bool print = true, suppressing = false, inconsistent = false; + struct printbuf buf = PRINTBUF, *out = &buf; + int ret = -BCH_ERR_fsck_ignore; + + if ((flags & FSCK_CAN_FIX) && + test_bit(err, c->sb.errors_silent)) + return -BCH_ERR_fsck_fix; + + bch2_sb_error_count(c, err); + va_start(args, fmt); - vscnprintf(buf, sizeof(_buf), fmt, args); + prt_vprintf(out, fmt, args); va_end(args); - if (c->opts.fix_errors == FSCK_OPT_EXIT) { - bch_err(c, "%s, exiting", buf); - mutex_unlock(&c->fsck_error_lock); - return FSCK_ERR_EXIT; + mutex_lock(&c->fsck_error_msgs_lock); + s = fsck_err_get(c, fmt); + if (s) { + /* + * We may be called multiple times for the same error on + * transaction restart - this memoizes instead of asking the user + * multiple times for the same error: + */ + if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { + ret = s->ret; + mutex_unlock(&c->fsck_error_msgs_lock); + printbuf_exit(&buf); + return ret; + } + + kfree(s->last_msg); + s->last_msg = kstrdup(buf.buf, GFP_KERNEL); + + if (c->opts.ratelimit_errors && + !(flags & FSCK_NO_RATELIMIT) && + s->nr >= FSCK_ERR_RATELIMIT_NR) { + if (s->nr == FSCK_ERR_RATELIMIT_NR) + suppressing = true; + else + print = false; + } + + s->nr++; } - if (flags & FSCK_CAN_FIX) { - if (c->opts.fix_errors == FSCK_OPT_ASK) { - printk(KERN_ERR "%s: fix?", buf); - fix = ask_yn(); - } else if (c->opts.fix_errors == FSCK_OPT_YES || +#ifdef BCACHEFS_LOG_PREFIX + if (!strncmp(fmt, "bcachefs:", 9)) + prt_printf(out, bch2_log_msg(c, "")); +#endif + + if (!test_bit(BCH_FS_fsck_running, &c->flags)) { + if (c->opts.errors != BCH_ON_ERROR_continue || + !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { + prt_str(out, ", shutting down"); + inconsistent = true; + ret = -BCH_ERR_fsck_errors_not_fixed; + } else if (flags & FSCK_CAN_FIX) { + prt_str(out, ", fixing"); + ret = -BCH_ERR_fsck_fix; + } else { + prt_str(out, ", continuing"); + ret = -BCH_ERR_fsck_ignore; + } + } else if (c->opts.fix_errors == FSCK_FIX_exit) { + prt_str(out, ", exiting"); + ret = -BCH_ERR_fsck_errors_not_fixed; + } else if (flags & FSCK_CAN_FIX) { + int fix = s && s->fix + ? s->fix + : c->opts.fix_errors; + + if (fix == FSCK_FIX_ask) { + int ask; + + prt_str(out, ": fix?"); + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s", out->buf); + else + bch2_print_string_as_lines(KERN_ERR, out->buf); + print = false; + + ask = bch2_fsck_ask_yn(c); + + if (ask >= YN_ALLNO && s) + s->fix = ask == YN_ALLNO + ? FSCK_FIX_no + : FSCK_FIX_yes; + + ret = ask & 1 + ? -BCH_ERR_fsck_fix + : -BCH_ERR_fsck_ignore; + } else if (fix == FSCK_FIX_yes || (c->opts.nochanges && !(flags & FSCK_CAN_IGNORE))) { - if (print) - bch_err(c, "%s, fixing", buf); - fix = true; + prt_str(out, ", fixing"); + ret = -BCH_ERR_fsck_fix; } else { - if (print) - bch_err(c, "%s, not fixing", buf); - fix = false; + prt_str(out, ", not fixing"); } } else if (flags & FSCK_NEED_FSCK) { - if (print) - bch_err(c, "%s (run fsck to correct)", buf); + prt_str(out, " (run fsck to correct)"); } else { - if (print) - bch_err(c, "%s (repair unimplemented)", buf); + prt_str(out, " (repair unimplemented)"); + } + + if (ret == -BCH_ERR_fsck_ignore && + (c->opts.fix_errors == FSCK_FIX_exit || + !(flags & FSCK_CAN_IGNORE))) + ret = -BCH_ERR_fsck_errors_not_fixed; + + if (print) { + if (bch2_fs_stdio_redirect(c)) + bch2_print(c, "%s\n", out->buf); + else + bch2_print_string_as_lines(KERN_ERR, out->buf); } - if (suppressing) + if (test_bit(BCH_FS_fsck_running, &c->flags) && + (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore)) + bch_err(c, "Unable to continue, halting"); + else if (suppressing) bch_err(c, "Ratelimiting new instances of previous error"); - mutex_unlock(&c->fsck_error_lock); + if (s) + s->ret = ret; + + mutex_unlock(&c->fsck_error_msgs_lock); + + printbuf_exit(&buf); - if (fix) - set_bit(BCH_FS_FSCK_FIXED_ERRORS, &c->flags); + if (inconsistent) + bch2_inconsistent_error(c); + + if (ret == -BCH_ERR_fsck_fix) { + set_bit(BCH_FS_errors_fixed, &c->flags); + } else { + set_bit(BCH_FS_errors_not_fixed, &c->flags); + set_bit(BCH_FS_error, &c->flags); + } - return fix ? FSCK_ERR_FIX - : flags & FSCK_CAN_IGNORE ? FSCK_ERR_IGNORE - : FSCK_ERR_EXIT; + return ret; } void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; - mutex_lock(&c->fsck_error_lock); - set_bit(BCH_FS_FSCK_DONE, &c->flags); + mutex_lock(&c->fsck_error_msgs_lock); - list_for_each_entry_safe(s, n, &c->fsck_errors, list) { - if (s->nr > 10) - bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf); + list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { + if (s->ratelimited && s->last_msg) + bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); + kfree(s->last_msg); kfree(s); } - mutex_unlock(&c->fsck_error_lock); + mutex_unlock(&c->fsck_error_msgs_lock); }