libbcache/error.c

#include "bcache.h"
#include "error.h"
#include "io.h"
#include "notify.h"
#include "super.h"

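/*
 * Called when we detect an inconsistency - what happens next is governed
 * by the "errors" mount option: continue, force the cache set read only,
 * or panic:
 */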
void bch_inconsistent_error(struct cache_set *c)
{
        set_bit(CACHE_SET_ERROR, &c->flags);

        switch (c->opts.errors) {
        case BCH_ON_ERROR_CONTINUE:
                break;
        case BCH_ON_ERROR_RO:
                if (!test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags)) {
                        /* XXX do something better here? */
                        bch_cache_set_stop(c);
                        return;
                }

                if (bch_cache_set_emergency_read_only(c))
                        bch_err(c, "emergency read only");
                break;
        case BCH_ON_ERROR_PANIC:
                panic(bch_fmt(c, "panic after error"));
                break;
        }
}

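/*
 * A fatal error is one we can't continue from, regardless of the "errors"
 * option - just force the whole cache set read only:
 */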
void bch_fatal_error(struct cache_set *c)
{
        if (bch_cache_set_emergency_read_only(c))
                bch_err(c, "emergency read only");
}

/* Nonfatal IO errors, IO error/latency accounting: */

/* Just does IO error accounting: */
void bch_account_io_completion(struct cache *ca)
{
        /*
         * The halflife of an error is:
         * log2(1/2)/log2(127/128) * error_decay ~= 88 * error_decay
         */
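        /*
         * (Every error_decay IOs we multiply the error count by 127/128;
         * solving (127/128)^n = 1/2 gives n = log(1/2)/log(127/128) ~= 88,
         * so errors halve every ~88 * error_decay IOs.)
         */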

        if (ca->set->error_decay) {
                unsigned count = atomic_inc_return(&ca->io_count);

                while (count > ca->set->error_decay) {
                        unsigned errors;
                        unsigned old = count;
                        unsigned new = count - ca->set->error_decay;

                        /*
                         * First we subtract error_decay from count; each
                         * time we successfully do so, we rescale the
                         * errors once:
                         */

                        count = atomic_cmpxchg(&ca->io_count, old, new);

                        if (count == old) {
                                count = new;

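                                /*
                                 * Standard atomic_cmpxchg() loop: if we
                                 * race, retry with the value we lost to
                                 * until our 127/128 rescale sticks:
                                 */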
                                errors = atomic_read(&ca->io_errors);
                                do {
                                        old = errors;
                                        new = ((uint64_t) errors * 127) / 128;
                                        errors = atomic_cmpxchg(&ca->io_errors,
                                                                old, new);
                                } while (old != errors);
                        }
                }
        }
}

/* IO error accounting and latency accounting: */
void bch_account_io_completion_time(struct cache *ca,
                                    unsigned submit_time_us, int op)
{
        struct cache_set *c;
        unsigned threshold;

        if (!ca)
                return;

        c = ca->set;
        threshold = op_is_write(op)
                ? c->congested_write_threshold_us
                : c->congested_read_threshold_us;

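        /*
         * IOs completing slower than the threshold drive c->congested
         * negative (clamped at -CONGESTED_MAX by the min() below); IOs
         * completing in time let it drift back up towards zero. us / 1024
         * approximates microseconds-to-milliseconds with a shift instead
         * of a divide:
         */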
        if (threshold && submit_time_us) {
                unsigned t = local_clock_us();

                int us = t - submit_time_us;
                int congested = atomic_read(&c->congested);

                if (us > (int) threshold) {
                        int ms = us / 1024;
                        c->congested_last_us = t;

                        ms = min(ms, CONGESTED_MAX + congested);
                        atomic_sub(ms, &c->congested);
                } else if (congested < 0)
                        atomic_inc(&c->congested);
        }

        bch_account_io_completion(ca);
}

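/*
 * Queued by bch_nonfatal_io_error(), below: once a device has accumulated
 * too many IO errors, we set it - or, if it can't safely be removed from
 * the cache set, the whole cache set - read only:
 */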
void bch_nonfatal_io_error_work(struct work_struct *work)
{
        struct cache *ca = container_of(work, struct cache, io_error_work);
        struct cache_set *c = ca->set;
        unsigned errors = atomic_read(&ca->io_errors);
        char buf[BDEVNAME_SIZE];
        bool dev;

        if (errors < c->error_limit) {
                bch_notify_cache_error(ca, false);
        } else {
                bch_notify_cache_error(ca, true);

                mutex_lock(&bch_register_lock);
                dev = bch_cache_may_remove(ca);
                if (dev
                    ? bch_cache_read_only(ca)
                    : bch_cache_set_emergency_read_only(c))
                        bch_err(c,
                                "too many IO errors on %s, setting %s RO",
                                bdevname(ca->disk_sb.bdev, buf),
                                dev ? "device" : "filesystem");
                mutex_unlock(&bch_register_lock);
        }
}

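/*
 * Errors are counted in fixed point, shifted left by IO_ERROR_SHIFT -
 * presumably so the 127/128 decay above doesn't lose whole errors to
 * rounding. The work item runs off system_long_wq, as the error work may
 * block on bch_register_lock:
 */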
void bch_nonfatal_io_error(struct cache *ca)
{
        atomic_add(1 << IO_ERROR_SHIFT, &ca->io_errors);
        queue_work(system_long_wq, &ca->io_error_work);
}
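
/*
 * Sketch of a typical caller (illustrative only - struct example_io and
 * example_endio() are not from this codebase): an IO completion handler
 * accounts latency and, on failure, bumps the nonfatal error count:
 *
 *	static void example_endio(struct bio *bio)
 *	{
 *		struct example_io *io = bio->bi_private;
 *
 *		if (bio->bi_error)
 *			bch_nonfatal_io_error(io->ca);
 *
 *		bch_account_io_completion_time(io->ca, io->submit_time_us,
 *					       bio->bi_opf);
 *	}
 */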