check if fs is mounted before running fsck

[bcachefs-tools-debian] / libbcachefs / journal.c
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c

index b3099fdf02dd9bd5060cb9f37f9bc2939b5e65f4..aabb68d2faa74f9f2d08f0b1b827d00aa81e4131 100644 (file)
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -84,17 +84,12 @@ void bch2_journal_halt(struct journal *j)
  
         journal_wake(j);
         closure_wake_up(&journal_cur_buf(j)->wait);
-       closure_wake_up(&journal_prev_buf(j)->wait);
  }
  
  /* journal entry close/open: */
  
  void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
  {
-       struct journal_buf *w = journal_prev_buf(j);
-
-       atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
         if (!need_write_just_set &&
             test_bit(JOURNAL_NEED_WRITE, &j->flags))
                 bch2_time_stats_update(j->delay_time,
@@ -175,7 +170,6 @@ static bool __journal_entry_close(struct journal *j)
          * Hence, we want update/set last_seq on the current journal entry right
          * before we open a new one:
          */
-       bch2_journal_reclaim_fast(j);
         buf->data->last_seq     = cpu_to_le64(journal_last_seq(j));
  
         if (journal_entry_empty(buf->data))
@@ -189,8 +183,8 @@ static bool __journal_entry_close(struct journal *j)
  
         cancel_delayed_work(&j->write_work);
  
-       /* ugh - might be called from __journal_res_get() under wait_event() */
-       __set_current_state(TASK_RUNNING);
+       bch2_journal_space_available(j);
+
         bch2_journal_buf_put(j, old.idx, set_need_write);
         return true;
  }
@@ -220,7 +214,7 @@ static int journal_entry_open(struct journal *j)
  {
         struct journal_buf *buf = journal_cur_buf(j);
         union journal_res_state old, new;
-       int u64s, ret;
+       int u64s;
         u64 v;
  
         lockdep_assert_held(&j->lock);
@@ -229,12 +223,10 @@ static int journal_entry_open(struct journal *j)
         if (j->blocked)
                 return -EAGAIN;
  
-       if (!fifo_free(&j->pin))
-               return -ENOSPC;
+       if (j->cur_entry_error)
+               return j->cur_entry_error;
  
-       ret = bch2_journal_space_available(j);
-       if (ret)
-               return ret;
+       BUG_ON(!j->cur_entry_sectors);
  
         buf->u64s_reserved      = j->entry_u64s_reserved;
         buf->disk_sectors       = j->cur_entry_sectors;
@@ -261,6 +253,8 @@ static int journal_entry_open(struct journal *j)
  
                 /* Handle any already added entries */
                 new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+
+               EBUG_ON(journal_state_count(new, new.idx));
                 journal_state_inc(&new);
         } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                        old.v, new.v)) != old.v);
@@ -328,6 +322,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
  {
         struct bch_fs *c = container_of(j, struct bch_fs, journal);
         struct journal_buf *buf;
+       bool can_discard;
         int ret;
  retry:
         if (journal_res_get_fast(j, res, flags))
@@ -348,6 +343,16 @@ retry:
                 return 0;
         }
  
+       if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+           !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+               /*
+                * Don't want to close current journal entry, just need to
+                * invoke reclaim:
+                */
+               ret = -ENOSPC;
+               goto unlock;
+       }
+
         /*
          * If we couldn't get a reservation because the current buf filled up,
          * and we had room for a bigger entry on disk, signal that we want to
@@ -371,22 +376,38 @@ retry:
         } else {
                 ret = journal_entry_open(j);
         }
-
+unlock:
         if ((ret == -EAGAIN || ret == -ENOSPC) &&
             !j->res_get_blocked_start)
                 j->res_get_blocked_start = local_clock() ?: 1;
  
+       can_discard = j->can_discard;
         spin_unlock(&j->lock);
  
         if (!ret)
                 goto retry;
+
         if (ret == -ENOSPC) {
+               BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
+
                 /*
                  * Journal is full - can't rely on reclaim from work item due to
                  * freezing:
                  */
                 trace_journal_full(c);
-               bch2_journal_reclaim_work(&j->reclaim_work.work);
+
+               if (!(flags & JOURNAL_RES_GET_NONBLOCK)) {
+                       if (can_discard) {
+                               bch2_journal_do_discards(j);
+                               goto retry;
+                       }
+
+                       if (mutex_trylock(&j->reclaim_lock)) {
+                               bch2_journal_reclaim(j);
+                               mutex_unlock(&j->reclaim_lock);
+                       }
+               }
+
                 ret = -EAGAIN;
         }
  
@@ -408,12 +429,38 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
  {
         int ret;
  
-       wait_event(j->wait,
+       closure_wait_event(&j->async_wait,
                    (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
                    (flags & JOURNAL_RES_GET_NONBLOCK));
         return ret;
  }
  
+/* journal_preres: */
+/* Try the prereservation fast path; on failure call the reclaim work fn. */
+static bool journal_preres_available(struct journal *j,
+                                    struct journal_preres *res,
+                                    unsigned new_u64s)
+{
+       bool ret = bch2_journal_preres_get_fast(j, res, new_u64s);
+
+       if (!ret)       /* called directly here, not queued as a work item */
+               bch2_journal_reclaim_work(&j->reclaim_work.work);
+
+       return ret;     /* fast-path result; not retried after the reclaim call */
+}
+
+int __bch2_journal_preres_get(struct journal *j,
+                             struct journal_preres *res,
+                             unsigned new_u64s)
+{
+       int ret;        /* latest bch2_journal_error() value */
+
+       closure_wait_event(&j->preres_wait,
+                  (ret = bch2_journal_error(j)) || /* journal error, or... */
+                  journal_preres_available(j, res, new_u64s)); /* ...got it */
+       return ret;     /* 0 unless bch2_journal_error() reported an error */
+}
+
  /* journal_entry_res: */
  
  void bch2_journal_entry_res_resize(struct journal *j,
@@ -429,7 +476,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
         if (d <= 0)
                 goto out;
  
-       j->cur_entry_u64s -= d;
+       j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
         smp_mb();
         state = READ_ONCE(j->reservations);
  
@@ -765,6 +812,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
  
         while (ja->nr < nr) {
                 struct open_bucket *ob = NULL;
+               unsigned pos;
                 long bucket;
  
                 if (new_fs) {
@@ -791,20 +839,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                         preempt_disable();
                 }
  
-               __array_insert_item(ja->buckets,                ja->nr, ja->last_idx);
-               __array_insert_item(ja->bucket_seq,             ja->nr, ja->last_idx);
-               __array_insert_item(journal_buckets->buckets,   ja->nr, ja->last_idx);
+               pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+               __array_insert_item(ja->buckets,                ja->nr, pos);
+               __array_insert_item(ja->bucket_seq,             ja->nr, pos);
+               __array_insert_item(journal_buckets->buckets,   ja->nr, pos);
+               ja->nr++;
  
-               ja->buckets[ja->last_idx] = bucket;
-               ja->bucket_seq[ja->last_idx] = 0;
-               journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+               ja->buckets[pos] = bucket;
+               ja->bucket_seq[pos] = 0;
+               journal_buckets->buckets[pos] = cpu_to_le64(bucket);
  
-               if (ja->last_idx < ja->nr) {
-                       if (ja->cur_idx >= ja->last_idx)
-                               ja->cur_idx++;
-                       ja->last_idx++;
-               }
-               ja->nr++;
+               if (pos <= ja->discard_idx)
+                       ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+               if (pos <= ja->dirty_idx_ondisk)
+                       ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+               if (pos <= ja->dirty_idx)
+                       ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+               if (pos <= ja->cur_idx)
+                       ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
  
                 bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
                                           ca->mi.bucket_size,
@@ -966,6 +1018,7 @@ void bch2_fs_journal_start(struct journal *j)
  
         c->last_bucket_seq_cleanup = journal_cur_seq(j);
  
+       bch2_journal_space_available(j);
         spin_unlock(&j->lock);
  
         /*
@@ -974,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j)
          * only have to go down with the next journal entry we write:
          */
         bch2_journal_seq_blacklist_write(j);
-
-       queue_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
  }
  
  /* init/exit: */
@@ -1043,6 +1094,7 @@ int bch2_fs_journal_init(struct journal *j)
         mutex_init(&j->blacklist_lock);
         INIT_LIST_HEAD(&j->seq_blacklist);
         mutex_init(&j->reclaim_lock);
+       mutex_init(&j->discard_lock);
  
         lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
  
@@ -1078,35 +1130,60 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
  {
         struct printbuf out = _PBUF(buf, PAGE_SIZE);
         struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       union journal_res_state *s = &j->reservations;
+       union journal_res_state s;
         struct bch_dev *ca;
         unsigned iter;
  
         rcu_read_lock();
         spin_lock(&j->lock);
+       s = READ_ONCE(j->reservations);
  
         pr_buf(&out,
                "active journal entries:\t%llu\n"
                "seq:\t\t\t%llu\n"
                "last_seq:\t\t%llu\n"
                "last_seq_ondisk:\t%llu\n"
-              "reservation count:\t%u\n"
-              "reservation offset:\t%u\n"
-              "current entry u64s:\t%u\n"
-              "io in flight:\t\t%i\n"
-              "need write:\t\t%i\n"
-              "dirty:\t\t\t%i\n"
-              "replay done:\t\t%i\n",
+              "prereserved:\t\t%u/%u\n"
+              "current entry sectors:\t%u\n"
+              "current entry:\t\t",
                fifo_used(&j->pin),
                journal_cur_seq(j),
                journal_last_seq(j),
                j->last_seq_ondisk,
-              journal_state_count(*s, s->idx),
-              s->cur_entry_offset,
-              j->cur_entry_u64s,
-              s->prev_buf_unwritten,
+              j->prereserved.reserved,
+              j->prereserved.remaining,
+              j->cur_entry_sectors);
+
+       switch (s.cur_entry_offset) {
+       case JOURNAL_ENTRY_ERROR_VAL:
+               pr_buf(&out, "error\n");
+               break;
+       case JOURNAL_ENTRY_CLOSED_VAL:
+               pr_buf(&out, "closed\n");
+               break;
+       default:
+               pr_buf(&out, "%u/%u\n",
+                      s.cur_entry_offset,
+                      j->cur_entry_u64s);
+               break;
+       }
+
+       pr_buf(&out,
+              "current entry refs:\t%u\n"
+              "prev entry unwritten:\t",
+              journal_state_count(s, s.idx));
+
+       if (s.prev_buf_unwritten)
+               pr_buf(&out, "yes, ref %u sectors %u\n",
+                      journal_state_count(s, !s.idx),
+                      journal_prev_buf(j)->sectors);
+       else
+               pr_buf(&out, "no\n");
+
+       pr_buf(&out,
+              "need write:\t\t%i\n"
+              "replay done:\t\t%i\n",
                test_bit(JOURNAL_NEED_WRITE,     &j->flags),
-              journal_entry_is_open(j),
                test_bit(JOURNAL_REPLAY_DONE,    &j->flags));
  
         for_each_member_device_rcu(ca, c, iter,
@@ -1119,11 +1196,18 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
                 pr_buf(&out,
                        "dev %u:\n"
                        "\tnr\t\t%u\n"
-                      "\tcur_idx\t\t%u (seq %llu)\n"
-                      "\tlast_idx\t%u (seq %llu)\n",
+                      "\tavailable\t%u:%u\n"
+                      "\tdiscard_idx\t\t%u\n"
+                      "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+                      "\tdirty_idx\t\t%u (seq %llu)\n"
+                      "\tcur_idx\t\t%u (seq %llu)\n",
                        iter, ja->nr,
-                      ja->cur_idx,     ja->bucket_seq[ja->cur_idx],
-                      ja->last_idx,    ja->bucket_seq[ja->last_idx]);
+                      bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
+                      ja->sectors_free,
+                      ja->discard_idx,
+                      ja->dirty_idx_ondisk,    ja->bucket_seq[ja->dirty_idx_ondisk],
+                      ja->dirty_idx,           ja->bucket_seq[ja->dirty_idx],
+                      ja->cur_idx,             ja->bucket_seq[ja->cur_idx]);
         }
  
         spin_unlock(&j->lock);