git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/journal.c
check if fs is mounted before running fsck
[bcachefs-tools-debian] / libbcachefs / journal.c
index b3099fdf02dd9bd5060cb9f37f9bc2939b5e65f4..aabb68d2faa74f9f2d08f0b1b827d00aa81e4131 100644 (file)
@@ -84,17 +84,12 @@ void bch2_journal_halt(struct journal *j)
 
        journal_wake(j);
        closure_wake_up(&journal_cur_buf(j)->wait);
-       closure_wake_up(&journal_prev_buf(j)->wait);
 }
 
 /* journal entry close/open: */
 
 void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
 {
-       struct journal_buf *w = journal_prev_buf(j);
-
-       atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
-
        if (!need_write_just_set &&
            test_bit(JOURNAL_NEED_WRITE, &j->flags))
                bch2_time_stats_update(j->delay_time,
@@ -175,7 +170,6 @@ static bool __journal_entry_close(struct journal *j)
         * Hence, we want update/set last_seq on the current journal entry right
         * before we open a new one:
         */
-       bch2_journal_reclaim_fast(j);
        buf->data->last_seq     = cpu_to_le64(journal_last_seq(j));
 
        if (journal_entry_empty(buf->data))
@@ -189,8 +183,8 @@ static bool __journal_entry_close(struct journal *j)
 
        cancel_delayed_work(&j->write_work);
 
-       /* ugh - might be called from __journal_res_get() under wait_event() */
-       __set_current_state(TASK_RUNNING);
+       bch2_journal_space_available(j);
+
        bch2_journal_buf_put(j, old.idx, set_need_write);
        return true;
 }
@@ -220,7 +214,7 @@ static int journal_entry_open(struct journal *j)
 {
        struct journal_buf *buf = journal_cur_buf(j);
        union journal_res_state old, new;
-       int u64s, ret;
+       int u64s;
        u64 v;
 
        lockdep_assert_held(&j->lock);
@@ -229,12 +223,10 @@ static int journal_entry_open(struct journal *j)
        if (j->blocked)
                return -EAGAIN;
 
-       if (!fifo_free(&j->pin))
-               return -ENOSPC;
+       if (j->cur_entry_error)
+               return j->cur_entry_error;
 
-       ret = bch2_journal_space_available(j);
-       if (ret)
-               return ret;
+       BUG_ON(!j->cur_entry_sectors);
 
        buf->u64s_reserved      = j->entry_u64s_reserved;
        buf->disk_sectors       = j->cur_entry_sectors;
@@ -261,6 +253,8 @@ static int journal_entry_open(struct journal *j)
 
                /* Handle any already added entries */
                new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
+
+               EBUG_ON(journal_state_count(new, new.idx));
                journal_state_inc(&new);
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
@@ -328,6 +322,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct journal_buf *buf;
+       bool can_discard;
        int ret;
 retry:
        if (journal_res_get_fast(j, res, flags))
@@ -348,6 +343,16 @@ retry:
                return 0;
        }
 
+       if (!(flags & JOURNAL_RES_GET_RESERVED) &&
+           !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+               /*
+                * Don't want to close current journal entry, just need to
+                * invoke reclaim:
+                */
+               ret = -ENOSPC;
+               goto unlock;
+       }
+
        /*
         * If we couldn't get a reservation because the current buf filled up,
         * and we had room for a bigger entry on disk, signal that we want to
@@ -371,22 +376,38 @@ retry:
        } else {
                ret = journal_entry_open(j);
        }
-
+unlock:
        if ((ret == -EAGAIN || ret == -ENOSPC) &&
            !j->res_get_blocked_start)
                j->res_get_blocked_start = local_clock() ?: 1;
 
+       can_discard = j->can_discard;
        spin_unlock(&j->lock);
 
        if (!ret)
                goto retry;
+
        if (ret == -ENOSPC) {
+               BUG_ON(!can_discard && (flags & JOURNAL_RES_GET_RESERVED));
+
                /*
                 * Journal is full - can't rely on reclaim from work item due to
                 * freezing:
                 */
                trace_journal_full(c);
-               bch2_journal_reclaim_work(&j->reclaim_work.work);
+
+               if (!(flags & JOURNAL_RES_GET_NONBLOCK)) {
+                       if (can_discard) {
+                               bch2_journal_do_discards(j);
+                               goto retry;
+                       }
+
+                       if (mutex_trylock(&j->reclaim_lock)) {
+                               bch2_journal_reclaim(j);
+                               mutex_unlock(&j->reclaim_lock);
+                       }
+               }
+
                ret = -EAGAIN;
        }
 
@@ -408,12 +429,38 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 {
        int ret;
 
-       wait_event(j->wait,
+       closure_wait_event(&j->async_wait,
                   (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
                   (flags & JOURNAL_RES_GET_NONBLOCK));
        return ret;
 }
 
+/* journal_preres: */
+
+static bool journal_preres_available(struct journal *j, /* wait condition for __bch2_journal_preres_get() */
+                                    struct journal_preres *res, /* passed through to the fast-path helper */
+                                    unsigned new_u64s) /* passed through to the fast-path helper */
+{
+       bool ret = bch2_journal_preres_get_fast(j, res, new_u64s); /* single fast-path attempt */
+
+       if (!ret) /* fast path failed */
+               bch2_journal_reclaim_work(&j->reclaim_work.work); /* call the reclaim work function directly, not via a workqueue */
+
+       return ret; /* result of the one attempt above; it is not retried after reclaim */
+}
+
+int __bch2_journal_preres_get(struct journal *j, /* waits on j->preres_wait until the condition below holds */
+                             struct journal_preres *res, /* forwarded to journal_preres_available() */
+                             unsigned new_u64s) /* forwarded to journal_preres_available() */
+{
+       int ret; /* assigned on every evaluation of the wait condition */
+
+       closure_wait_event(&j->preres_wait, /* NOTE(review): presumably re-evaluates the condition until true - confirm in closure.h */
+                  (ret = bch2_journal_error(j)) || /* a nonzero journal error ends the wait and becomes the return value */
+                  journal_preres_available(j, res, new_u64s)); /* only evaluated when ret == 0; true ends the wait */
+       return ret; /* 0 if the pre-reservation was obtained, else the bch2_journal_error() value */
+}
+
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *j,
@@ -429,7 +476,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
        if (d <= 0)
                goto out;
 
-       j->cur_entry_u64s -= d;
+       j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d);
        smp_mb();
        state = READ_ONCE(j->reservations);
 
@@ -765,6 +812,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 
        while (ja->nr < nr) {
                struct open_bucket *ob = NULL;
+               unsigned pos;
                long bucket;
 
                if (new_fs) {
@@ -791,20 +839,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                        preempt_disable();
                }
 
-               __array_insert_item(ja->buckets,                ja->nr, ja->last_idx);
-               __array_insert_item(ja->bucket_seq,             ja->nr, ja->last_idx);
-               __array_insert_item(journal_buckets->buckets,   ja->nr, ja->last_idx);
+               pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+               __array_insert_item(ja->buckets,                ja->nr, pos);
+               __array_insert_item(ja->bucket_seq,             ja->nr, pos);
+               __array_insert_item(journal_buckets->buckets,   ja->nr, pos);
+               ja->nr++;
 
-               ja->buckets[ja->last_idx] = bucket;
-               ja->bucket_seq[ja->last_idx] = 0;
-               journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+               ja->buckets[pos] = bucket;
+               ja->bucket_seq[pos] = 0;
+               journal_buckets->buckets[pos] = cpu_to_le64(bucket);
 
-               if (ja->last_idx < ja->nr) {
-                       if (ja->cur_idx >= ja->last_idx)
-                               ja->cur_idx++;
-                       ja->last_idx++;
-               }
-               ja->nr++;
+               if (pos <= ja->discard_idx)
+                       ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+               if (pos <= ja->dirty_idx_ondisk)
+                       ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+               if (pos <= ja->dirty_idx)
+                       ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+               if (pos <= ja->cur_idx)
+                       ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
 
                bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
                                          ca->mi.bucket_size,
@@ -966,6 +1018,7 @@ void bch2_fs_journal_start(struct journal *j)
 
        c->last_bucket_seq_cleanup = journal_cur_seq(j);
 
+       bch2_journal_space_available(j);
        spin_unlock(&j->lock);
 
        /*
@@ -974,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j)
         * only have to go down with the next journal entry we write:
         */
        bch2_journal_seq_blacklist_write(j);
-
-       queue_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
 }
 
 /* init/exit: */
@@ -1043,6 +1094,7 @@ int bch2_fs_journal_init(struct journal *j)
        mutex_init(&j->blacklist_lock);
        INIT_LIST_HEAD(&j->seq_blacklist);
        mutex_init(&j->reclaim_lock);
+       mutex_init(&j->discard_lock);
 
        lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
@@ -1078,35 +1130,60 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
 {
        struct printbuf out = _PBUF(buf, PAGE_SIZE);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       union journal_res_state *s = &j->reservations;
+       union journal_res_state s;
        struct bch_dev *ca;
        unsigned iter;
 
        rcu_read_lock();
        spin_lock(&j->lock);
+       s = READ_ONCE(j->reservations);
 
        pr_buf(&out,
               "active journal entries:\t%llu\n"
               "seq:\t\t\t%llu\n"
               "last_seq:\t\t%llu\n"
               "last_seq_ondisk:\t%llu\n"
-              "reservation count:\t%u\n"
-              "reservation offset:\t%u\n"
-              "current entry u64s:\t%u\n"
-              "io in flight:\t\t%i\n"
-              "need write:\t\t%i\n"
-              "dirty:\t\t\t%i\n"
-              "replay done:\t\t%i\n",
+              "prereserved:\t\t%u/%u\n"
+              "current entry sectors:\t%u\n"
+              "current entry:\t\t",
               fifo_used(&j->pin),
               journal_cur_seq(j),
               journal_last_seq(j),
               j->last_seq_ondisk,
-              journal_state_count(*s, s->idx),
-              s->cur_entry_offset,
-              j->cur_entry_u64s,
-              s->prev_buf_unwritten,
+              j->prereserved.reserved,
+              j->prereserved.remaining,
+              j->cur_entry_sectors);
+
+       switch (s.cur_entry_offset) {
+       case JOURNAL_ENTRY_ERROR_VAL:
+               pr_buf(&out, "error\n");
+               break;
+       case JOURNAL_ENTRY_CLOSED_VAL:
+               pr_buf(&out, "closed\n");
+               break;
+       default:
+               pr_buf(&out, "%u/%u\n",
+                      s.cur_entry_offset,
+                      j->cur_entry_u64s);
+               break;
+       }
+
+       pr_buf(&out,
+              "current entry refs:\t%u\n"
+              "prev entry unwritten:\t",
+              journal_state_count(s, s.idx));
+
+       if (s.prev_buf_unwritten)
+               pr_buf(&out, "yes, ref %u sectors %u\n",
+                      journal_state_count(s, !s.idx),
+                      journal_prev_buf(j)->sectors);
+       else
+               pr_buf(&out, "no\n");
+
+       pr_buf(&out,
+              "need write:\t\t%i\n"
+              "replay done:\t\t%i\n",
               test_bit(JOURNAL_NEED_WRITE,     &j->flags),
-              journal_entry_is_open(j),
               test_bit(JOURNAL_REPLAY_DONE,    &j->flags));
 
        for_each_member_device_rcu(ca, c, iter,
@@ -1119,11 +1196,18 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
                pr_buf(&out,
                       "dev %u:\n"
                       "\tnr\t\t%u\n"
-                      "\tcur_idx\t\t%u (seq %llu)\n"
-                      "\tlast_idx\t%u (seq %llu)\n",
+                      "\tavailable\t%u:%u\n"
+                      "\tdiscard_idx\t\t%u\n"
+                      "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+                      "\tdirty_idx\t\t%u (seq %llu)\n"
+                      "\tcur_idx\t\t%u (seq %llu)\n",
                       iter, ja->nr,
-                      ja->cur_idx,     ja->bucket_seq[ja->cur_idx],
-                      ja->last_idx,    ja->bucket_seq[ja->last_idx]);
+                      bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
+                      ja->sectors_free,
+                      ja->discard_idx,
+                      ja->dirty_idx_ondisk,    ja->bucket_seq[ja->dirty_idx_ondisk],
+                      ja->dirty_idx,           ja->bucket_seq[ja->dirty_idx],
+                      ja->cur_idx,             ja->bucket_seq[ja->cur_idx]);
        }
 
        spin_unlock(&j->lock);