buf->must_flush = false;
buf->separate_flush = false;
- memset(buf->has_inode, 0, sizeof(buf->has_inode));
-
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
- j->err_seq = journal_cur_seq(j);
+ /*
+ * XXX: we're not using j->lock here because this can be called from
+ * interrupt context, this can race with journal_write_done()
+ */
+ if (!j->err_seq)
+ j->err_seq = journal_cur_seq(j);
journal_wake(j);
closure_wake_up(&journal_cur_buf(j)->wait);
}
mod_delayed_work(c->io_complete_wq,
&j->write_work,
- msecs_to_jiffies(j->write_delay_ms));
+ msecs_to_jiffies(c->opts.journal_flush_delay));
journal_wake(j);
return 0;
}
journal_entry_close(j);
}
-/*
- * Given an inode number, if that inode number has data in the journal that
- * hasn't yet been flushed, return the journal sequence number that needs to be
- * flushed:
- */
-u64 bch2_inode_journal_seq(struct journal *j, u64 inode)
-{
- size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
- union journal_res_state s;
- unsigned i;
- u64 seq;
-
-
- spin_lock(&j->lock);
- seq = journal_cur_seq(j);
- s = READ_ONCE(j->reservations);
- i = s.idx;
-
- while (1) {
- if (test_bit(h, j->buf[i].has_inode))
- goto out;
-
- if (i == s.unwritten_idx)
- break;
-
- i = (i - 1) & JOURNAL_BUF_MASK;
- seq--;
- }
-
- seq = 0;
-out:
- spin_unlock(&j->lock);
-
- return seq;
-}
-
-void bch2_journal_set_has_inum(struct journal *j, u64 inode, u64 seq)
-{
- size_t h = hash_64(inode, ilog2(sizeof(j->buf[0].has_inode) * 8));
- struct journal_buf *buf;
-
- spin_lock(&j->lock);
-
- if ((buf = journal_seq_to_buf(j, seq)))
- set_bit(h, buf->has_inode);
-
- spin_unlock(&j->lock);
-}
-
static int __journal_res_get(struct journal *j, struct journal_res *res,
unsigned flags)
{
spin_lock(&j->lock);
- BUG_ON(seq > journal_cur_seq(j));
+ if (WARN_ONCE(seq > journal_cur_seq(j),
+ "requested to flush journal seq %llu, but currently at %llu",
+ seq, journal_cur_seq(j)))
+ goto out;
/* Recheck under lock: */
if (j->err_seq && seq >= j->err_seq) {
u64 start_time = local_clock();
int ret, ret2;
+ /*
+ * Don't update time_stats when @seq is already flushed:
+ */
+ if (seq <= j->flushed_seq_ondisk)
+ return 0;
+
ret = wait_event_interruptible(j->wait, (ret2 = bch2_journal_flush_seq_async(j, seq, NULL)));
if (!ret)
int bch2_journal_meta(struct journal *j)
{
+ struct journal_buf *buf;
struct journal_res res;
int ret;
if (ret)
return ret;
+ buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
+ buf->must_flush = true;
+ set_bit(JOURNAL_NEED_WRITE, &j->flags);
+
bch2_journal_res_put(j, &res);
return bch2_journal_flush_seq(j, res.seq);
return bch2_journal_flush_seq(j, seq);
}
+/*
+ * bch2_journal_noflush_seq - tell the journal not to issue any flushes before
+ * @seq
+ * @j:		journal object
+ * @seq:	journal sequence number; unwritten entries strictly before it
+ *		are marked as noflush writes
+ *
+ * Returns true if every unwritten entry prior to @seq could be marked
+ * noflush; false if the on-disk format doesn't support noflush writes,
+ * everything before @seq is already flushed, or a flush write for the
+ * oldest unwritten entry is already in flight.
+ */
+bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	u64 unwritten_seq;
+	bool ret = false;
+
+	/* noflush journal writes require this on-disk feature bit: */
+	if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush)))
+		return false;
+
+	/* cheap check without the lock: */
+	if (seq <= c->journal.flushed_seq_ondisk)
+		return false;
+
+	spin_lock(&j->lock);
+	/* recheck under lock: */
+	if (seq <= c->journal.flushed_seq_ondisk)
+		goto out;
+
+	for (unwritten_seq = last_unwritten_seq(j);
+	     unwritten_seq < seq;
+	     unwritten_seq++) {
+		struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq);
+
+		/* journal write is already in flight, and was a flush write: */
+		if (unwritten_seq == last_unwritten_seq(j) && !buf->noflush)
+			goto out;
+
+		buf->noflush = true;
+	}
+
+	ret = true;
+out:
+	spin_unlock(&j->lock);
+	return ret;
+}
+
/* block/unlock the journal: */
void bch2_journal_unblock(struct journal *j)
long b;
if (new_fs) {
- if (c)
- percpu_down_read(&c->mark_lock);
b = bch2_bucket_alloc_new_fs(ca);
if (b < 0) {
- percpu_up_read(&c->mark_lock);
ret = -ENOSPC;
goto err;
}
goto err;
}
- b = sector_to_bucket(ca, ob->ptr.offset);
+ b = ob->bucket;
}
if (c)
if (c)
spin_unlock(&c->journal.lock);
- if (new_fs) {
- bch2_mark_metadata_bucket(c, ca, b, BCH_DATA_journal,
- ca->mi.bucket_size,
- gc_phase(GC_PHASE_SB),
- 0);
- if (c)
- percpu_up_read(&c->mark_lock);
- } else {
+ if (!new_fs) {
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_trans_mark_metadata_bucket(&trans, ca,
b, BCH_DATA_journal,
j->replay_journal_seq = last_seq;
j->replay_journal_seq_end = cur_seq;
j->last_seq_ondisk = last_seq;
+ j->flushed_seq_ondisk = cur_seq - 1;
j->pin.front = last_seq;
j->pin.back = cur_seq;
atomic64_set(&j->seq, cur_seq - 1);
+ if (list_empty(journal_entries))
+ j->last_empty_seq = cur_seq - 1;
+
fifo_for_each_entry_ptr(p, &j->pin, seq)
journal_pin_list_init(p, 1);
if (seq < last_seq)
continue;
+ if (journal_entry_empty(&i->j))
+ j->last_empty_seq = le64_to_cpu(i->j.seq);
+
p = journal_seq_pin(j, seq);
p->devs.nr = 0;
bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
}
+ if (list_empty(journal_entries))
+ j->last_empty_seq = cur_seq;
+
spin_lock(&j->lock);
set_bit(JOURNAL_STARTED, &j->flags);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
- j->write_delay_ms = 1000;
- j->reclaim_delay_ms = 100;
-
atomic64_set(&j->reservations.counter,
((union journal_res_state)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
union journal_res_state s;
struct bch_dev *ca;
+ unsigned long now = jiffies;
unsigned i;
rcu_read_lock();
s = READ_ONCE(j->reservations);
- pr_buf(out,
- "active journal entries:\t%llu\n"
- "seq:\t\t\t%llu\n"
- "last_seq:\t\t%llu\n"
- "last_seq_ondisk:\t%llu\n"
- "flushed_seq_ondisk:\t%llu\n"
- "prereserved:\t\t%u/%u\n"
- "each entry reserved:\t%u\n"
- "nr flush writes:\t%llu\n"
- "nr noflush writes:\t%llu\n"
- "nr direct reclaim:\t%llu\n"
- "nr background reclaim:\t%llu\n"
- "reclaim kicked:\t\t%u\n"
- "reclaim runs in:\t%u ms\n"
- "current entry sectors:\t%u\n"
- "current entry error:\t%u\n"
- "current entry:\t\t",
- fifo_used(&j->pin),
- journal_cur_seq(j),
- journal_last_seq(j),
- j->last_seq_ondisk,
- j->flushed_seq_ondisk,
- j->prereserved.reserved,
- j->prereserved.remaining,
- j->entry_u64s_reserved,
- j->nr_flush_writes,
- j->nr_noflush_writes,
- j->nr_direct_reclaim,
- j->nr_background_reclaim,
- j->reclaim_kicked,
- jiffies_to_msecs(j->next_reclaim - jiffies),
- j->cur_entry_sectors,
- j->cur_entry_error);
+ pr_buf(out, "active journal entries:\t%llu\n", fifo_used(&j->pin));
+ pr_buf(out, "seq:\t\t\t%llu\n", journal_cur_seq(j));
+ pr_buf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
+ pr_buf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
+ pr_buf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
+ pr_buf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
+ pr_buf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
+ pr_buf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
+ pr_buf(out, "nr noflush writes:\t%llu\n", j->nr_noflush_writes);
+ pr_buf(out, "nr direct reclaim:\t%llu\n", j->nr_direct_reclaim);
+ pr_buf(out, "nr background reclaim:\t%llu\n", j->nr_background_reclaim);
+ pr_buf(out, "reclaim kicked:\t\t%u\n", j->reclaim_kicked);
+ pr_buf(out, "reclaim runs in:\t%u ms\n", time_after(j->next_reclaim, now)
+ ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
+ pr_buf(out, "current entry sectors:\t%u\n", j->cur_entry_sectors);
+ pr_buf(out, "current entry error:\t%u\n", j->cur_entry_error);
+ pr_buf(out, "current entry:\t\t");
switch (s.cur_entry_offset) {
case JOURNAL_ENTRY_ERROR_VAL:
pr_buf(out, "closed\n");
break;
default:
- pr_buf(out, "%u/%u\n",
- s.cur_entry_offset,
- j->cur_entry_u64s);
+ pr_buf(out, "%u/%u\n", s.cur_entry_offset, j->cur_entry_u64s);
break;
}
- pr_buf(out,
- "current entry:\t\tidx %u refcount %u\n",
- s.idx, journal_state_count(s, s.idx));
+ pr_buf(out, "current entry:\t\tidx %u refcount %u\n", s.idx, journal_state_count(s, s.idx));
i = s.idx;
while (i != s.unwritten_idx) {
if (!ja->nr)
continue;
- pr_buf(out,
- "dev %u:\n"
- "\tnr\t\t%u\n"
- "\tbucket size\t%u\n"
- "\tavailable\t%u:%u\n"
- "\tdiscard_idx\t%u\n"
- "\tdirty_ondisk\t%u (seq %llu)\n"
- "\tdirty_idx\t%u (seq %llu)\n"
- "\tcur_idx\t\t%u (seq %llu)\n",
- i, ja->nr, ca->mi.bucket_size,
- bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
- ja->sectors_free,
- ja->discard_idx,
- ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
- ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
- ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
+ pr_buf(out, "dev %u:\n", i);
+ pr_buf(out, "\tnr\t\t%u\n", ja->nr);
+ pr_buf(out, "\tbucket size\t%u\n", ca->mi.bucket_size);
+ pr_buf(out, "\tavailable\t%u:%u\n", bch2_journal_dev_buckets_available(j, ja, journal_space_discarded), ja->sectors_free);
+ pr_buf(out, "\tdiscard_idx\t%u\n", ja->discard_idx);
+ pr_buf(out, "\tdirty_ondisk\t%u (seq %llu)\n", ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk]);
+ pr_buf(out, "\tdirty_idx\t%u (seq %llu)\n", ja->dirty_idx, ja->bucket_seq[ja->dirty_idx]);
+ pr_buf(out, "\tcur_idx\t\t%u (seq %llu)\n", ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
}
rcu_read_unlock();