#include "bcachefs.h"
#include "alloc_background.h"
+#include "backpointers.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
{
unsigned i;
- pr_buf(out, "capacity:\t\t\t%llu\n", c->capacity);
+ prt_printf(out, "capacity:\t\t\t%llu\n", c->capacity);
- pr_buf(out, "hidden:\t\t\t\t%llu\n",
+ prt_printf(out, "hidden:\t\t\t\t%llu\n",
fs_usage->u.hidden);
- pr_buf(out, "data:\t\t\t\t%llu\n",
+ prt_printf(out, "data:\t\t\t\t%llu\n",
fs_usage->u.data);
- pr_buf(out, "cached:\t\t\t\t%llu\n",
+ prt_printf(out, "cached:\t\t\t\t%llu\n",
fs_usage->u.cached);
- pr_buf(out, "reserved:\t\t\t%llu\n",
+ prt_printf(out, "reserved:\t\t\t%llu\n",
fs_usage->u.reserved);
- pr_buf(out, "nr_inodes:\t\t\t%llu\n",
+ prt_printf(out, "nr_inodes:\t\t\t%llu\n",
fs_usage->u.nr_inodes);
- pr_buf(out, "online reserved:\t\t%llu\n",
+ prt_printf(out, "online reserved:\t\t%llu\n",
fs_usage->online_reserved);
for (i = 0;
i < ARRAY_SIZE(fs_usage->u.persistent_reserved);
i++) {
- pr_buf(out, "%u replicas:\n", i + 1);
- pr_buf(out, "\treserved:\t\t%llu\n",
+ prt_printf(out, "%u replicas:\n", i + 1);
+ prt_printf(out, "\treserved:\t\t%llu\n",
fs_usage->u.persistent_reserved[i]);
}
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
- pr_buf(out, "\t");
+ prt_printf(out, "\t");
bch2_replicas_entry_to_text(out, e);
- pr_buf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
+ prt_printf(out, ":\t%llu\n", fs_usage->u.replicas[i]);
}
}
return ret;
}
-static inline int is_unavailable_bucket(struct bch_alloc_v4 a)
+void bch2_dev_usage_init(struct bch_dev *ca)
{
- return a.dirty_sectors || a.stripe;
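+ /* every usable bucket (first_bucket..nbuckets) starts out free: */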
+ ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
}
static inline int bucket_sectors_fragmented(struct bch_dev *ca,
: 0;
}
-static inline enum bch_data_type bucket_type(struct bch_alloc_v4 a)
-{
- return a.cached_sectors && !a.dirty_sectors
- ? BCH_DATA_cached
- : a.data_type;
-}
-
-static inline void account_bucket(struct bch_fs_usage *fs_usage,
- struct bch_dev_usage *dev_usage,
- enum bch_data_type type,
- int nr, s64 size)
-{
- if (type == BCH_DATA_sb || type == BCH_DATA_journal)
- fs_usage->hidden += size;
-
- dev_usage->d[type].buckets += nr;
-}
-
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
struct bch_alloc_v4 old,
struct bch_alloc_v4 new,
preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, gc);
- u = dev_usage_ptr(ca, journal_seq, gc);
- if (bucket_type(old))
- account_bucket(fs_usage, u, bucket_type(old),
- -1, -ca->mi.bucket_size);
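+ /* sb and journal buckets are hidden from user-visible fs usage: */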
+ if (data_type_is_hidden(old.data_type))
+ fs_usage->hidden -= ca->mi.bucket_size;
+ if (data_type_is_hidden(new.data_type))
+ fs_usage->hidden += ca->mi.bucket_size;
+
+ u = dev_usage_ptr(ca, journal_seq, gc);
- if (bucket_type(new))
- account_bucket(fs_usage, u, bucket_type(new),
- 1, ca->mi.bucket_size);
+ u->d[old.data_type].buckets--;
+ u->d[new.data_type].buckets++;
- u->buckets_ec += (int) new.stripe - (int) old.stripe;
- u->buckets_unavailable +=
- is_unavailable_bucket(new) - is_unavailable_bucket(old);
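+ /* a nonzero stripe field means the bucket belongs to an EC stripe: */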
+ u->buckets_ec -= (int) !!old.stripe;
+ u->buckets_ec += (int) !!new.stripe;
u->d[old.data_type].sectors -= old.dirty_sectors;
u->d[new.data_type].sectors += new.dirty_sectors;
- u->d[BCH_DATA_cached].sectors +=
- (int) new.cached_sectors - (int) old.cached_sectors;
+
+ u->d[BCH_DATA_cached].sectors += new.cached_sectors;
+ u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
idx = bch2_replicas_entry_idx(c, r);
if (idx < 0 &&
- (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
- fsck_err(c, "no replicas entry\n"
- " while marking %s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+ fsck_err(c, "no replicas entry\n"
+ " while marking %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
percpu_up_read(&c->mark_lock);
ret = bch2_mark_replicas(c, r);
percpu_down_read(&c->mark_lock);
n = (void *) d->d + d->used;
n->delta = sectors;
- memcpy(&n->r, r, replicas_entry_bytes(r));
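+ /*
+ * r is a variable length entry; copying via offsetof() keeps fortified
+ * memcpy from flagging a write past the declared size of n->r:
+ */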
+ memcpy((void *) n + offsetof(struct replicas_delta, r),
+ r, replicas_entry_bytes(r));
bch2_replicas_entry_sort(&n->r);
d->used += b;
}
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
struct bch_alloc_v4 old_a, new_a;
- struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
+ struct bch_dev *ca;
int ret = 0;
- if (bch2_trans_inconsistent_on(new.k->p.offset < ca->mi.first_bucket ||
- new.k->p.offset >= ca->mi.nbuckets, trans,
- "alloc key outside range of device's buckets"))
- return -EIO;
-
/*
* alloc btree is read in by bch2_alloc_read, not gc:
*/
!(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
return 0;
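+ /* new.k->p encodes device:bucket; validate both before dereferencing: */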
+ if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+ "alloc key for invalid device or bucket"))
+ return -EIO;
+
+ ca = bch_dev_bkey_exists(c, new.k->p.inode);
+
bch2_alloc_to_v4(old, &old_a);
bch2_alloc_to_v4(new, &new_a);
if ((flags & BTREE_TRIGGER_INSERT) &&
- !old_a.data_type != !new_a.data_type &&
+ data_type_is_empty(old_a.data_type) !=
+ data_type_is_empty(new_a.data_type) &&
new.k->type == KEY_TYPE_alloc_v4) {
struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
* before the bucket became empty again, then we don't have
* to wait on a journal flush before we can reuse the bucket:
*/
- new_a.journal_seq = !new_a.data_type &&
+ new_a.journal_seq = data_type_is_empty(new_a.data_type) &&
(journal_seq == v->journal_seq ||
bch2_journal_noflush_seq(&c->journal, v->journal_seq))
? 0 : journal_seq;
v->journal_seq = new_a.journal_seq;
}
- if (old_a.data_type && !new_a.data_type && new_a.journal_seq) {
+ if (!data_type_is_empty(old_a.data_type) &&
+ data_type_is_empty(new_a.data_type) &&
+ new_a.journal_seq) {
ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
c->journal.flushed_seq_ondisk,
new.k->p.inode, new.k->p.offset,
}
}
- if (!new_a.data_type &&
- (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
- closure_wake_up(&c->freelist_wait);
-
- if ((flags & BTREE_TRIGGER_INSERT) &&
- BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
- !new_a.journal_seq)
- bch2_do_discards(c);
-
- if (!old_a.data_type &&
- new_a.data_type &&
- should_invalidate_buckets(ca))
- bch2_do_invalidates(c);
-
- if (bucket_state(new_a) == BUCKET_need_gc_gens) {
- atomic_inc(&c->kick_gc);
- wake_up_process(c->gc_thread);
- }
-
percpu_down_read(&c->mark_lock);
if (!gc && new_a.gen != old_a.gen)
*bucket_gen(ca, new.k->p.offset) = new_a.gen;
bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
return ret;
}
-
- trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
- old_a.cached_sectors);
}
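+ /* kick off any background work the bucket's new state calls for: */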
+ if (new_a.data_type == BCH_DATA_free &&
+ (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+ closure_wake_up(&c->freelist_wait);
+
+ if (new_a.data_type == BCH_DATA_need_discard &&
+ (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+ bch2_do_discards(c);
+
+ if (old_a.data_type != BCH_DATA_cached &&
+ new_a.data_type == BCH_DATA_cached &&
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+ bch2_do_invalidates(c);
+
+ if (new_a.data_type == BCH_DATA_need_gc_gens)
+ bch2_do_gc_gens(c);
+
return 0;
}
-void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
- size_t b, enum bch_data_type data_type,
- unsigned sectors, struct gc_pos pos,
- unsigned flags)
+int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, enum bch_data_type data_type,
+ unsigned sectors, struct gc_pos pos,
+ unsigned flags)
{
struct bucket old, new, *g;
- bool overflow;
+ int ret = 0;
BUG_ON(!(flags & BTREE_TRIGGER_GC));
BUG_ON(data_type != BCH_DATA_sb &&
* Backup superblock might be past the end of our normal usable space:
*/
if (b >= ca->mi.nbuckets)
- return;
+ return 0;
percpu_down_read(&c->mark_lock);
g = gc_bucket(ca, b);
bucket_lock(g);
old = *g;
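+ /* a bucket may only hold one type of data at a time; check before updating: */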
+ if (bch2_fs_inconsistent_on(g->data_type &&
+ g->data_type != data_type, c,
+ "different types of data in same bucket: %s, %s",
+ bch2_data_types[g->data_type],
+ bch2_data_types[data_type])) {
+ ret = -EIO;
+ goto err;
+ }
+
+ if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > bucket size",
+ ca->dev_idx, b, g->gen,
+ bch2_data_types[g->data_type ?: data_type],
+ g->dirty_sectors, sectors)) {
+ ret = -EIO;
+ goto err;
+ }
+
g->data_type = data_type;
g->dirty_sectors += sectors;
- overflow = g->dirty_sectors < sectors;
-
new = *g;
+err:
bucket_unlock(g);
-
- bch2_fs_inconsistent_on(old.data_type &&
- old.data_type != data_type, c,
- "different types of data in same bucket: %s, %s",
- bch2_data_types[old.data_type],
- bch2_data_types[data_type]);
-
- bch2_fs_inconsistent_on(overflow, c,
- "bucket %u:%zu gen %u data type %s sector count overflow: %u + %u > U16_MAX",
- ca->dev_idx, b, new.gen,
- bch2_data_types[old.data_type ?: data_type],
- old.dirty_sectors, sectors);
-
- bch2_dev_usage_update_m(c, ca, old, new, 0, true);
+ if (!ret)
+ bch2_dev_usage_update_m(c, ca, old, new, 0, true);
percpu_up_read(&c->mark_lock);
-}
-
-static s64 ptr_disk_sectors(s64 sectors, struct extent_ptr_decoded p)
-{
- EBUG_ON(sectors < 0);
-
- return p.crc.compression_type &&
- p.crc.compression_type != BCH_COMPRESSION_TYPE_incompressible
- ? DIV_ROUND_UP_ULL(sectors * p.crc.compressed_size,
- p.crc.uncompressed_size)
- : sectors;
+ return ret;
}
static int check_bucket_ref(struct bch_fs *c,
struct printbuf buf = PRINTBUF;
int ret = 0;
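+ /* a bucket holding only cached data has type BCH_DATA_cached; treat it as user data for the type checks below: */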
+ if (bucket_data_type == BCH_DATA_cached)
+ bucket_data_type = BCH_DATA_user;
+
if (gen_after(ptr->gen, b_gen)) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
goto err;
}
- if (bucket_data_type && ptr_data_type &&
+ if (!data_type_is_empty(bucket_data_type) &&
+ ptr_data_type &&
bucket_data_type != ptr_data_type) {
bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
"bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
old = *g;
ret = check_bucket_ref(c, k, ptr, sectors, data_type,
- new.gen, new.data_type,
- new.dirty_sectors, new.cached_sectors);
- if (ret) {
- bucket_unlock(g);
+ g->gen, g->data_type,
+ g->dirty_sectors, g->cached_sectors);
+ if (ret)
goto err;
- }
- new.dirty_sectors += sectors;
if (data_type)
- new.data_type = data_type;
+ g->data_type = data_type;
+ g->dirty_sectors += sectors;
g->stripe = k.k->p.offset;
g->stripe_redundancy = s->nr_redundant;
-
new = *g;
- bucket_unlock(g);
-
- bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
err:
+ bucket_unlock(g);
+ if (!ret)
+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
percpu_up_read(&c->mark_lock);
printbuf_exit(&buf);
return ret;
percpu_down_read(&c->mark_lock);
g = PTR_GC_BUCKET(ca, &p.ptr);
-
bucket_lock(g);
old = *g;
bucket_data_type = g->data_type;
-
ret = __mark_pointer(trans, k, &p.ptr, sectors,
data_type, g->gen,
&bucket_data_type,
&g->dirty_sectors,
&g->cached_sectors);
- if (ret) {
- bucket_unlock(g);
- goto err;
- }
-
- g->data_type = bucket_data_type;
+ if (!ret)
+ g->data_type = bucket_data_type;
new = *g;
bucket_unlock(g);
-
- bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
-err:
+ if (!ret)
+ bch2_dev_usage_update_m(c, ca, old, new, journal_seq, true);
percpu_up_read(&c->mark_lock);
return ret;
/* trans_mark: */
static int bch2_trans_mark_pointer(struct btree_trans *trans,
- struct bkey_s_c k, struct extent_ptr_decoded p,
- s64 sectors, enum bch_data_type data_type)
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p,
+ unsigned flags)
{
+ bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
struct btree_iter iter;
struct bkey_i_alloc_v4 *a;
+ struct bpos bucket_pos;
+ struct bch_backpointer bp;
+ s64 sectors;
int ret;
- a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(trans->c, &p.ptr));
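+ /* the backpointer gives us both the bucket and this ptr's sector count: */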
+ bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket_pos, &bp);
+ sectors = bp.bucket_len;
+ if (!insert)
+ sectors = -sectors;
+
+ a = bch2_trans_start_alloc_update(trans, &iter, bucket_pos);
if (IS_ERR(a))
return PTR_ERR(a);
- ret = __mark_pointer(trans, k, &p.ptr, sectors, data_type,
+ ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
a->v.gen, &a->v.data_type,
&a->v.dirty_sectors, &a->v.cached_sectors);
if (ret)
- goto out;
+ goto err;
+
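+ /* cached pointers may be dropped at any time, so they don't get backpointers: */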
+ if (!p.ptr.cached) {
+ ret = insert
+ ? bch2_bucket_backpointer_add(trans, a, bp, k)
+ : bch2_bucket_backpointer_del(trans, a, bp, k);
+ if (ret)
+ goto err;
+ }
ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
- if (ret)
- goto out;
-out:
+err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
}
int bch2_trans_mark_extent(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new,
unsigned flags)
{
if (flags & BTREE_TRIGGER_OVERWRITE)
disk_sectors = -disk_sectors;
- ret = bch2_trans_mark_pointer(trans, k, p,
- disk_sectors, data_type);
+ ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
if (ret < 0)
return ret;
}
int bch2_trans_mark_stripe(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_i *new,
unsigned flags)
{
}
int bch2_trans_mark_inode(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
}
int bch2_trans_mark_reservation(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
}
int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old,
struct bkey_i *new,
unsigned flags)
enum bch_data_type type,
unsigned sectors)
{
- return __bch2_trans_do(trans, NULL, NULL, 0,
+ return commit_do(trans, NULL, NULL, 0,
__bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors));
}
int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
{
- return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
- __bch2_trans_mark_dev_sb(&trans, ca));
+ return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
}
/* Disk reservations: */
ret = 0;
} else {
atomic64_set(&c->sectors_available, sectors_available);
- ret = -ENOSPC;
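+ /* private error code; mapped back to -ENOSPC for userspace: */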
+ ret = -BCH_ERR_ENOSPC_disk_reservation;
}
mutex_unlock(&c->sectors_available_lock);