-a5da815430ebf6e09cedfc3d3e00f57a7f6c344d
+2272c5f5b76a5dc0c925064b3682110218a3e53b
struct bch_alloc_v4 a,
enum bch_data_type data_type)
{
+ if (stripe)
+ return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
if (dirty_sectors)
return data_type;
- if (stripe)
- return BCH_DATA_stripe;
if (cached_sectors)
return BCH_DATA_cached;
if (BCH_ALLOC_V4_NEED_DISCARD(&a))
a.stripe, a, data_type);
}
+static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
+{
+ return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
+}
+
static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
{
return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
}
}
+static void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob)
+{
+ unsigned data_type = ob->data_type;
+ barrier(); /* READ_ONCE() doesn't work on bitfields */
+
+ prt_printf(out, "%zu ref %u %s%s%s %u:%llu gen %u\n",
+ ob - c->open_buckets,
+ atomic_read(&ob->pin),
+ data_type < BCH_DATA_NR ? bch2_data_types[data_type] : "invalid data type",
+ ob->ec ? " ec" : "",
+ ob->on_partial_list ? " partial" : "",
+ ob->dev, ob->bucket, ob->gen);
+}
+
void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c)
{
struct open_bucket *ob;
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
spin_lock(&ob->lock);
- if (ob->valid && !ob->on_partial_list) {
- prt_printf(out, "%zu ref %u type %s %u:%llu:%u\n",
- ob - c->open_buckets,
- atomic_read(&ob->pin),
- bch2_data_types[ob->data_type],
- ob->dev, ob->bucket, ob->gen);
- }
+ if (ob->valid && !ob->on_partial_list)
+ bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
}
unsigned i;
spin_lock(&c->freelist_lock);
- for (i = 0; i < c->open_buckets_partial_nr; i++) {
- struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
-
- prt_printf(out, "%zu ref %u type %s ec %u %u:%llu:%u\n",
- ob - c->open_buckets,
- atomic_read(&ob->pin),
- bch2_data_types[ob->data_type],
- ob->ec != NULL,
- ob->dev, ob->bucket, ob->gen);
- }
+ for (i = 0; i < c->open_buckets_partial_nr; i++)
+ bch2_open_bucket_to_text(out, c,
+ c->open_buckets + c->open_buckets_partial[i]);
spin_unlock(&c->freelist_lock);
}
#define x(name) RESERVE_##name,
BCH_ALLOC_RESERVES()
#undef x
+ RESERVE_NR,
};
#define OPEN_BUCKETS_COUNT 1024
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
}
+static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p)
+{
+ return level ? BCH_DATA_btree :
+ p.has_ec ? BCH_DATA_stripe :
+ BCH_DATA_user;
+}
+
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
struct bpos *bucket_pos, struct bch_backpointer *bp)
{
- enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
s64 sectors = level ? btree_sectors(c) : k.k->size;
u32 bucket_offset;
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_i *, unsigned);
- int (*atomic_trigger)(struct btree_trans *, struct bkey_s_c,
- struct bkey_s_c, unsigned);
+ int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
void (*compat)(enum btree_id id, unsigned version,
unsigned big_endian, int write,
struct bkey_s);
bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
static inline int bch2_mark_key(struct btree_trans *trans,
- struct bkey_s_c old,
- struct bkey_s_c new,
- unsigned flags)
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s_c new,
+ unsigned flags)
{
const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new.k->type];
return ops->atomic_trigger
- ? ops->atomic_trigger(trans, old, new, flags)
+ ? ops->atomic_trigger(trans, btree, level, old, new, flags)
: 0;
}
return NULL;
bkey_btree_ptr_init(&b->key);
- __six_lock_init(&b->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
+ bch2_btree_lock_init(&b->c);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
lockdep_set_no_check_recursion(&b->c.lock.dep_map);
#endif
}
}
-struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks)
+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct list_head *freed = pcpu_read_locks
? &bc->freed_pcpu
}
/* Slowpath, don't want it inlined into btree_iter_traverse() */
-static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
- struct btree_trans *trans,
+static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
struct btree_path *path,
const struct bkey_i *k,
enum btree_id btree_id,
enum six_lock_type lock_type,
bool sync)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
u32 seq;
* Parent node must be locked, else we could read in a btree node that's
* been freed:
*/
- if (trans && !bch2_btree_node_relock(trans, path, level + 1)) {
+ if (path && !bch2_btree_node_relock(trans, path, level + 1)) {
trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
}
- b = bch2_btree_node_mem_alloc(c, level != 0);
+ b = bch2_btree_node_mem_alloc(trans, level != 0);
- if (trans && b == ERR_PTR(-ENOMEM)) {
+ if (b == ERR_PTR(-ENOMEM)) {
trans->memory_allocation_failure = true;
trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
if (!sync)
return NULL;
- if (trans) {
+ if (path) {
int ret = bch2_trans_relock(trans) ?:
bch2_btree_path_relock_intent(trans, path);
if (ret) {
}
if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- if (trans)
+ if (path)
trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
}
* else we could read in a btree node from disk that's been
* freed:
*/
- b = bch2_btree_node_fill(c, trans, path, k, path->btree_id,
+ b = bch2_btree_node_fill(trans, path, k, path->btree_id,
level, lock_type, true);
/* We raced and found the btree node in the cache */
if (nofill)
goto out;
- b = bch2_btree_node_fill(c, NULL, NULL, k, btree_id,
+ b = bch2_btree_node_fill(trans, NULL, k, btree_id,
level, SIX_LOCK_read, true);
/* We raced and found the btree node in the cache */
return b;
}
-int bch2_btree_node_prefetch(struct bch_fs *c,
- struct btree_trans *trans,
+int bch2_btree_node_prefetch(struct btree_trans *trans,
struct btree_path *path,
const struct bkey_i *k,
enum btree_id btree_id, unsigned level)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
if (b)
return 0;
- b = bch2_btree_node_fill(c, trans, path, k, btree_id,
+ b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
return PTR_ERR_OR_ZERO(b);
}
int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
-struct btree *bch2_btree_node_mem_alloc(struct bch_fs *, bool);
+struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool);
struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
const struct bkey_i *, unsigned,
struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
enum btree_id, unsigned, bool);
-int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
+int bch2_btree_node_prefetch(struct btree_trans *, struct btree_path *,
const struct bkey_i *, enum btree_id, unsigned);
void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
continue;
- if (fsck_err_on(g->data_type &&
- g->data_type != data_type, c,
+ if (fsck_err_on(bucket_data_type(g->data_type) &&
+ bucket_data_type(g->data_type) != data_type, c,
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
}
ret = commit_do(trans, NULL, NULL, 0,
- bch2_mark_key(trans, old, *k, flags));
+ bch2_mark_key(trans, btree_id, level, old, *k, flags));
fsck_err:
err:
if (ret)
if (!btree_node_fake(b)) {
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
- ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1,
true, &k, initial);
}
gc_pos_set(c, gc_pos_btree_root(b->c.btree_id));
if (!ret) {
struct bkey_s_c k = bkey_i_to_s_c(&b->key);
- ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, true,
+ ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level + 1, true,
&k, true);
}
fsck_err:
if (gen_after(old->gen, gc.gen))
return 0;
+ if (c->opts.reconstruct_alloc ||
+ fsck_err_on(new.data_type != gc.data_type, c,
+ "bucket %llu:%llu gen %u has wrong data_type"
+ ": got %s, should be %s",
+ iter->pos.inode, iter->pos.offset,
+ gc.gen,
+ bch2_data_types[new.data_type],
+ bch2_data_types[gc.data_type]))
+ new.data_type = gc.data_type;
+
#define copy_bucket_field(_f) \
if (c->opts.reconstruct_alloc || \
fsck_err_on(new._f != gc._f, c, \
new._f = gc._f; \
copy_bucket_field(gen);
- copy_bucket_field(data_type);
copy_bucket_field(dirty_sectors);
copy_bucket_field(cached_sectors);
copy_bucket_field(stripe_redundancy);
void bch2_btree_node_io_lock(struct btree *b)
{
- BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+ bch2_assert_btree_nodes_not_locked();
wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight,
TASK_UNINTERRUPTIBLE);
void bch2_btree_node_wait_on_read(struct btree *b)
{
- BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+ bch2_assert_btree_nodes_not_locked();
wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
TASK_UNINTERRUPTIBLE);
void bch2_btree_node_wait_on_write(struct btree *b)
{
- BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+ bch2_assert_btree_nodes_not_locked();
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight,
TASK_UNINTERRUPTIBLE);
}
}
-int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
- const struct bkey_i *k, unsigned level)
+static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
+ const struct bkey_i *k, unsigned level)
{
+ struct bch_fs *c = trans->c;
struct closure cl;
struct btree *b;
int ret;
closure_sync(&cl);
} while (ret);
- b = bch2_btree_node_mem_alloc(c, level != 0);
+ b = bch2_btree_node_mem_alloc(trans, level != 0);
bch2_btree_cache_cannibalize_unlock(c);
BUG_ON(IS_ERR(b));
return ret;
}
+int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
+ const struct bkey_i *k, unsigned level)
+{
+ return bch2_trans_run(c, __bch2_btree_root_read(&trans, id, k, level));
+}
+
void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
struct btree_write *w)
{
break;
bch2_bkey_buf_unpack(&tmp, c, l->b, k);
- ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+ ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
path->level - 1);
}
break;
bch2_bkey_buf_reassemble(&tmp, c, k);
- ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
+ ret = bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id,
path->level - 1);
}
{
struct btree_transaction_stats *s;
- BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+ bch2_assert_btree_nodes_not_locked();
memset(trans, 0, sizeof(*trans));
trans->c = c;
return NULL;
init:
INIT_LIST_HEAD(&ck->list);
- __six_lock_init(&ck->c.lock, "b->c.lock", &bch2_btree_node_lock_key);
+ bch2_btree_lock_init(&ck->c);
if (pcpu_readers)
six_lock_pcpu_alloc(&ck->c.lock);
struct bucket_table *tbl;
struct bkey_cached *ck, *n;
struct rhash_head *pos;
+ LIST_HEAD(items);
unsigned i;
#ifdef __KERNEL__
int cpu;
for (i = 0; i < tbl->size; i++)
rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
bkey_cached_evict(bc, ck);
- list_add(&ck->list, &bc->freed_nonpcpu);
+ list_add(&ck->list, &items);
}
rcu_read_unlock();
}
for (i = 0; i < f->nr; i++) {
ck = f->objs[i];
- list_add(&ck->list, &bc->freed_nonpcpu);
+ list_add(&ck->list, &items);
}
}
#endif
- list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);
+ list_splice(&bc->freed_pcpu, &items);
+ list_splice(&bc->freed_nonpcpu, &items);
- list_for_each_entry_safe(ck, n, &bc->freed_nonpcpu, list) {
+ mutex_unlock(&bc->lock);
+
+ list_for_each_entry_safe(ck, n, &items, list) {
cond_resched();
bch2_journal_pin_drop(&c->journal, &ck->journal);
panic("btree key cache shutdown error: nr_keys nonzero (%li)\n",
atomic_long_read(&bc->nr_keys));
- mutex_unlock(&bc->lock);
-
if (bc->table_init_done)
rhashtable_destroy(&bc->table);
#include "btree_locking.h"
#include "btree_types.h"
-struct lock_class_key bch2_btree_node_lock_key;
+static struct lock_class_key bch2_btree_node_lock_key;
+
+void bch2_btree_lock_init(struct btree_bkey_cached_common *b)
+{
+ __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key);
+}
+
+#ifdef CONFIG_LOCKDEP
+void bch2_assert_btree_nodes_not_locked(void)
+{
+ BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
+}
+#endif
/* Btree node locking: */
* bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
* btree nodes, it implements its own walking:
*/
- EBUG_ON(!trans->is_initial_gc &&
- lock_class_is_held(&bch2_btree_node_lock_key));
+ if (!trans->is_initial_gc)
+ bch2_assert_btree_nodes_not_locked();
}
bool bch2_trans_locked(struct btree_trans *trans)
#include "btree_iter.h"
-extern struct lock_class_key bch2_btree_node_lock_key;
+void bch2_btree_lock_init(struct btree_bkey_cached_common *);
+
+#ifdef CONFIG_LOCKDEP
+void bch2_assert_btree_nodes_not_locked(void);
+#else
+static inline void bch2_assert_btree_nodes_not_locked(void) {}
+#endif
static inline bool is_btree_node(struct btree_path *path, unsigned l)
{
unsigned long ip_allocated;
};
-#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
bch2_open_bucket_get(c, wp, &ob);
bch2_alloc_sectors_done(c, wp);
mem_alloc:
- b = bch2_btree_node_mem_alloc(c, interior_node);
+ b = bch2_btree_node_mem_alloc(trans, interior_node);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
return ret;
}
- new_hash = bch2_btree_node_mem_alloc(c, false);
+ new_hash = bch2_btree_node_mem_alloc(trans, false);
}
path->intent_ref++;
bch2_btree_set_root_inmem(c, b);
}
-void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
{
+ struct bch_fs *c = trans->c;
struct closure cl;
struct btree *b;
int ret;
closure_sync(&cl);
} while (ret);
- b = bch2_btree_node_mem_alloc(c, false);
+ b = bch2_btree_node_mem_alloc(trans, false);
bch2_btree_cache_cannibalize_unlock(c);
set_btree_node_fake(b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
+ return 0;
+}
+
+void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+{
+ bch2_trans_run(c, __bch2_btree_root_alloc(&trans, id));
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
if (bch2_bkey_ops[old.k->type].atomic_trigger ==
bch2_bkey_ops[i->k->k.type].atomic_trigger &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
- ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
+ ret = bch2_mark_key(trans, i->btree_id, i->level,
+ old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
struct bkey _deleted = KEY(0, 0, 0);
_deleted.p = i->path->pos;
- ret = bch2_mark_key(trans, deleted, bkey_i_to_s_c(new),
+ ret = bch2_mark_key(trans, i->btree_id, i->level,
+ deleted, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|flags) ?:
- bch2_mark_key(trans, old, deleted,
+ bch2_mark_key(trans, i->btree_id, i->level,
+ old, deleted,
BTREE_TRIGGER_OVERWRITE|flags);
}
}
int bch2_mark_alloc(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned nr_data = s->nr_blocks - s->nr_redundant;
bool parity = ptr_idx >= nr_data;
- enum bch_data_type data_type = parity ? BCH_DATA_parity : 0;
+ enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
if (ret)
goto err;
- if (data_type)
- g->data_type = data_type;
+ g->data_type = data_type;
g->dirty_sectors += sectors;
g->stripe = k.k->p.offset;
}
static int bch2_mark_pointer(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c k,
struct extent_ptr_decoded p,
- s64 sectors, enum bch_data_type data_type,
+ s64 sectors,
unsigned flags)
{
u64 journal_seq = trans->journal_res.seq;
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket old, new, *g;
+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
u8 bucket_data_type;
int ret = 0;
}
int bch2_mark_extent(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
if (flags & BTREE_TRIGGER_OVERWRITE)
disk_sectors = -disk_sectors;
- ret = bch2_mark_pointer(trans, k, p, disk_sectors,
- data_type, flags);
+ ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags);
if (ret < 0)
return ret;
}
int bch2_mark_stripe(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
}
int bch2_mark_inode(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
}
int bch2_mark_reservation(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
}
int bch2_mark_reflink_p(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
a->v.stripe = s.k->p.offset;
a->v.stripe_redundancy = s.v->nr_redundant;
+ a->v.data_type = BCH_DATA_stripe;
} else {
if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
a->v.stripe_redundancy != s.v->nr_redundant, trans,
a->v.stripe = 0;
a->v.stripe_redundancy = 0;
+ a->v.data_type = alloc_data_type(a->v, BCH_DATA_user);
}
a->v.dirty_sectors += sectors;
s64 reserved = 0;
switch (reserve) {
+ case RESERVE_NR:
+ unreachable();
case RESERVE_none:
reserved += ca->mi.nbuckets >> 6;
fallthrough;
size_t, enum bch_data_type, unsigned,
struct gc_pos, unsigned);
-int bch2_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
struct extent_ptr_decoded p;
const struct bch_extent_ptr *ptr;
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
- unsigned int ptrs_locked = 0;
+ unsigned ptrs_locked = 0;
int ret;
bch2_bkey_buf_init(&m->k);
m->op.version = k.k->version;
m->op.target = data_opts.target;
m->op.write_point = wp;
+ m->op.nr_replicas = 0;
m->op.flags |= BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED|
BCH_WRITE_DATA_ENCODED|
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
bool locked;
- if (((1U << i) & m->data_opts.rewrite_ptrs) &&
- p.ptr.cached)
- BUG();
+ if (((1U << i) & m->data_opts.rewrite_ptrs)) {
+ BUG_ON(p.ptr.cached);
- if (!((1U << i) & m->data_opts.rewrite_ptrs) &&
- !p.ptr.cached)
- bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+ if (crc_is_compressed(p.crc))
+ reserve_sectors += k.k->size;
- if (((1U << i) & m->data_opts.rewrite_ptrs) &&
- crc_is_compressed(p.crc))
- reserve_sectors += k.k->size;
+ m->op.nr_replicas += bch2_extent_ptr_durability(c, &p);
+ } else if (!p.ptr.cached) {
+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+ }
/*
* op->csum_type is normally initialized from the fs/file's
goto err;
}
- m->op.nr_replicas = m->op.nr_replicas_required =
- hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
+ m->op.nr_replicas += m->data_opts.extra_replicas;
+ m->op.nr_replicas_required = m->op.nr_replicas;
BUG_ON(!m->op.nr_replicas);
BUG_ON(!s->idx);
spin_lock(&c->ec_stripes_new_lock);
- hlist_del(&s->hash);
+ hlist_del_init(&s->hash);
spin_unlock(&c->ec_stripes_new_lock);
s->idx = 0;
if (*bp_offset == U64_MAX)
return 0;
- if (bch2_fs_inconsistent_on(bp.level, c, "found btree node in erasure coded bucket!?"))
+ if (bp.level) {
+ struct printbuf buf = PRINTBUF;
+ struct btree_iter node_iter;
+ struct btree *b;
+
+ b = bch2_backpointer_get_node(trans, &node_iter, bucket, *bp_offset, bp);
+ bch2_trans_iter_exit(trans, &node_iter);
+
+ prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
+ bch2_backpointer_to_text(&buf, &bp);
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ printbuf_exit(&buf);
return -EIO;
+ }
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
ret = bkey_err(k);
bch2_err_str(ret));
goto err;
}
-
- bch2_stripe_close(c, s);
err:
bch2_disk_reservation_put(c, &s->res);
}
}
+ bch2_stripe_close(c, s);
+
ec_stripe_buf_exit(&s->existing_stripe);
ec_stripe_buf_exit(&s->new_stripe);
closure_debug_destroy(&s->iodone);
queue_work(wq, &wp->index_update_work);
}
+static inline void bch2_write_queue(struct bch_write_op *op, struct write_point *wp)
+{
+ op->btree_update_ready = false;
+ op->wp = wp;
+
+ spin_lock(&wp->writes_lock);
+ list_add_tail(&op->wp_list, &wp->writes);
+ if (wp->state == WRITE_POINT_stopped)
+ __wp_update_state(wp, WRITE_POINT_waiting_io);
+ spin_unlock(&wp->writes_lock);
+}
+
void bch2_write_point_do_index_updates(struct work_struct *work)
{
struct write_point *wp =
bch2_alloc_sectors_done_inlined(c, wp);
err:
if (ret <= 0) {
- if (!(op->flags & BCH_WRITE_SYNC)) {
- spin_lock(&wp->writes_lock);
- op->wp = wp;
- list_add_tail(&op->wp_list, &wp->writes);
- if (wp->state == WRITE_POINT_stopped)
- __wp_update_state(wp, WRITE_POINT_waiting_io);
- spin_unlock(&wp->writes_lock);
- }
-
op->flags |= BCH_WRITE_DONE;
if (ret < 0) {
goto again;
bch2_write_done(&op->cl);
} else {
+ bch2_write_queue(op, wp);
continue_at(&op->cl, bch2_write_index, NULL);
}
out_nofs_restore:
return wait;
}
+void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c)
+{
+ prt_printf(out, "Currently waiting for: ");
+ prt_human_readable_u64(out, max(0LL, c->copygc_wait -
+ atomic64_read(&c->io_clock[WRITE].now)) << 9);
+ prt_newline(out);
+
+ prt_printf(out, "Currently calculated wait: ");
+ prt_human_readable_u64(out, bch2_copygc_wait_amount(c));
+ prt_newline(out);
+}
+
static int bch2_copygc_thread(void *arg)
{
struct bch_fs *c = arg;
#define _BCACHEFS_MOVINGGC_H
unsigned long bch2_copygc_wait_amount(struct bch_fs *);
+void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *);
+
void bch2_copygc_stop(struct bch_fs *);
int bch2_copygc_start(struct bch_fs *);
void bch2_fs_copygc_init(struct bch_fs *);
PRINTBUF_UNITS_10, /* use powers of 10^3 (standard SI) */
};
-#define PRINTBUF_INLINE_TABSTOPS 4
+#define PRINTBUF_INLINE_TABSTOPS 6
struct printbuf {
char *buf;
}
int bch2_mark_snapshot(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
{
for_each_btree_key2(&trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
- bch2_mark_snapshot(&trans, bkey_s_c_null, k, 0) ?:
+ bch2_mark_snapshot(&trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_snapshot_set_equiv(&trans, k));
bch2_trans_exit(&trans);
void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_snapshot_invalid(const struct bch_fs *, struct bkey_s_c,
unsigned, struct printbuf *);
-int bch2_mark_snapshot(struct btree_trans *, struct bkey_s_c,
- struct bkey_s_c, unsigned);
+int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \
.key_invalid = bch2_snapshot_invalid, \
#include "journal.h"
#include "keylist.h"
#include "move.h"
+#include "movinggc.h"
#include "nocow_locking.h"
#include "opts.h"
#include "rebalance.h"
sysfs_printf(rebalance_enabled, "%i", c->rebalance.enabled);
sysfs_pd_controller_show(rebalance, &c->rebalance.pd); /* XXX */
- sysfs_hprint(copy_gc_wait,
- max(0LL, c->copygc_wait -
- atomic64_read(&c->io_clock[WRITE].now)) << 9);
+
+ if (attr == &sysfs_copy_gc_wait)
+ bch2_copygc_wait_to_text(out, c);
if (attr == &sysfs_rebalance_work)
bch2_rebalance_work_to_text(out, c);
for (i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
- prt_printf(out,
- "\t\t\t buckets\t sectors fragmented\n"
- "capacity\t%16llu\n",
- ca->mi.nbuckets - ca->mi.first_bucket);
-
- for (i = 0; i < BCH_DATA_NR; i++)
- prt_printf(out, "%-16s%16llu%16llu%16llu\n",
- bch2_data_types[i], stats.d[i].buckets,
- stats.d[i].sectors, stats.d[i].fragmented);
-
- prt_printf(out,
- "ec\t\t%16llu\n"
- "\n"
- "freelist_wait\t\t%s\n"
- "open buckets allocated\t%u\n"
- "open buckets this dev\t%u\n"
- "open buckets total\t%u\n"
- "open_buckets_wait\t%s\n"
- "open_buckets_btree\t%u\n"
- "open_buckets_user\t%u\n"
- "buckets_to_invalidate\t%llu\n"
- "btree reserve cache\t%u\n",
- stats.buckets_ec,
- c->freelist_wait.list.first ? "waiting" : "empty",
- OPEN_BUCKETS_COUNT - c->open_buckets_nr_free,
- ca->nr_open_buckets,
- OPEN_BUCKETS_COUNT,
- c->open_buckets_wait.list.first ? "waiting" : "empty",
- nr[BCH_DATA_btree],
- nr[BCH_DATA_user],
- should_invalidate_buckets(ca, stats),
- c->btree_reserve_cache_nr);
+ printbuf_tabstop_push(out, 8);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+
+ prt_tab(out);
+ prt_str(out, "buckets");
+ prt_tab_rjust(out);
+ prt_str(out, "sectors");
+ prt_tab_rjust(out);
+ prt_str(out, "fragmented");
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ for (i = 0; i < BCH_DATA_NR; i++) {
+ prt_str(out, bch2_data_types[i]);
+ prt_tab(out);
+ prt_u64(out, stats.d[i].buckets);
+ prt_tab_rjust(out);
+ prt_u64(out, stats.d[i].sectors);
+ prt_tab_rjust(out);
+ prt_u64(out, stats.d[i].fragmented);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ prt_str(out, "ec");
+ prt_tab(out);
+ prt_u64(out, stats.buckets_ec);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_newline(out);
+
+ prt_printf(out, "reserves:");
+ prt_newline(out);
+ for (i = 0; i < RESERVE_NR; i++) {
+ prt_str(out, bch2_alloc_reserves[i]);
+ prt_tab(out);
+ prt_u64(out, bch2_dev_buckets_reserved(ca, i));
+ prt_tab_rjust(out);
+ prt_newline(out);
+ }
+
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 24);
+
+ prt_str(out, "freelist_wait");
+ prt_tab(out);
+ prt_str(out, c->freelist_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open buckets allocated");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
+ prt_newline(out);
+
+ prt_str(out, "open buckets this dev");
+ prt_tab(out);
+ prt_u64(out, ca->nr_open_buckets);
+ prt_newline(out);
+
+ prt_str(out, "open buckets total");
+ prt_tab(out);
+ prt_u64(out, OPEN_BUCKETS_COUNT);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_wait");
+ prt_tab(out);
+ prt_str(out, c->open_buckets_wait.list.first ? "waiting" : "empty");
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_btree");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_btree]);
+ prt_newline(out);
+
+ prt_str(out, "open_buckets_user");
+ prt_tab(out);
+ prt_u64(out, nr[BCH_DATA_user]);
+ prt_newline(out);
+
+ prt_str(out, "buckets_to_invalidate");
+ prt_tab(out);
+ prt_u64(out, should_invalidate_buckets(ca, stats));
+ prt_newline(out);
+
+ prt_str(out, "btree reserve cache");
+ prt_tab(out);
+ prt_u64(out, c->btree_reserve_cache_nr);
+ prt_newline(out);
}
static const char * const bch2_rw[] = {
#define prt_tab_rjust(_out) bch2_prt_tab_rjust(_out)
#define prt_bytes_indented(...) bch2_prt_bytes_indented(__VA_ARGS__)
-#define prt_u64(_out, _v) prt_printf(_out, "%llu", _v)
+#define prt_u64(_out, _v) prt_printf(_out, "%llu", (u64) (_v))
#define prt_human_readable_u64(...) bch2_prt_human_readable_u64(__VA_ARGS__)
#define prt_human_readable_s64(...) bch2_prt_human_readable_s64(__VA_ARGS__)
#define prt_units_u64(...) bch2_prt_units_u64(__VA_ARGS__)