+// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "buckets.h"
+#include "journal.h"
#include "replicas.h"
#include "super-io.h"
static inline int u8_cmp(u8 l, u8 r)
{
- return (l > r) - (l < r);
+ return cmp_int(l, r);
}
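+/*
+ * Debug checks on a replicas entry: valid data type, at least one device,
+ * sane nr_required, and devices sorted with no duplicates:
+ */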
-static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
+static void verify_replicas_entry(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned i;
+ BUG_ON(e->data_type >= BCH_DATA_NR);
+ BUG_ON(!e->nr_devs);
+ BUG_ON(e->nr_required > 1 &&
+ e->nr_required >= e->nr_devs);
+
for (i = 0; i + 1 < e->nr_devs; i++)
BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}
static void replicas_entry_sort(struct bch_replicas_entry *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}
-#define for_each_cpu_replicas_entry(_r, _i) \
- for (_i = (_r)->entries; \
- (void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
- _i = (void *) (_i) + (_r)->entry_size)
-
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
if (p.ptr.cached)
continue;
- if (p.ec_nr) {
- r->nr_devs = 0;
- break;
- }
-
- r->devs[r->nr_devs++] = p.ptr.dev;
+ if (!p.has_ec)
+ r->devs[r->nr_devs++] = p.ptr.dev;
+ else
+ r->nr_required = 0;
}
}
r->devs[r->nr_devs++] = ptr->dev;
}
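+/* Build the replicas entry that a given key's data is accounted under: */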
-static void bkey_to_replicas(struct bch_replicas_entry *e,
- struct bkey_s_c k)
+void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
+ struct bkey_s_c k)
{
e->nr_devs = 0;
switch (k.k->type) {
case KEY_TYPE_btree_ptr:
- e->data_type = BCH_DATA_BTREE;
+ case KEY_TYPE_btree_ptr_v2:
+ e->data_type = BCH_DATA_btree;
extent_to_replicas(k, e);
break;
case KEY_TYPE_extent:
- e->data_type = BCH_DATA_USER;
+ case KEY_TYPE_reflink_v:
+ e->data_type = BCH_DATA_user;
extent_to_replicas(k, e);
break;
case KEY_TYPE_stripe:
- e->data_type = BCH_DATA_USER;
+ e->data_type = BCH_DATA_parity;
stripe_to_replicas(k, e);
break;
}
unsigned i;
BUG_ON(!data_type ||
- data_type == BCH_DATA_SB ||
+ data_type == BCH_DATA_sb ||
data_type >= BCH_DATA_NR);
e->data_type = data_type;
}
BUG_ON(!new_entry->data_type);
- verify_replicas_entry_sorted(new_entry);
+ verify_replicas_entry(new_entry);
new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
if (!new.entries)
if (unlikely(entry_size > r->entry_size))
return -1;
- verify_replicas_entry_sorted(search);
+ verify_replicas_entry(search);
#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
}
bool bch2_replicas_marked(struct bch_fs *c,
- struct bch_replicas_entry *search,
- bool check_gc_replicas)
+ struct bch_replicas_entry *search)
{
bool marked;
if (!search->nr_devs)
return true;
- verify_replicas_entry_sorted(search);
+ verify_replicas_entry(search);
- percpu_down_read_preempt_disable(&c->mark_lock);
+ percpu_down_read(&c->mark_lock);
marked = __replicas_has_entry(&c->replicas, search) &&
- (!check_gc_replicas ||
- likely((!c->replicas_gc.entries)) ||
+ (likely((!c->replicas_gc.entries)) ||
__replicas_has_entry(&c->replicas_gc, search));
- percpu_up_read_preempt_enable(&c->mark_lock);
+ percpu_up_read(&c->mark_lock);
return marked;
}
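+/*
+ * Copy usage counters from @src to @dst, remapping each replicas entry from
+ * its index in @src_r to its index in @dst_r; every entry in @src_r with
+ * nonzero usage must exist in @dst_r.
+ */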
-static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
+static void __replicas_table_update(struct bch_fs_usage *dst,
struct bch_replicas_cpu *dst_r,
- struct bch_fs_usage __percpu *src_p,
+ struct bch_fs_usage *src,
struct bch_replicas_cpu *src_r)
{
- unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
- struct bch_fs_usage *dst, *src = (void *)
- bch2_acc_percpu_u64s((void *) src_p, src_nr);
int src_idx, dst_idx;
- preempt_disable();
- dst = this_cpu_ptr(dst_p);
- preempt_enable();
-
*dst = *src;
for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
- if (!src->data[src_idx])
+ if (!src->replicas[src_idx])
continue;
dst_idx = __replicas_entry_idx(dst_r,
cpu_replicas_entry(src_r, src_idx));
BUG_ON(dst_idx < 0);
- dst->data[dst_idx] = src->data[src_idx];
+ dst->replicas[dst_idx] = src->replicas[src_idx];
}
}
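+/*
+ * Percpu variant: sum @src_p's percpu counters into a single struct
+ * bch_fs_usage, then remap that into the local CPU's slot of @dst_p:
+ */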
+static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
+ struct bch_replicas_cpu *dst_r,
+ struct bch_fs_usage __percpu *src_p,
+ struct bch_replicas_cpu *src_r)
+{
+ unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
+ struct bch_fs_usage *dst, *src = (void *)
+ bch2_acc_percpu_u64s((void *) src_p, src_nr);
+
+ preempt_disable();
+ dst = this_cpu_ptr(dst_p);
+ preempt_enable();
+
+ __replicas_table_update(dst, dst_r, src, src_r);
+}
+
/*
* Resize filesystem accounting:
*/
static int replicas_table_update(struct bch_fs *c,
struct bch_replicas_cpu *new_r)
{
- struct bch_fs_usage __percpu *new_usage[3] = { NULL, NULL, NULL };
+ struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
+ struct bch_fs_usage *new_scratch = NULL;
+ struct bch_fs_usage __percpu *new_gc = NULL;
+ struct bch_fs_usage *new_base = NULL;
unsigned bytes = sizeof(struct bch_fs_usage) +
sizeof(u64) * new_r->nr;
- unsigned i;
int ret = -ENOMEM;
- for (i = 0; i < 3; i++) {
- if (i < 2 && !c->usage[i])
- continue;
-
- new_usage[i] = __alloc_percpu_gfp(bytes, sizeof(u64),
- GFP_NOIO);
- if (!new_usage[i])
- goto err;
- }
-
- for (i = 0; i < 2; i++) {
- if (!c->usage[i])
- continue;
-
- __replicas_table_update(new_usage[i], new_r,
- c->usage[i], &c->replicas);
-
- swap(c->usage[i], new_usage[i]);
+ if (!(new_base = kzalloc(bytes, GFP_NOIO)) ||
+ !(new_usage[0] = __alloc_percpu_gfp(bytes, sizeof(u64),
+ GFP_NOIO)) ||
+ !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
+ GFP_NOIO)) ||
+ !(new_scratch = kmalloc(bytes, GFP_NOIO)) ||
+ (c->usage_gc &&
+ !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) {
+ bch_err(c, "error updating replicas table: memory allocation failure");
+ goto err;
}
- swap(c->usage_scratch, new_usage[2]);
-
- swap(c->replicas, *new_r);
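+	/* Remap each live usage array into the new table's layout: */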
+ if (c->usage_base)
+ __replicas_table_update(new_base, new_r,
+ c->usage_base, &c->replicas);
+ if (c->usage[0])
+ __replicas_table_update_pcpu(new_usage[0], new_r,
+ c->usage[0], &c->replicas);
+ if (c->usage[1])
+ __replicas_table_update_pcpu(new_usage[1], new_r,
+ c->usage[1], &c->replicas);
+ if (c->usage_gc)
+ __replicas_table_update_pcpu(new_gc, new_r,
+ c->usage_gc, &c->replicas);
+
+ swap(c->usage_base, new_base);
+ swap(c->usage[0], new_usage[0]);
+ swap(c->usage[1], new_usage[1]);
+ swap(c->usage_scratch, new_scratch);
+ swap(c->usage_gc, new_gc);
+ swap(c->replicas, *new_r);
ret = 0;
err:
- for (i = 0; i < 3; i++)
- free_percpu(new_usage[i]);
+ free_percpu(new_gc);
+ kfree(new_scratch);
+ free_percpu(new_usage[1]);
+ free_percpu(new_usage[0]);
+ kfree(new_base);
return ret;
}
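+/*
+ * Space, in u64s, that a journal write reserves for its usage summary:
+ * nr_inodes and key_version entries, one entry per persistent reserve, and
+ * one jset_entry_data_usage per replicas entry:
+ */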
+static unsigned reserve_journal_replicas(struct bch_fs *c,
+ struct bch_replicas_cpu *r)
+{
+ struct bch_replicas_entry *e;
+ unsigned journal_res_u64s = 0;
+
+ /* nr_inodes: */
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+ /* key_version: */
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));
+
+ /* persistent_reserved: */
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
+ BCH_REPLICAS_MAX;
+
+ for_each_cpu_replicas_entry(r, e)
+ journal_res_u64s +=
+ DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
+ e->nr_devs, sizeof(u64));
+ return journal_res_u64s;
+}
+
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_entry *new_entry)
{
struct bch_replicas_cpu new_r, new_gc;
- int ret = -ENOMEM;
+ int ret = 0;
+
+ verify_replicas_entry(new_entry);
memset(&new_r, 0, sizeof(new_r));
memset(&new_gc, 0, sizeof(new_gc));
ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
if (ret)
goto err;
+
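+	/* Grow the journal reservation to cover the larger usage summary: */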
+ bch2_journal_entry_res_resize(&c->journal,
+ &c->replicas_journal_res,
+ reserve_journal_replicas(c, &new_r));
}
if (!new_r.entries &&
swap(new_gc, c->replicas_gc);
percpu_up_write(&c->mark_lock);
out:
- ret = 0;
-err:
mutex_unlock(&c->sb_lock);
kfree(new_r.entries);
kfree(new_gc.entries);
return ret;
+err:
+ bch_err(c, "error adding replicas entry: memory allocation failure");
+ ret = -ENOMEM;
+ goto out;
}
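+/*
+ * In check mode, just report whether @r is already marked (-1 if not);
+ * otherwise fall through to the slowpath that adds it:
+ */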
-int bch2_mark_replicas(struct bch_fs *c,
- struct bch_replicas_entry *r)
+static int __bch2_mark_replicas(struct bch_fs *c,
+ struct bch_replicas_entry *r,
+ bool check)
{
- return likely(bch2_replicas_marked(c, r, true))
- ? 0
+ return likely(bch2_replicas_marked(c, r)) ? 0
+ : check ? -1
: bch2_mark_replicas_slowpath(c, r);
}
-bool bch2_bkey_replicas_marked(struct bch_fs *c,
- struct bkey_s_c k,
- bool check_gc_replicas)
+int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
+{
+ return __bch2_mark_replicas(c, r, false);
+}
+
+static int __bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k,
+ bool check)
{
struct bch_replicas_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
unsigned i;
+ int ret;
for (i = 0; i < cached.nr; i++) {
bch2_replicas_entry_cached(&search.e, cached.devs[i]);
- if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
- return false;
+ ret = __bch2_mark_replicas(c, &search.e, check);
+ if (ret)
+ return ret;
}
- bkey_to_replicas(&search.e, k);
+ bch2_bkey_to_replicas(&search.e, k);
- return bch2_replicas_marked(c, &search.e, check_gc_replicas);
-}
-
-int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
-{
- struct bch_replicas_padded search;
- struct bch_devs_list cached = bch2_bkey_cached_devs(k);
- unsigned i;
- int ret;
+ ret = __bch2_mark_replicas(c, &search.e, check);
+ if (ret)
+ return ret;
- for (i = 0; i < cached.nr; i++) {
- bch2_replicas_entry_cached(&search.e, cached.devs[i]);
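+	/* For parity entries, also mark matching cached and user entries: */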
+ if (search.e.data_type == BCH_DATA_parity) {
+ search.e.data_type = BCH_DATA_cached;
+ ret = __bch2_mark_replicas(c, &search.e, check);
+ if (ret)
+ return ret;
- ret = bch2_mark_replicas(c, &search.e);
+ search.e.data_type = BCH_DATA_user;
+ ret = __bch2_mark_replicas(c, &search.e, check);
if (ret)
return ret;
}
- bkey_to_replicas(&search.e, k);
+ return 0;
+}
- return bch2_mark_replicas(c, &search.e);
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ return __bch2_mark_bkey_replicas(c, k, true) == 0;
+}
+
+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
+{
+ return __bch2_mark_bkey_replicas(c, k, false);
}
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
lockdep_assert_held(&c->replicas_gc_lock);
mutex_lock(&c->sb_lock);
-
- if (ret)
- goto err;
+ percpu_down_write(&c->mark_lock);
/*
* this is kind of crappy; the replicas gc mechanism needs to be ripped
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
struct bch_replicas_cpu n;
- u64 v = 0;
- int cpu;
-
- if (__replicas_has_entry(&c->replicas_gc, e))
- continue;
- for_each_possible_cpu(cpu)
- v += *per_cpu_ptr(&c->usage[0]->data[i], cpu);
- if (!v)
- continue;
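+		/* Keep any entry that still has sectors accounted to it: */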
+ if (!__replicas_has_entry(&c->replicas_gc, e) &&
+ (c->usage_base->replicas[i] ||
+ percpu_u64_get(&c->usage[0]->replicas[i]) ||
+ percpu_u64_get(&c->usage[1]->replicas[i]))) {
+ n = cpu_replicas_add_entry(&c->replicas_gc, e);
+ if (!n.entries) {
+ ret = -ENOSPC;
+ goto err;
+ }
- n = cpu_replicas_add_entry(&c->replicas_gc, e);
- if (!n.entries) {
- ret = -ENOSPC;
- goto err;
+ swap(n, c->replicas_gc);
+ kfree(n.entries);
}
-
- percpu_down_write(&c->mark_lock);
- swap(n, c->replicas_gc);
- percpu_up_write(&c->mark_lock);
-
- kfree(n.entries);
}
	if (bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc)) {
		ret = -ENOSPC;
		goto err;
	}
- bch2_write_super(c);
-
- /* don't update in memory replicas until changes are persistent */
+ ret = replicas_table_update(c, &c->replicas_gc);
err:
- percpu_down_write(&c->mark_lock);
- if (!ret)
- ret = replicas_table_update(c, &c->replicas_gc);
-
kfree(c->replicas_gc.entries);
c->replicas_gc.entries = NULL;
+
percpu_up_write(&c->mark_lock);
+ if (!ret)
+ bch2_write_super(c);
+
mutex_unlock(&c->sb_lock);
+
return ret;
}
GFP_NOIO);
if (!c->replicas_gc.entries) {
mutex_unlock(&c->sb_lock);
+ bch_err(c, "error allocating c->replicas_gc");
return -ENOMEM;
}
return 0;
}
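+/*
+ * Alternate GC path: flush the journal, then rebuild the table keeping only
+ * journal entries and entries with nonzero usage, without the mark pass that
+ * bch2_replicas_gc_start()/gc_end() require:
+ */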
+int bch2_replicas_gc2(struct bch_fs *c)
+{
+ struct bch_replicas_cpu new = { 0 };
+ unsigned i, nr;
+ int ret = 0;
+
+ bch2_journal_meta(&c->journal);
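+	/* Allocate the new table outside mark_lock; retry if it was resized: */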
+retry:
+ nr = READ_ONCE(c->replicas.nr);
+ new.entry_size = READ_ONCE(c->replicas.entry_size);
+ new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
+ if (!new.entries) {
+ bch_err(c, "error allocating c->replicas_gc");
+ return -ENOMEM;
+ }
+
+ mutex_lock(&c->sb_lock);
+ percpu_down_write(&c->mark_lock);
+
+ if (nr != c->replicas.nr ||
+ new.entry_size != c->replicas.entry_size) {
+ percpu_up_write(&c->mark_lock);
+ mutex_unlock(&c->sb_lock);
+ kfree(new.entries);
+ goto retry;
+ }
+
+ for (i = 0; i < c->replicas.nr; i++) {
+ struct bch_replicas_entry *e =
+ cpu_replicas_entry(&c->replicas, i);
+
+ if (e->data_type == BCH_DATA_journal ||
+ c->usage_base->replicas[i] ||
+ percpu_u64_get(&c->usage[0]->replicas[i]) ||
+ percpu_u64_get(&c->usage[1]->replicas[i]))
+ memcpy(cpu_replicas_entry(&new, new.nr++),
+ e, new.entry_size);
+ }
+
+ bch2_cpu_replicas_sort(&new);
+
+ if (bch2_cpu_replicas_to_sb_replicas(c, &new)) {
+ ret = -ENOSPC;
+ goto err;
+ }
+
+ ret = replicas_table_update(c, &new);
+err:
+ kfree(new.entries);
+
+ percpu_up_write(&c->mark_lock);
+
+ if (!ret)
+ bch2_write_super(c);
+
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
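+/*
+ * Set the accounting for @r to @sectors directly, adding the entry to the
+ * table first if needed (e.g. when loading accounting back from the journal):
+ */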
+int bch2_replicas_set_usage(struct bch_fs *c,
+ struct bch_replicas_entry *r,
+ u64 sectors)
+{
+ int ret, idx = bch2_replicas_entry_idx(c, r);
+
+ if (idx < 0) {
+ struct bch_replicas_cpu n;
+
+ n = cpu_replicas_add_entry(&c->replicas, r);
+ if (!n.entries)
+ return -ENOMEM;
+
+ ret = replicas_table_update(c, &n);
+ if (ret)
+ return ret;
+
+ kfree(n.entries);
+
+ idx = bch2_replicas_entry_idx(c, r);
+	BUG_ON(idx < 0);
+ }
+
+ c->usage_base->replicas[idx] = sectors;
+
+ return 0;
+}
+
/* Replicas tracking - superblock: */
static int
bch2_cpu_replicas_sort(&new_r);
percpu_down_write(&c->mark_lock);
+
ret = replicas_table_update(c, &new_r);
percpu_up_write(&c->mark_lock);
goto err;
err = "invalid replicas entry: bad nr_required";
- if (!e->nr_required ||
- (e->nr_required > 1 &&
- e->nr_required >= e->nr_devs))
+ if (e->nr_required > 1 &&
+ e->nr_required >= e->nr_devs)
goto err;
err = "invalid replicas entry: invalid device";
mi = bch2_sb_get_members(c->disk_sb.sb);
- percpu_down_read_preempt_disable(&c->mark_lock);
+ percpu_down_read(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e) {
if (e->data_type >= ARRAY_SIZE(ret.replicas))
nr_offline);
}
- percpu_up_read_preempt_enable(&c->mark_lock);
+ percpu_up_read(&c->mark_lock);
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
if (ret.replicas[i].redundancy == INT_MAX)
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
{
- return (have_enough_devs(s, BCH_DATA_JOURNAL,
+ return (have_enough_devs(s, BCH_DATA_journal,
flags & BCH_FORCE_IF_METADATA_DEGRADED,
flags & BCH_FORCE_IF_METADATA_LOST) &&
- have_enough_devs(s, BCH_DATA_BTREE,
+ have_enough_devs(s, BCH_DATA_btree,
flags & BCH_FORCE_IF_METADATA_DEGRADED,
flags & BCH_FORCE_IF_METADATA_LOST) &&
- have_enough_devs(s, BCH_DATA_USER,
+ have_enough_devs(s, BCH_DATA_user,
flags & BCH_FORCE_IF_DATA_DEGRADED,
flags & BCH_FORCE_IF_DATA_LOST));
}
struct replicas_status s = bch2_replicas_status(c);
return (meta
- ? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
- s.replicas[BCH_DATA_BTREE].redundancy)
- : s.replicas[BCH_DATA_USER].redundancy) + 1;
+ ? min(s.replicas[BCH_DATA_journal].redundancy,
+ s.replicas[BCH_DATA_btree].redundancy)
+ : s.replicas[BCH_DATA_user].redundancy) + 1;
}
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
struct bch_replicas_entry *e;
unsigned i, ret = 0;
- percpu_down_read_preempt_disable(&c->mark_lock);
+ percpu_down_read(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e)
for (i = 0; i < e->nr_devs; i++)
if (e->devs[i] == ca->dev_idx)
ret |= 1 << e->data_type;
- percpu_up_read_preempt_enable(&c->mark_lock);
+ percpu_up_read(&c->mark_lock);
return ret;
}
+
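+/*
+ * Startup: reserve journal space for the current replicas table and size the
+ * in-memory usage arrays to match:
+ */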
+int bch2_fs_replicas_init(struct bch_fs *c)
+{
+ c->journal.entry_u64s_reserved +=
+ reserve_journal_replicas(c, &c->replicas);
+
+ return replicas_table_update(c, &c->replicas);
+}