-710cd382bf5f50ab8114a7cc22d78b5b2f574529
+720f644e63e0f5b24bb69f2ffb70cdc2dd162810
struct bch_reflink_p {
struct bch_val v;
__le64 idx;
- /*
- * A reflink pointer might point to an indirect extent which is then
- * later split (by copygc or rebalance). If we only pointed to part of
- * the original indirect extent, and then one of the fragments is
- * outside the range we point to, we'd leak a refcount: so when creating
- * reflink pointers, we need to store pad values to remember the full
- * range we were taking a reference on.
- */
- __le32 front_pad;
- __le32 back_pad;
-} __attribute__((packed, aligned(8)));
+
+ __le32 reservation_generation;
+ __u8 nr_replicas;
+ __u8 pad[3];
+};
struct bch_reflink_v {
struct bch_val v;
__le64 refcount;
union bch_extent_entry start[0];
__u64 _data[0];
-} __attribute__((packed, aligned(8)));
+};
struct bch_indirect_inline_data {
struct bch_val v;
*max_stale = max(*max_stale, ptr_stale(ca, ptr));
}
- bch2_mark_key(c, *k, flags);
+ ret = bch2_mark_key(c, *k, flags);
fsck_err:
err:
if (ret)
struct btree_path *path;
struct btree_insert_entry *i;
unsigned idx;
- char buf[300];
+ char buf1[300], buf2[300];
btree_trans_verify_sorted(trans);
path->idx, path->ref, path->intent_ref,
path->preserve ? " preserve" : "",
bch2_btree_ids[path->btree_id],
- (bch2_bpos_to_text(&PBUF(buf), path->pos), buf),
+ (bch2_bpos_to_text(&PBUF(buf1), path->pos), buf1),
#ifdef CONFIG_BCACHEFS_DEBUG
(void *) path->ip_allocated
#else
#endif
);
- trans_for_each_update(trans, i)
- printk(KERN_ERR "update: btree %s %s %pS\n",
+ trans_for_each_update(trans, i) {
+ struct bkey u;
+ struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);
+
+ printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s",
bch2_btree_ids[i->btree_id],
- (bch2_bkey_val_to_text(&PBUF(buf), trans->c, bkey_i_to_s_c(i->k)), buf),
- (void *) i->ip_allocated);
+ (void *) i->ip_allocated,
+ (bch2_bkey_val_to_text(&PBUF(buf1), trans->c, old), buf1),
+ (bch2_bkey_val_to_text(&PBUF(buf2), trans->c, bkey_i_to_s_c(i->k)), buf2));
+ }
}
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
#include "ec.h"
#include "error.h"
#include "movinggc.h"
+#include "recovery.h"
#include "reflink.h"
#include "replicas.h"
#include "subvolume.h"
{
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
+ s64 ret = 0;
- while (1) {
- if (*r_idx >= c->reflink_gc_nr)
- goto not_found;
+ while (*r_idx < c->reflink_gc_nr) {
r = genradix_ptr(&c->reflink_gc_table, *r_idx);
BUG_ON(!r);
(*r_idx)++;
}
+ if (*r_idx >= c->reflink_gc_nr ||
+ idx < r->offset - r->size) {
+ ret = p.k->size;
+ goto not_found;
+ }
+
BUG_ON((s64) r->refcount + add < 0);
r->refcount += add;
return r->offset - idx;
not_found:
- bch2_fs_inconsistent(c,
- "%llu:%llu len %u points to nonexistent indirect extent %llu",
- p.k->p.inode, p.k->p.offset, p.k->size, idx);
- bch2_inconsistent_error(c);
- return -EIO;
+ if ((flags & BTREE_TRIGGER_GC) &&
+ (flags & BTREE_TRIGGER_NOATOMIC)) {
+ /*
+ * XXX: we're replacing the entire reflink pointer with an error
+ * key; we should only be replacing the part that was missing:
+ */
+ if (fsck_err(c, "%llu:%llu len %u points to nonexistent indirect extent %llu",
+ p.k->p.inode, p.k->p.offset, p.k->size, idx)) {
+ struct bkey_i_error *new;
+
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ bch_err(c, "%s: error allocating new key", __func__);
+ return -ENOMEM;
+ }
+
+ bkey_init(&new->k);
+ new->k.type = KEY_TYPE_error;
+ new->k.p = p.k->p;
+ new->k.size = p.k->size;
+ ret = bch2_journal_key_insert(c, BTREE_ID_extents, 0, &new->k_i);
+
+ }
+ } else {
+ bch2_fs_inconsistent(c,
+ "%llu:%llu len %u points to nonexistent indirect extent %llu",
+ p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ bch2_inconsistent_error(c);
+ ret = -EIO;
+ }
+fsck_err:
+ return ret;
}
static int bch2_mark_reflink_p(struct bch_fs *c,
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
struct reflink_gc *ref;
size_t l, r, m;
- u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
- u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
- le32_to_cpu(p.v->back_pad) +
- p.k->size;
+ u64 idx = le64_to_cpu(p.v->idx);
+ unsigned sectors = p.k->size;
s64 ret = 0;
BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
while (sectors) {
ret = __bch2_mark_reflink_p(c, p, idx, flags, &l);
- if (ret < 0)
+ if (ret <= 0)
return ret;
ret = min_t(s64, ret, sectors);
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
- if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
- bch2_fs_inconsistent(c,
- "%llu:%llu len %u idx %llu indirect extent refcount underflow",
- p.k->p.inode, p.k->p.offset, p.k->size, idx);
- ret = -EIO;
- goto err;
- }
-
- if (flags & BTREE_TRIGGER_INSERT) {
- struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
- u64 pad;
-
- pad = max_t(s64, le32_to_cpu(v->front_pad),
- le64_to_cpu(v->idx) - bkey_start_offset(k.k));
- BUG_ON(pad > U32_MAX);
- v->front_pad = cpu_to_le32(pad);
-
- pad = max_t(s64, le32_to_cpu(v->back_pad),
- k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
- BUG_ON(pad > U32_MAX);
- v->back_pad = cpu_to_le32(pad);
- }
-
+ BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
le64_add_cpu(refcount, add);
if (!*refcount) {
struct bkey_s_c k, unsigned flags)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- u64 idx, sectors;
+ u64 idx = le64_to_cpu(p.v->idx);
+ unsigned sectors = p.k->size;
s64 ret = 0;
- if (flags & BTREE_TRIGGER_INSERT) {
- struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
-
- v->front_pad = v->back_pad = 0;
- }
-
- idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
- sectors = (u64) le32_to_cpu(p.v->front_pad) +
- le32_to_cpu(p.v->back_pad) +
- p.k->size;
-
while (sectors) {
ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
if (ret < 0)
u32 *subvol, u32 *snapshot, u64 *inum,
bool is_fsck)
{
+ struct bch_subvolume s;
int ret = 0;
*subvol = 0;
if (likely(d.v->d_type != DT_SUBVOL)) {
*inum = le64_to_cpu(d.v->d_inum);
} else {
- struct bch_subvolume s;
- int ret;
-
*subvol = le64_to_cpu(d.v->d_inum);
ret = bch2_subvolume_get(trans, *subvol, !is_fsck, BTREE_ITER_CACHED, &s);
{
struct btree_iter iter;
struct bkey_s_c k;
+ u32 target_subvol, target_snapshot;
+ u64 target_inum;
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents,
if (k.k->type != KEY_TYPE_dirent)
goto out;
- ret = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum) == inode->bi_inum;
+ ret = __bch2_dirent_read_target(trans, bkey_s_c_to_dirent(k),
+ &target_subvol,
+ &target_snapshot,
+ &target_inum,
+ true);
+ if (ret)
+ goto out;
+
+ ret = target_inum == inode->bi_inum;
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
snapshot = snapshot_t(c, snapshot)->equiv;
p->nr = 0;
- while (inode->bi_inum != BCACHEFS_ROOT_INO) {
+ while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
+ inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+ if (inode->bi_parent_subvol) {
+ u64 inum;
+
+ ret = subvol_lookup(trans, inode->bi_parent_subvol,
+ &snapshot, &inum);
+ if (ret)
+ break;
+ }
+
ret = lockrestart_do(trans,
inode_backpointer_exists(trans, inode, snapshot));
if (ret < 0)
atomic_long_read(&c->btree_key_cache.nr_dirty),
atomic_long_read(&c->btree_key_cache.nr_keys));
- min_key_cache = min(bch2_nr_btree_keys_need_flush(c), 128UL);
+ min_key_cache = min(bch2_nr_btree_keys_need_flush(c), (size_t) 128);
nr_flushed = journal_flush_pins(j, seq_to_flush,
min_nr, min_key_cache);
if (bkey_val_bytes(p.k) != sizeof(*p.v))
return "incorrect value size";
- if (le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
- return "idx < front_pad";
-
return NULL;
}
if (ret)
goto err;
+ /*
+ * orig is in a bkey_buf, which statically allocates five 64-bit words
+ * for the value, so we know it will be big enough:
+ */
orig->k.type = KEY_TYPE_reflink_p;
r_p = bkey_i_to_reflink_p(orig);
set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
+ memset(&r_p->v, 0, sizeof(r_p->v));
+
r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
ret = bch2_trans_update(trans, extent_iter, &r_p->k_i, 0);
u32 *new_snapshotid,
bool ro)
{
+ struct bch_fs *c = trans->c;
struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
struct bkey_i_subvolume *new_subvol = NULL;
struct bkey_i_subvolume *src_subvol = NULL;
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (bkey_cmp(k.k->p, SUBVOL_POS_MAX) > 0)
break;
- if (bkey_deleted(k.k))
+
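+ /*
+ * bch2_subvolume_delete() doesn't flush the btree key cache -
+ * ideally it would, but that's tricky. So a slot that looks
+ * deleted in the btree is only reused if the key cache has no
+ * entry for it either:
+ */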
+ if (bkey_deleted(k.k) &&
+ !bch2_btree_key_cache_find(c, BTREE_ID_subvolumes, dst_iter.pos))
goto found_slot;
}
goto err;
if (k.k->type != KEY_TYPE_subvolume) {
- bch_err(trans->c, "subvolume %u not found", src_subvolid);
+ bch_err(c, "subvolume %u not found", src_subvolid);
ret = -ENOENT;
goto err;
}
static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
if (s->nr == s->size) {
- size_t new_size = max(s->size, 128UL) * 2;
+ size_t new_size = max(s->size, (size_t) 128) * 2;
u32 *d = krealloc(s->d, new_size * sizeof(s->d[0]), GFP_KERNEL);
if (!d) {