struct bch_reflink_p {
struct bch_val v;
__le64 idx;
-
- __le32 reservation_generation;
- __u8 nr_replicas;
- __u8 pad[3];
-};
+ /*
+ * A reflink pointer might point to an indirect extent which is then
+ * later split (by copygc or rebalance). If we only pointed to part of
+ * the original indirect extent, and then one of the fragments is
+ * outside the range we point to, we'd leak a refcount: so when creating
+ * reflink pointers, we need to store pad values to remember the full
+ * range we were taking a reference on.
+ */
+ __le32 front_pad;
+ __le32 back_pad;
+} __attribute__((packed, aligned(8)));
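The full range a pointer holds a reference on can be recovered from idx, size and the two pads. A minimal standalone sketch of the arithmetic, using plain integers rather than the in-kernel __le accessors (the struct and function names here are hypothetical):

#include <stdint.h>

/* Model of the range in the reflink btree that a bch_reflink_p references: */
struct range { uint64_t start, end; };

static struct range reflink_p_referenced_range(uint64_t idx, uint32_t size,
					       uint32_t front_pad,
					       uint32_t back_pad)
{
	struct range r = {
		.start	= idx - front_pad,		/* pads widen the range... */
		.end	= idx + size + back_pad,	/* ...on both sides */
	};
	return r;
}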
struct bch_reflink_v {
struct bch_val v;
__le64 refcount;
union bch_extent_entry start[0];
__u64 _data[0];
-};
+} __attribute__((packed, aligned(8)));
struct bch_indirect_inline_data {
struct bch_val v;
bch2_bkey_buf_reassemble(&sk, c, k);
bch2_extent_normalize(c, bkey_i_to_s(sk.k));
-
commit_err =
bch2_trans_update(&trans, &iter, sk.k, 0) ?:
bch2_trans_commit(&trans, NULL, NULL,
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
struct reflink_gc *ref;
size_t l, r, m;
- u64 idx = le64_to_cpu(p.v->idx);
- unsigned sectors = p.k->size;
+ u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
+ le32_to_cpu(p.v->back_pad) +
+ p.k->size;
s64 ret = 0;
BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
bch2_fs_inconsistent(c,
"%llu:%llu len %u points to nonexistent indirect extent %llu",
p.k->p.inode, p.k->p.offset, p.k->size, idx);
- bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
- BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
+ if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+ bch2_fs_inconsistent(c,
+ "%llu:%llu len %u idx %llu indirect extent refcount underflow",
+ p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ ret = -EIO;
+ goto err;
+ }
+
+ if (flags & BTREE_TRIGGER_INSERT) {
+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+ u64 pad;
+
+ pad = max_t(s64, le32_to_cpu(v->front_pad),
+ le64_to_cpu(v->idx) - bkey_start_offset(k.k));
+ BUG_ON(pad > U32_MAX);
+ v->front_pad = cpu_to_le32(pad);
+
+ pad = max_t(s64, le32_to_cpu(v->back_pad),
+ k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
+ BUG_ON(pad > U32_MAX);
+ v->back_pad = cpu_to_le32(pad);
+ }
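To make the max_t() updates concrete, here is a worked example with hypothetical numbers: a pointer referencing [20, 30) in the reflink btree lands on an indirect extent fragment that was split out to span [16, 36).

/*
 * Hypothetical numbers, not from the patch:
 *
 *   reflink_p:  idx = 20, size = 10      -> references [20, 30)
 *   reflink_v:  fragment spans [16, 36)  (after a split by copygc/rebalance)
 *
 *   front_pad = max(front_pad, 20 - 16)        = 4
 *   back_pad  = max(back_pad,  36 - (20 + 10)) = 6
 *
 * The pointer now records that it holds a reference on all of [16, 36),
 * so releasing it later drops the refcount on the whole fragment.
 */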
+
le64_add_cpu(refcount, add);
if (!*refcount) {
struct bkey_s_c k, unsigned flags)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- u64 idx = le64_to_cpu(p.v->idx);
- unsigned sectors = p.k->size;
+ u64 idx, sectors;
s64 ret = 0;
+ if (flags & BTREE_TRIGGER_INSERT) {
+ struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+
+ v->front_pad = v->back_pad = 0;
+ }
+
+ idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ sectors = (u64) le32_to_cpu(p.v->front_pad) +
+ le32_to_cpu(p.v->back_pad) +
+ p.k->size;
+
while (sectors) {
ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
if (ret < 0)
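The hunk cuts off inside the loop; presumably __bch2_trans_mark_reflink_p() returns how many sectors of the padded range the indirect extent fragment it found covers, and the caller advances by that much. A sketch of the assumed continuation:

		/* assumed continuation, not shown in the hunk: */
		idx	+= ret;		/* step to the next indirect extent fragment */
		sectors	-= ret;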
bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
if (can_narrow_crc(p.crc, n)) {
- bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
+ __bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
p.ptr.offset += p.crc.offset;
p.crc = n;
bch2_extent_ptr_decoded_append(k, &p);
return i;
}
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
+{
+ union bch_extent_entry *next = extent_entry_next(entry);
+
+ /* stripes have ptrs, but their layout doesn't work with this code */
+ BUG_ON(k.k->type == KEY_TYPE_stripe);
+
+ memmove_u64s_down(entry, next,
+ (u64 *) bkey_val_end(k) - (u64 *) next);
+ k.k->u64s -= (u64 *) next - (u64 *) entry;
+}
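extent_entry_drop() splices an entry out by sliding the tail of the value down and shrinking the key's u64 count. The same operation on a plain u64 array, as a standalone sketch (names hypothetical):

#include <stdint.h>
#include <string.h>

/* Delete n u64s starting at index i from a value holding *len u64s: */
static void drop_u64s(uint64_t *val, unsigned *len, unsigned i, unsigned n)
{
	memmove(&val[i], &val[i + n],			/* slide the tail down */
		(*len - (i + n)) * sizeof(uint64_t));
	*len -= n;					/* shrink the value */
}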
+
+/*
+ * Returns pointer to the next entry after the one being dropped:
+ */
+union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
- union bch_extent_entry *dst, *src, *prev;
+ union bch_extent_entry *entry = to_entry(ptr), *next;
+ union bch_extent_entry *ret = entry;
bool drop_crc = true;
EBUG_ON(ptr < &ptrs.start->ptr ||
ptr >= &ptrs.end->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
- src = extent_entry_next(to_entry(ptr));
- if (src != ptrs.end &&
- !extent_entry_is_crc(src))
- drop_crc = false;
-
- dst = to_entry(ptr);
- while ((prev = extent_entry_prev(ptrs, dst))) {
- if (extent_entry_is_ptr(prev))
+ for (next = extent_entry_next(entry);
+ next != ptrs.end;
+ next = extent_entry_next(next)) {
+ if (extent_entry_is_crc(next)) {
break;
-
- if (extent_entry_is_crc(prev)) {
- if (drop_crc)
- dst = prev;
+ } else if (extent_entry_is_ptr(next)) {
+ drop_crc = false;
break;
}
+ }
+
+ extent_entry_drop(k, entry);
- dst = prev;
+ while ((entry = extent_entry_prev(ptrs, entry))) {
+ if (extent_entry_is_ptr(entry))
+ break;
+
+ if ((extent_entry_is_crc(entry) && drop_crc) ||
+ extent_entry_is_stripe_ptr(entry)) {
+ ret = (void *) ret - extent_entry_bytes(entry);
+ extent_entry_drop(k, entry);
+ }
}
- memmove_u64s_down(dst, src,
- (u64 *) ptrs.end - (u64 *) src);
- k.k->u64s -= (u64 *) src - (u64 *) dst;
+ return ret;
+}
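The ordering logic reads off the positional entry format: crc and stripe_ptr entries apply to the pointer entries that follow them. A worked example with a hypothetical entry layout:

/*
 * Hypothetical extent value:
 *
 *   [crc A][stripe_ptr][ptr 1][crc B][ptr 2]
 *
 * Dropping ptr 1: the forward scan hits crc B before any ptr, so nothing
 * after the dropped entry still depends on crc A (drop_crc stays true);
 * the backwards walk then drops the stripe_ptr and crc A as well,
 * leaving [crc B][ptr 2].
 */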
+
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
+{
+ bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
+ union bch_extent_entry *ret =
+ __bch2_bkey_drop_ptr(k, ptr);
+
+ /*
+ * If we deleted all the dirty pointers and there's still cached
+ * pointers, we could set the cached pointers to dirty if they're not
+ * stale - but to do that correctly we'd need to grab an open_bucket
+ * reference so that we don't race with bucket reuse:
+ */
+ if (have_dirty &&
+ !bch2_bkey_dirty_devs(k.s_c).nr) {
+ k.k->type = KEY_TYPE_error;
+ set_bkey_val_u64s(k.k, 0);
+ ret = NULL;
+ } else if (!bch2_bkey_nr_ptrs(k.s_c)) {
+ k.k->type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(k.k, 0);
+ ret = NULL;
+ }
- return dst;
+ return ret;
}
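So the split is: __bch2_bkey_drop_ptr() only drops the pointer and the entries that existed for its sake, while the bch2_bkey_drop_ptr() wrapper may additionally white out the whole key, which is why the callers elsewhere in this patch that are mid-iteration over a key's pointers switch to the bare version. A hedged usage sketch for the wrapper (hypothetical caller):

	union bch_extent_entry *entry =
		bch2_bkey_drop_ptr(bkey_i_to_s(k), ptr);

	if (!entry)	/* key became KEY_TYPE_error or KEY_TYPE_deleted */
		return;
	/* otherwise entry points at whatever now follows the dropped ptr */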
void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
ptr->cached &&
ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
- /* will only happen if all pointers were cached: */
- if (!bch2_bkey_nr_ptrs(k.s_c))
- k.k->type = KEY_TYPE_deleted;
-
return bkey_deleted(k.k);
}
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
- switch (extent_entry_type(e)) {
- case BCH_EXTENT_ENTRY_ptr:
- return true;
- default:
- return false;
- }
+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
+}
+
+static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
+{
+ return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
}
static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr);
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
+union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s,
+ struct bch_extent_ptr *);
union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
struct bch_extent_ptr *);
? ((unsigned long) buf & (PAGE_SIZE - 1))
: 0), PAGE_SIZE);
+ pages = min(pages, BIO_MAX_VECS);
+
bio = bio_alloc_bioset(GFP_NOIO, pages, &c->bio_write);
wbio = wbio_init(bio);
wbio->put_bio = true;
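The new clamp is needed because bio_alloc_bioset() can allocate at most BIO_MAX_VECS (256) inline bio_vecs. A worked example with hypothetical numbers:

/*
 * Hypothetical: a 2 MiB buffer starting 0x200 bytes into a 4 KiB page:
 *
 *   pages = DIV_ROUND_UP(0x200 + 2 MiB, PAGE_SIZE) = 513
 *   pages = min(pages, BIO_MAX_VECS)               = 256
 *
 * so this bio covers only the first 256 pages; presumably the write path
 * issues the remainder in further bios.
 */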
*/
bch2_extent_normalize(c, bkey_i_to_s(sk.k));
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&sk.k->k))
+ sk.k->k.size = 0;
+
ret = bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(&trans, &iter, sk.k,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
extent_for_each_ptr(extent_i_to_s(new), new_ptr)
new_ptr->cached = true;
- bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
+ __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr);
}
extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
if (bkey_val_bytes(p.k) != sizeof(*p.v))
return "incorrect value size";
+ if (le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
+ return "idx < front_pad";
+
return NULL;
}
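The added check rejects keys whose effective start, idx - front_pad, would underflow. A hypothetical key the check catches:

/*
 * idx = 3, front_pad = 8:  idx - front_pad would wrap around below 0,
 * so the key is rejected with "idx < front_pad".
 */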
if (!page)
return -ENOMEM;
- BUG_ON(!bio_add_page(bio, page, len, 0));
+ if (unlikely(!bio_add_page(bio, page, len, 0))) {
+ __free_page(page);
+ break;
+ }
+
size -= len;
}
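Reconstructing the surrounding loop under assumptions (the hunk only shows its tail): when bio_add_page() fails because the bio is already full, the freshly allocated page is freed and the loop stops with a shorter bio instead of tripping the old BUG_ON(). A sketch:

	/* sketch of the fixed loop; the real function differs in detail: */
	while (size) {
		unsigned len = min_t(size_t, size, PAGE_SIZE);
		struct page *page = alloc_page(GFP_NOIO);

		if (!page)
			return -ENOMEM;

		if (unlikely(!bio_add_page(bio, page, len, 0))) {
			__free_page(page);	/* don't leak the page we just got */
			break;			/* bio full: continue with what fits */
		}

		size -= len;
	}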