#include <trace/events/bcachefs.h>
+/* Forward declaration; the static definition appears later in this file. */
+static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
+						     struct bch_extent_ptr *ptr);
+
static unsigned bch2_crc_field_size_max[] = {
[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
return -EIO;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ /*
+ * Unwritten extent: no need to actually read, treat it as a
+ * hole and return 0s:
+ */
+ if (p.ptr.unwritten)
+ return 0;
+
ca = bch_dev_bkey_exists(c, p.ptr.dev);
/*
int rw, struct printbuf *err)
{
if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) {
- pr_buf(err, "value too big (%zu > %u)",
+ prt_printf(err, "value too big (%zu > %u)",
bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
return bch2_bkey_ptrs_invalid(c, k, rw, err);
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
if (bkey_val_bytes(k.k) <= sizeof(*bp.v)) {
- pr_buf(err, "value too small (%zu <= %zu)",
+ prt_printf(err, "value too small (%zu <= %zu)",
bkey_val_bytes(k.k), sizeof(*bp.v));
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) {
- pr_buf(err, "value too big (%zu > %zu)",
+ prt_printf(err, "value too big (%zu > %zu)",
bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
if (c->sb.version < bcachefs_metadata_version_snapshot &&
bp.v->min_key.snapshot) {
- pr_buf(err, "invalid min_key.snapshot (%u != 0)",
+ prt_printf(err, "invalid min_key.snapshot (%u != 0)",
bp.v->min_key.snapshot);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
return bch2_bkey_ptrs_invalid(c, k, rw, err);
{
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
- pr_buf(out, "seq %llx written %u min_key %s",
+ prt_printf(out, "seq %llx written %u min_key %s",
le64_to_cpu(bp.v->seq),
le16_to_cpu(bp.v->sectors_written),
BTREE_PTR_RANGE_UPDATED(bp.v) ? "R " : "");
bch2_bpos_to_text(out, bp.v->min_key);
- pr_buf(out, " ");
+ prt_printf(out, " ");
bch2_bkey_ptrs_to_text(out, c, k);
}
if (version < bcachefs_metadata_version_inode_btree_change &&
btree_node_type_is_extents(btree_id) &&
- bkey_cmp(bp.v->min_key, POS_MIN))
+ !bkey_eq(bp.v->min_key, POS_MIN))
bp.v->min_key = write
? bpos_nosnap_predecessor(bp.v->min_key)
: bpos_nosnap_successor(bp.v->min_key);
rp.ptr.offset + rp.crc.offset ||
lp.ptr.dev != rp.ptr.dev ||
lp.ptr.gen != rp.ptr.gen ||
+ lp.ptr.unwritten != rp.ptr.unwritten ||
lp.has_ec != rp.has_ec)
return false;
if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
lp.crc.uncompressed_size) {
/* can use left extent's crc entry */
- } else if (lp.crc.live_size <= rp.crc.offset ) {
+ } else if (lp.crc.live_size <= rp.crc.offset) {
/* can use right extent's crc entry */
} else {
/* check if checksums can be merged: */
lp.crc.uncompressed_size +
rp.crc.uncompressed_size > (c->opts.encoded_extent_max >> 9))
return false;
+ }
- if (lp.crc.uncompressed_size + rp.crc.uncompressed_size >
+ en_l = extent_entry_next(en_l);
+ en_r = extent_entry_next(en_r);
+ }
+
+ en_l = l_ptrs.start;
+ en_r = r_ptrs.start;
+ while (en_l < l_ptrs.end && en_r < r_ptrs.end) {
+ if (extent_entry_is_crc(en_l)) {
+ struct bch_extent_crc_unpacked crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
+ struct bch_extent_crc_unpacked crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+
+ if (crc_l.uncompressed_size + crc_r.uncompressed_size >
bch2_crc_field_size_max[extent_entry_type(en_l)])
return false;
}
if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
crc_l.uncompressed_size) {
/* can use left extent's crc entry */
- } else if (crc_l.live_size <= crc_r.offset ) {
+ } else if (crc_l.live_size <= crc_r.offset) {
/* can use right extent's crc entry */
crc_r.offset -= crc_l.live_size;
bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) {
- pr_buf(err, "incorrect value size (%zu != %zu)",
+ prt_printf(err, "incorrect value size (%zu != %zu)",
bkey_val_bytes(k.k), sizeof(*r.v));
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) {
- pr_buf(err, "invalid nr_replicas (%u)",
+ prt_printf(err, "invalid nr_replicas (%u)",
r.v->nr_replicas);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
return 0;
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
- pr_buf(out, "generation %u replicas %u",
+ prt_printf(out, "generation %u replicas %u",
le32_to_cpu(r.v->generation),
r.v->nr_replicas);
}
return replicas;
}
-static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
- struct extent_ptr_decoded p)
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
{
unsigned durability = 0;
struct bch_dev *ca;
- if (p.ptr.cached)
+ if (p->ptr.cached) /* a cached pointer contributes no durability */
return 0;
- ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ ca = bch_dev_bkey_exists(c, p->ptr.dev); /* device looked up from the pointer's dev index */
if (ca->mi.state != BCH_MEMBER_STATE_failed)
durability = max_t(unsigned, durability, ca->mi.durability);
- if (p.has_ec)
- durability += p.ec.redundancy;
+ if (p->has_ec) /* EC redundancy is added on top, even when the device is failed */
+ durability += p->ec.redundancy;
return durability;
}
unsigned durability = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- durability += bch2_extent_ptr_durability(c, p);
+ durability += bch2_extent_ptr_durability(c,& p);
return durability;
}
-void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
- unsigned target,
- unsigned nr_desired_replicas)
-{
- struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
- union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
-
- if (target && extra > 0)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- int n = bch2_extent_ptr_durability(c, p);
-
- if (n && n <= extra &&
- !bch2_dev_in_target(c, p.ptr.dev, target)) {
- entry->ptr.cached = true;
- extra -= n;
- }
- }
-
- if (extra > 0)
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- int n = bch2_extent_ptr_durability(c, p);
-
- if (n && n <= extra) {
- entry->ptr.cached = true;
- extra -= n;
- }
- }
-}
-
void bch2_bkey_extent_entry_drop(struct bkey_i *k, union bch_extent_entry *entry)
{
union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
k->k.u64s -= extent_entry_u64s(entry);
}
-void bch2_bkey_append_ptr(struct bkey_i *k,
- struct bch_extent_ptr ptr)
-{
- EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev));
-
- switch (k->k.type) {
- case KEY_TYPE_btree_ptr:
- case KEY_TYPE_btree_ptr_v2:
- case KEY_TYPE_extent:
- EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
-
- ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
-
- memcpy((void *) &k->v + bkey_val_bytes(&k->k),
- &ptr,
- sizeof(ptr));
- k->u64s++;
- break;
- default:
- BUG();
- }
-}
-
static inline void __extent_entry_insert(struct bkey_i *k,
union bch_extent_entry *dst,
union bch_extent_entry *new)
/*
* Returns pointer to the next entry after the one being dropped:
*/
-union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
}
+/*
+ * Drop the pointer to device @dev from @k if the key has one; when no such
+ * pointer is found nothing is done and nothing is reported to the caller.
+ */
+void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
+{
+	/* The lookup goes through the const view k.s_c, hence the cast below */
+	const struct bch_extent_ptr *found = bch2_bkey_has_device(k.s_c, dev);
+
+	if (!found)
+		return;
+
+	__bch2_bkey_drop_ptr(k, (struct bch_extent_ptr *) found);
+}
+
const struct bch_extent_ptr *
bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
{
return false;
}
+/*
+ * Returns true if two extents refer to the same data.
+ *
+ * Keys of different types never match.  For direct-data extents both keys
+ * must agree on being unwritten, and at least one pointer of @k1 must have
+ * the same device and generation as a pointer of @k2 and the same offset
+ * (pointer offset plus crc offset) once each is taken relative to the start
+ * of its own key.  Every other key type matches as soon as the types agree.
+ */
+bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
+{
+	if (k1.k->type != k2.k->type)
+		return false;
+
+	if (bkey_extent_is_direct_data(k1.k)) {
+		struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(k1);
+		struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
+		const union bch_extent_entry *entry1, *entry2;
+		struct extent_ptr_decoded p1, p2;
+
+		if (bkey_extent_is_unwritten(k1) != bkey_extent_is_unwritten(k2))
+			return false;
+
+		/* A single matching pair of pointers is enough */
+		bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1) {
+			bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2) {
+				if (p1.ptr.dev == p2.ptr.dev &&
+				    p1.ptr.gen == p2.ptr.gen &&
+				    (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+				    (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+					return true;
+			}
+		}
+
+		return false;
+	}
+
+	/* KEY_TYPE_deleted, etc. */
+	return true;
+}
+
+/*
+ * Returns true if @k2 has a pointer matching @p1: same device, same
+ * generation, and the same offset (pointer offset plus crc offset) once @p1
+ * is taken relative to the start of @k1 and the candidate relative to the
+ * start of @k2.  Presumably @p1 was decoded from @k1 - confirm at callers.
+ */
+bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
+			 struct bkey_s_c k2)
+{
+	struct bkey_ptrs_c k2_ptrs = bch2_bkey_ptrs_c(k2);
+	const union bch_extent_entry *k2_entry;
+	struct extent_ptr_decoded cand;
+
+	bkey_for_each_ptr_decode(k2.k, k2_ptrs, cand, k2_entry) {
+		if (p1.ptr.dev == cand.ptr.dev &&
+		    p1.ptr.gen == cand.ptr.gen &&
+		    (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+		    (s64) cand.ptr.offset + cand.crc.offset - bkey_start_offset(k2.k))
+			return true;
+	}
+
+	return false;
+}
+
/*
* bch_extent_normalize - clean up an extent, dropping stale pointers etc.
*
bkey_extent_entry_for_each(ptrs, entry) {
if (!first)
- pr_buf(out, " ");
+ prt_printf(out, " ");
switch (__extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
: NULL;
if (!ca) {
- pr_buf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
ptr->cached ? " cached" : "");
} else {
u32 offset;
u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
- pr_buf(out, "ptr: %u:%llu:%u gen %u%s", ptr->dev,
- b, offset, ptr->gen,
- ptr->cached ? " cached" : "");
-
+ prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ ptr->dev, b, offset, ptr->gen);
+ if (ptr->cached)
+ prt_str(out, " cached");
+ if (ptr->unwritten)
+ prt_str(out, " unwritten");
if (ca && ptr_stale(ca, ptr))
- pr_buf(out, " stale");
+ prt_printf(out, " stale");
}
break;
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
crc.compressed_size,
crc.uncompressed_size,
crc.offset, crc.nonce,
case BCH_EXTENT_ENTRY_stripe_ptr:
ec = &entry->stripe_ptr;
- pr_buf(out, "ec: idx %llu block %u",
+ prt_printf(out, "ec: idx %llu block %u",
(u64) ec->idx, ec->block);
break;
default:
- pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
+ prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
return;
}
struct bch_dev *ca;
if (!bch2_dev_exists2(c, ptr->dev)) {
- pr_buf(err, "pointer to invalid device (%u)", ptr->dev);
- return -EINVAL;
+ prt_printf(err, "pointer to invalid device (%u)", ptr->dev);
+ return -BCH_ERR_invalid_bkey;
}
ca = bch_dev_bkey_exists(c, ptr->dev);
bkey_for_each_ptr(ptrs, ptr2)
if (ptr != ptr2 && ptr->dev == ptr2->dev) {
- pr_buf(err, "multiple pointers to same device (%u)", ptr->dev);
- return -EINVAL;
+ prt_printf(err, "multiple pointers to same device (%u)", ptr->dev);
+ return -BCH_ERR_invalid_bkey;
}
bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset);
if (bucket >= ca->mi.nbuckets) {
- pr_buf(err, "pointer past last bucket (%llu > %llu)",
+ prt_printf(err, "pointer past last bucket (%llu > %llu)",
bucket, ca->mi.nbuckets);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) {
- pr_buf(err, "pointer before first bucket (%llu < %u)",
+ prt_printf(err, "pointer before first bucket (%llu < %u)",
bucket, ca->mi.first_bucket);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
if (bucket_offset + size_ondisk > ca->mi.bucket_size) {
- pr_buf(err, "pointer spans multiple buckets (%u + %u > %u)",
+ prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)",
bucket_offset, size_ondisk, ca->mi.bucket_size);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
return 0;
struct bch_extent_crc_unpacked crc;
unsigned size_ondisk = k.k->size;
unsigned nonce = UINT_MAX;
+ unsigned nr_ptrs = 0;
+ bool unwritten = false;
int ret;
if (bkey_is_btree_ptr(k.k))
bkey_extent_entry_for_each(ptrs, entry) {
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) {
- pr_buf(err, "invalid extent entry type (got %u, max %u)",
+ prt_printf(err, "invalid extent entry type (got %u, max %u)",
__extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
- return -EINVAL;
+ return -BCH_ERR_invalid_bkey;
}
if (bkey_is_btree_ptr(k.k) &&
!extent_entry_is_ptr(entry)) {
- pr_buf(err, "has non ptr field");
- return -EINVAL;
+ prt_printf(err, "has non ptr field");
+ return -BCH_ERR_invalid_bkey;
}
switch (extent_entry_type(entry)) {
false, err);
if (ret)
return ret;
+
+ if (nr_ptrs && unwritten != entry->ptr.unwritten) {
+ prt_printf(err, "extent with unwritten and written ptrs");
+ return -BCH_ERR_invalid_bkey;
+ }
+
+ if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
+ prt_printf(err, "has unwritten ptrs");
+ return -BCH_ERR_invalid_bkey;
+ }
+
+ unwritten = entry->ptr.unwritten;
+ nr_ptrs++;
break;
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
if (crc.offset + crc.live_size >
crc.uncompressed_size) {
- pr_buf(err, "checksum offset + key size > uncompressed size");
- return -EINVAL;
+ prt_printf(err, "checksum offset + key size > uncompressed size");
+ return -BCH_ERR_invalid_bkey;
}
size_ondisk = crc.compressed_size;
if (!bch2_checksum_type_valid(c, crc.csum_type)) {
- pr_buf(err, "invalid checksum type");
- return -EINVAL;
+ prt_printf(err, "invalid checksum type");
+ return -BCH_ERR_invalid_bkey;
}
if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) {
- pr_buf(err, "invalid compression type");
- return -EINVAL;
+ prt_printf(err, "invalid compression type");
+ return -BCH_ERR_invalid_bkey;
}
if (bch2_csum_type_is_encryption(crc.csum_type)) {
if (nonce == UINT_MAX)
nonce = crc.offset + crc.nonce;
else if (nonce != crc.offset + crc.nonce) {
- pr_buf(err, "incorrect nonce");
- return -EINVAL;
+ prt_printf(err, "incorrect nonce");
+ return -BCH_ERR_invalid_bkey;
}
}
break;
}
}
+ if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
+ prt_str(err, "too many ptrs");
+ return -BCH_ERR_invalid_bkey;
+ }
+
return 0;
}
int val_u64s_delta;
u64 sub;
- if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0)
+ if (bkey_le(where, bkey_start_pos(k.k)))
return 0;
- EBUG_ON(bkey_cmp(where, k.k->p) > 0);
+ EBUG_ON(bkey_gt(where, k.k->p));
sub = where.offset - bkey_start_offset(k.k);
int val_u64s_delta;
u64 len = 0;
- if (bkey_cmp(where, k.k->p) >= 0)
+ if (bkey_ge(where, k.k->p))
return 0;
- EBUG_ON(bkey_cmp(where, bkey_start_pos(k.k)) < 0);
+ EBUG_ON(bkey_lt(where, bkey_start_pos(k.k)));
len = where.offset - bkey_start_offset(k.k);