#include "replicas.h"
#include "super.h"
#include "super-io.h"
+#include "trace.h"
#include "util.h"
-#include <trace/events/bcachefs.h>
-
-static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
-
static unsigned bch2_crc_field_size_max[] = {
[BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX,
[BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX,
/* KEY_TYPE_btree_ptr: */
int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k,
- int rw, struct printbuf *err)
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
{
if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) {
prt_printf(err, "value too big (%zu > %u)",
return -BCH_ERR_invalid_bkey;
}
- return bch2_bkey_ptrs_invalid(c, k, rw, err);
+ return bch2_bkey_ptrs_invalid(c, k, flags, err);
}
void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
- int rw, struct printbuf *err)
+ enum bkey_invalid_flags flags, /* handed on unchanged to bch2_bkey_ptrs_invalid() */
+ struct printbuf *err) /* receives the failure message written by prt_printf() */
{
- struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
-
- if (bkey_val_bytes(k.k) <= sizeof(*bp.v)) {
- prt_printf(err, "value too small (%zu <= %zu)",
- bkey_val_bytes(k.k), sizeof(*bp.v));
- return -BCH_ERR_invalid_bkey;
- }
-
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) {
prt_printf(err, "value too big (%zu > %zu)",
bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
return -BCH_ERR_invalid_bkey;
}
- if (c->sb.version < bcachefs_metadata_version_snapshot &&
- bp.v->min_key.snapshot) {
- prt_printf(err, "invalid min_key.snapshot (%u != 0)",
- bp.v->min_key.snapshot);
- return -BCH_ERR_invalid_bkey;
- }
-
- return bch2_bkey_ptrs_invalid(c, k, rw, err);
+ return bch2_bkey_ptrs_invalid(c, k, flags, err); /* value is not too big: the pointer entries are checked by the shared validator */
}
void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
compat_bpos(0, btree_id, version, big_endian, write, &bp.v->min_key);
if (version < bcachefs_metadata_version_inode_btree_change &&
- btree_node_type_is_extents(btree_id) &&
+ btree_id_is_extents(btree_id) &&
!bkey_eq(bp.v->min_key, POS_MIN))
bp.v->min_key = write
? bpos_nosnap_predecessor(bp.v->min_key)
/* KEY_TYPE_reservation: */
int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k,
- int rw, struct printbuf *err)
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
- if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) {
- prt_printf(err, "incorrect value size (%zu != %zu)",
- bkey_val_bytes(k.k), sizeof(*r.v));
- return -BCH_ERR_invalid_bkey;
- }
-
if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) {
prt_printf(err, "invalid nr_replicas (%u)",
r.v->nr_replicas);
bkey_for_each_ptr_decode(&k->k, ptrs, p, i)
if (can_narrow_crc(p.crc, n)) {
- __bch2_bkey_drop_ptr(bkey_i_to_s(k), &i->ptr);
+ bch2_bkey_drop_ptr_noerror(bkey_i_to_s(k), &i->ptr);
p.ptr.offset += p.crc.offset;
p.crc = n;
bch2_extent_ptr_decoded_append(k, &p);
switch (type) {
case BCH_EXTENT_ENTRY_crc32:
set_common_fields(dst->crc32, src);
- dst->crc32.csum = *((__le32 *) &src.csum.lo);
+ dst->crc32.csum = (u32 __force) *((__le32 *) &src.csum.lo);
break;
case BCH_EXTENT_ENTRY_crc64:
set_common_fields(dst->crc64, src);
dst->crc64.nonce = src.nonce;
- dst->crc64.csum_lo = src.csum.lo;
- dst->crc64.csum_hi = *((__le16 *) &src.csum.hi);
+ dst->crc64.csum_lo = (u64 __force) src.csum.lo;
+ dst->crc64.csum_hi = (u64 __force) *((__le16 *) &src.csum.hi);
break;
case BCH_EXTENT_ENTRY_crc128:
set_common_fields(dst->crc128, src);
return replicas;
}
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{ /*
+ * Durability computed for one decoded extent pointer: 0 when the
+ * pointer is cached, otherwise the device's mi.durability plus the
+ * pointer's ec.redundancy when has_ec is set. The device's member
+ * state (mi.state) is not consulted here.
+ */
+ struct bch_dev *ca;
+
+ if (p->ptr.cached) /* cached pointers contribute nothing */
+ return 0;
+
+ ca = bch_dev_bkey_exists(c, p->ptr.dev); /* device named by the pointer */
+
+ return ca->mi.durability +
+ (p->has_ec
+ ? p->ec.redundancy
+ : 0);
+}
+
unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
{
- unsigned durability = 0;
struct bch_dev *ca;
if (p->ptr.cached)
ca = bch_dev_bkey_exists(c, p->ptr.dev);
- if (ca->mi.state != BCH_MEMBER_STATE_failed)
- durability = max_t(unsigned, durability, ca->mi.durability);
+ if (ca->mi.state == BCH_MEMBER_STATE_failed)
+ return 0;
- if (p->has_ec)
- durability += p->ec.redundancy;
+ return ca->mi.durability +
+ (p->has_ec
+ ? p->ec.redundancy
+ : 0);
+}
+
+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
+{ /*
+ * Total durability of a key: the sum of bch2_extent_ptr_durability()
+ * over every pointer decoded from k. Each pointer's device is looked
+ * up without any existence check in this function.
+ */
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned durability = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) /* p holds the decoded pointer for each iteration */
+ durability += bch2_extent_ptr_durability(c, &p);
return durability;
}
-unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
+static unsigned bch2_bkey_durability_safe(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
unsigned durability = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- durability += bch2_extent_ptr_durability(c,& p);
+ if (p.ptr.dev < c->sb.nr_devices && c->devs[p.ptr.dev])
+ durability += bch2_extent_ptr_durability(c, &p);
return durability;
}
k->k.u64s -= extent_entry_u64s(entry);
}
-static inline void __extent_entry_insert(struct bkey_i *k,
- union bch_extent_entry *dst,
- union bch_extent_entry *new)
-{
- union bch_extent_entry *end = bkey_val_end(bkey_i_to_s(k));
-
- memmove_u64s_up_small((u64 *) dst + extent_entry_u64s(new),
- dst, (u64 *) end - (u64 *) dst);
- k->k.u64s += extent_entry_u64s(new);
- memcpy(dst, new, extent_entry_bytes(new));
-}
-
void bch2_extent_ptr_decoded_append(struct bkey_i *k,
struct extent_ptr_decoded *p)
{
/*
* Returns pointer to the next entry after the one being dropped:
*/
-static union bch_extent_entry *__bch2_bkey_drop_ptr(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
{
bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
union bch_extent_entry *ret =
- __bch2_bkey_drop_ptr(k, ptr);
+ bch2_bkey_drop_ptr_noerror(k, ptr);
/*
* If we deleted all the dirty pointers and there's still cached
void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
{
- struct bch_extent_ptr *ptr = (void *) bch2_bkey_has_device(k.s_c, dev);
+ struct bch_extent_ptr *ptr = bch2_bkey_has_device(k, dev); /* lookup on the mutable key, so the old (void *) cast is gone; NULL-checked below */
if (ptr)
- __bch2_bkey_drop_ptr(k, ptr);
+ bch2_bkey_drop_ptr_noerror(k, ptr); /* same helper as before under its new, non-static name */
}
-const struct bch_extent_ptr *
-bch2_bkey_has_device(struct bkey_s_c k, unsigned dev)
+const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
bkey_for_each_ptr_decode(k1.k, ptrs1, p1, entry1)
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
- if (p1.ptr.dev == p2.ptr.dev &&
- p1.ptr.gen == p2.ptr.gen &&
- (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
- (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
- return true;
+ if (p1.ptr.dev == p2.ptr.dev &&
+ p1.ptr.gen == p2.ptr.gen &&
+ (s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ return true;
return false;
} else {
}
}
-bool bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1,
- struct bkey_s_c k2)
+struct bch_extent_ptr *
+bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bkey_s k2)
{
- struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(k2);
- const union bch_extent_entry *entry2;
+ struct bkey_ptrs ptrs2 = bch2_bkey_ptrs(k2);
+ union bch_extent_entry *entry2;
struct extent_ptr_decoded p2;
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
p1.ptr.gen == p2.ptr.gen &&
(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
(s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
- return true;
+ return &entry2->ptr;
- return false;
+ return NULL;
+}
+
+void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
+{ /*
+ * Mark @ptr, which must be one of the entries of @k, as cached. If a
+ * stripe_ptr entry was seen since the previous pointer entry, it is
+ * dropped as well; bch2_bkey_ptrs_invalid() rejects a pointer that is
+ * both cached and erasure coded. Hits BUG() when @ptr is not found
+ * among the entries of @k.
+ */
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ union bch_extent_entry *entry;
+ union bch_extent_entry *ec = NULL; /* stripe_ptr entry seen since the last pointer entry, if any */
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (&entry->ptr == ptr) { /* found the pointer we were asked to change */
+ ptr->cached = true;
+ if (ec)
+ extent_entry_drop(k, ec);
+ return;
+ }
+
+ if (extent_entry_is_stripe_ptr(entry))
+ ec = entry;
+ else if (extent_entry_is_ptr(entry)) /* a different pointer: any stripe entry before it is not ours */
+ ec = NULL;
+ }
+
+ BUG(); /* reached only when ptr is not an entry of k */
}
/*
struct bch_dev *ca;
bool first = true;
+ if (c)
+ prt_printf(out, "durability: %u ", bch2_bkey_durability_safe(c, k));
+
bkey_extent_entry_for_each(ptrs, entry) {
if (!first)
prt_printf(out, " ");
static int extent_ptr_invalid(const struct bch_fs *c,
struct bkey_s_c k,
+ enum bkey_invalid_flags flags,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk,
bool metadata,
struct bch_dev *ca;
if (!bch2_dev_exists2(c, ptr->dev)) {
+ /*
+ * If we're in the write path this key might have already been
+ * overwritten, and we could be seeing a device that doesn't
+ * exist anymore due to racing with device removal:
+ */
+ if (flags & BKEY_INVALID_WRITE)
+ return 0;
+
prt_printf(err, "pointer to invalid device (%u)", ptr->dev);
return -BCH_ERR_invalid_bkey;
}
}
int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
- int rw, struct printbuf *err)
+ enum bkey_invalid_flags flags,
+ struct printbuf *err)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
unsigned size_ondisk = k.k->size;
unsigned nonce = UINT_MAX;
unsigned nr_ptrs = 0;
- bool unwritten = false;
+ bool unwritten = false, have_ec = false, crc_since_last_ptr = false;
int ret;
if (bkey_is_btree_ptr(k.k))
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
- ret = extent_ptr_invalid(c, k, &entry->ptr, size_ondisk,
- false, err);
+ ret = extent_ptr_invalid(c, k, flags, &entry->ptr,
+ size_ondisk, false, err);
if (ret)
return ret;
return -BCH_ERR_invalid_bkey;
}
+ if (entry->ptr.cached && have_ec) {
+ prt_printf(err, "cached, erasure coded ptr");
+ return -BCH_ERR_invalid_bkey;
+ }
+
unwritten = entry->ptr.unwritten;
+ have_ec = false;
+ crc_since_last_ptr = false;
nr_ptrs++;
break;
case BCH_EXTENT_ENTRY_crc32:
return -BCH_ERR_invalid_bkey;
}
}
+
+ if (crc_since_last_ptr) {
+ prt_printf(err, "redundant crc entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+ crc_since_last_ptr = true;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
+ if (have_ec) {
+ prt_printf(err, "redundant stripe entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+ have_ec = true;
+ break;
+ case BCH_EXTENT_ENTRY_rebalance:
break;
}
}
+ if (!nr_ptrs) {
+ prt_str(err, "no ptrs");
+ return -BCH_ERR_invalid_bkey;
+ }
+
if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
prt_str(err, "too many ptrs");
return -BCH_ERR_invalid_bkey;
}
+ if (crc_since_last_ptr) {
+ prt_printf(err, "redundant crc entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+
+ if (have_ec) {
+ prt_printf(err, "redundant stripe entry");
+ return -BCH_ERR_invalid_bkey;
+ }
+
return 0;
}
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
break;
+ case BCH_EXTENT_ENTRY_rebalance:
+ break;
}
}
}
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
break;
+ case BCH_EXTENT_ENTRY_rebalance:
+ break;
}
if (extent_entry_is_crc(entry))