#include "bkey_methods.h"
#include "btree_gc.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
#include "checksum.h"
#include "debug.h"
#include "dirent.h"
return nr_ptrs;
}
+/*
+ * Remove @ptr from extent @e by shifting the following entries down and
+ * shrinking the key.
+ *
+ * Doesn't clean up crc entries left without any pointers referencing them -
+ * callers must follow up with bch2_extent_drop_redundant_crcs(), or use
+ * bch2_extent_drop_ptr() which does both.
+ */
+void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
+{
+ /* @ptr must lie within @e's entry list, and be a plain ptr entry: */
+ EBUG_ON(ptr < &e.v->start->ptr ||
+ ptr >= &extent_entry_last(e)->ptr);
+ EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
+ memmove_u64s_down(ptr, ptr + 1,
+ (u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
+ /* key size is counted in u64s; one bch_extent_ptr was removed: */
+ e.k->u64s -= sizeof(*ptr) / sizeof(u64);
+}
+
+/*
+ * Remove @ptr from @e, then drop any crc entries that no longer have a
+ * pointer referencing them.
+ */
+void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
+{
+ __bch2_extent_drop_ptr(e, ptr);
+ bch2_extent_drop_redundant_crcs(e);
+}
+
+/*
+ * Remove the @idx'th pointer (in iteration order) from @e; BUG()s if @e has
+ * fewer than @idx + 1 pointers.
+ */
+void bch2_extent_drop_ptr_idx(struct bkey_s_extent e, unsigned idx)
+{
+ struct bch_extent_ptr *ptr;
+ unsigned i = 0;
+
+ extent_for_each_ptr(e, ptr)
+ if (i++ == idx)
+ goto found;
+
+ BUG();
+found:
+ bch2_extent_drop_ptr(e, ptr);
+}
+
/* returns true if equal */
static bool crc_cmp(union bch_extent_crc *l, union bch_extent_crc *r)
{
if (ptr != ptr2 && ptr->dev == ptr2->dev)
return "multiple pointers to same device";
- if (ptr->offset + size_ondisk > ca->mi.bucket_size * ca->mi.nbuckets)
+ if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets))
return "offset past end of device";
- if (ptr->offset < ca->mi.bucket_size * ca->mi.first_bucket)
+ if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket))
return "offset before first bucket";
- if ((ptr->offset & (ca->mi.bucket_size - 1)) +
+ if (bucket_remainder(ca, ptr->offset) +
size_ondisk > ca->mi.bucket_size)
return "spans multiple buckets";
return out - buf;
}
+/*
+ * Choose a pointer of @e to read from, skipping stale cached pointers,
+ * failed devices, and devices set in the optional @avoid mask. Among the
+ * remaining pointers, a candidate only replaces the current pick if its
+ * tier is not worse (higher) than the pick's, so better tiers win; within a
+ * tier the last acceptable pointer iterated wins.
+ *
+ * On success, *@pick holds the chosen pointer (and crc, if any) plus a
+ * reference on pick->ca->io_ref that the caller must drop; pick->ca is left
+ * NULL/unchanged if no usable pointer was found.
+ */
+static void extent_pick_read_device(struct bch_fs *c,
+ struct bkey_s_c_extent e,
+ struct bch_devs_mask *avoid,
+ struct extent_pick_ptr *pick)
+{
+ const union bch_extent_crc *crc;
+ const struct bch_extent_ptr *ptr;
+
+ extent_for_each_ptr_crc(e, ptr, crc) {
+ struct bch_dev *ca = c->devs[ptr->dev];
+
+ /* A stale cached pointer's bucket may have been reused: */
+ if (ptr->cached && ptr_stale(ca, ptr))
+ continue;
+
+ if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+ continue;
+
+ /* Skip devices in the caller-supplied avoid mask: */
+ if (avoid && test_bit(ca->dev_idx, avoid->d))
+ continue;
+
+ /* Never downgrade to a strictly worse tier: */
+ if (pick->ca && pick->ca->mi.tier < ca->mi.tier)
+ continue;
+
+ /* Device may be in the process of going away: */
+ if (!percpu_ref_tryget(&ca->io_ref))
+ continue;
+
+ /* Release the ref held on the pick we're replacing: */
+ if (pick->ca)
+ percpu_ref_put(&pick->ca->io_ref);
+
+ *pick = (struct extent_pick_ptr) {
+ .ptr = *ptr,
+ .ca = ca,
+ };
+
+ /*
+ * NOTE(review): size == 0 presumably means a key with no crc
+ * to record (e.g. btree node pointers) - confirm
+ */
+ if (e.k->size)
+ pick->crc = crc_to_128(e.k, crc);
+ }
+}
+
/* Btree ptrs */
static const char *bch2_btree_ptr_invalid(const struct bch_fs *c,
extent_for_each_ptr_crc(e, ptr, crc) {
reason = extent_ptr_invalid(c, e, ptr,
- c->sb.btree_node_size,
+ c->opts.btree_node_size,
true);
if (reason)
return reason;
g = PTR_BUCKET(ca, ptr);
replicas++;
+ if (!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags))
+ continue;
+
err = "stale";
if (ptr_stale(ca, ptr))
goto err;
do {
seq = read_seqcount_begin(&c->gc_pos_lock);
bad = gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
- g->mark.data_type != BUCKET_BTREE;
+ (g->mark.data_type != BUCKET_BTREE ||
+ g->mark.dirty_sectors < c->opts.btree_node_size);
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
err = "inconsistent";
}
+/*
+ * Pick a device to read btree node @b from, skipping devices in the
+ * optional @avoid mask; on success a reference is held on pick.ca->io_ref
+ * (pick.ca is NULL if no usable pointer was found).
+ */
struct extent_pick_ptr
-bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b)
+bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
+ struct bch_devs_mask *avoid)
{
- struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
- const union bch_extent_crc *crc;
- const struct bch_extent_ptr *ptr;
struct extent_pick_ptr pick = { .ca = NULL };
- extent_for_each_ptr_crc(e, ptr, crc) {
- struct bch_dev *ca = c->devs[ptr->dev];
- struct btree *root = btree_node_root(c, b);
-
- if (bch2_fs_inconsistent_on(crc, c,
- "btree node pointer with crc at btree %u level %u/%u bucket %zu",
- b->btree_id, b->level, root ? root->level : -1,
- PTR_BUCKET_NR(ca, ptr)))
- break;
-
- if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
- continue;
-
- if (pick.ca && pick.ca->mi.tier < ca->mi.tier)
- continue;
-
- if (!percpu_ref_tryget(&ca->io_ref))
- continue;
-
- if (pick.ca)
- percpu_ref_put(&pick.ca->io_ref);
-
- pick.ca = ca;
- pick.ptr = *ptr;
- }
+ /* Device selection is now shared with the data read path: */
+ extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
+ avoid, &pick);
return pick;
}
* as the pointers are sorted by tier, hence preferring pointers to tier 0
* rather than pointers to tier 1.
*/
-void bch2_extent_pick_ptr_avoiding(struct bch_fs *c, struct bkey_s_c k,
- struct bch_dev *avoid,
- struct extent_pick_ptr *ret)
+void bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_devs_mask *avoid,
+ struct extent_pick_ptr *ret)
{
struct bkey_s_c_extent e;
- const union bch_extent_crc *crc;
- const struct bch_extent_ptr *ptr;
switch (k.k->type) {
case KEY_TYPE_DELETED:
e = bkey_s_c_to_extent(k);
ret->ca = NULL;
- extent_for_each_ptr_crc(e, ptr, crc) {
- struct bch_dev *ca = c->devs[ptr->dev];
-
- if (ptr->cached && ptr_stale(ca, ptr))
- continue;
-
- if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
- continue;
-
- if (ret->ca &&
- (ca == avoid ||
- ret->ca->mi.tier < ca->mi.tier))
- continue;
-
- if (!percpu_ref_tryget(&ca->io_ref))
- continue;
-
- if (ret->ca)
- percpu_ref_put(&ret->ca->io_ref);
-
- *ret = (struct extent_pick_ptr) {
- .crc = crc_to_128(e.k, crc),
- .ptr = *ptr,
- .ca = ca,
- };
- }
+ extent_pick_read_device(c, bkey_s_c_to_extent(k), avoid, ret);
if (!ret->ca && !bkey_extent_is_cached(e.k))
ret->ca = ERR_PTR(-EIO);
extent_for_each_entry(el, en_l) {
struct bch_extent_ptr *lp, *rp;
- unsigned bucket_size;
+ struct bch_dev *ca;
en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
return BCH_MERGE_NOMERGE;
/* We don't allow extents to straddle buckets: */
- bucket_size = c->devs[lp->dev]->mi.bucket_size;
+ ca = c->devs[lp->dev];
- if ((lp->offset & ~((u64) bucket_size - 1)) !=
- (rp->offset & ~((u64) bucket_size - 1)))
+ if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
return BCH_MERGE_NOMERGE;
}