]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/extents.c
Update bcachefs sources to f7ccf51390 bcachefs: durability
[bcachefs-tools-debian] / libbcachefs / extents.c
index f5dccfad15d6572d4470e859a023e9d4a59cf4de..ed33f9bf528a9628c9f1d3c5f1b0e2ed47f79a1d 100644 (file)
@@ -28,7 +28,7 @@
 static enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *,
                                           struct bkey_i *, struct bkey_i *);
 
-static void sort_key_next(struct btree_node_iter *iter,
+static void sort_key_next(struct btree_node_iter_large *iter,
                          struct btree *b,
                          struct btree_node_iter_set *i)
 {
@@ -54,7 +54,7 @@ static void sort_key_next(struct btree_node_iter *iter,
        ?: (l).k - (r).k;                                               \
 })
 
-static inline bool should_drop_next_key(struct btree_node_iter *iter,
+static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
                                        struct btree *b)
 {
        struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
@@ -81,8 +81,8 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter,
 }
 
 struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
-                                                 struct btree *b,
-                                                 struct btree_node_iter *iter)
+                                       struct btree *b,
+                                       struct btree_node_iter_large *iter)
 {
        struct bkey_packed *out = dst->start;
        struct btree_nr_keys nr;
@@ -91,7 +91,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
 
        heap_resort(iter, key_sort_cmp);
 
-       while (!bch2_btree_node_iter_end(iter)) {
+       while (!bch2_btree_node_iter_large_end(iter)) {
                if (!should_drop_next_key(iter, b)) {
                        struct bkey_packed *k =
                                __btree_node_offset_to_key(b, iter->data->k);
@@ -148,7 +148,7 @@ bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group
                struct bch_dev *ca = c->devs[ptr->dev];
 
                if (ca->mi.group &&
-                   ca->mi.group == group)
+                   ca->mi.group - 1 == group)
                        return ptr;
        }
 
@@ -201,17 +201,31 @@ unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
        return nr_ptrs;
 }
 
-unsigned bch2_extent_nr_good_ptrs(struct bch_fs *c, struct bkey_s_c_extent e)
+unsigned bch2_extent_ptr_durability(struct bch_fs *c,
+                                   const struct bch_extent_ptr *ptr)
+{
+       struct bch_dev *ca;
+
+       if (ptr->cached)
+               return 0;
+
+       ca = bch_dev_bkey_exists(c, ptr->dev);
+
+       if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+               return 0;
+
+       return ca->mi.durability;
+}
+
+unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
 {
        const struct bch_extent_ptr *ptr;
-       unsigned nr_ptrs = 0;
+       unsigned durability = 0;
 
        extent_for_each_ptr(e, ptr)
-               nr_ptrs += (!ptr->cached &&
-                           bch_dev_bkey_exists(c, ptr->dev)->mi.state !=
-                           BCH_MEMBER_STATE_FAILED);
+               durability += bch2_extent_ptr_durability(c, ptr);
 
-       return nr_ptrs;
+       return durability;
 }
 
 unsigned bch2_extent_is_compressed(struct bkey_s_c k)
@@ -694,7 +708,7 @@ static void btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
                        goto err;
        }
 
-       if (!bch2_sb_has_replicas(c, BCH_DATA_BTREE, bch2_extent_devs(e))) {
+       if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
                bch2_bkey_val_to_text(c, btree_node_type(b),
                                     buf, sizeof(buf), k);
                bch2_fs_bug(c,
@@ -890,13 +904,13 @@ static void extent_save(struct btree *b, struct btree_node_iter *iter,
                 bkey_start_pos(&_ur)) ?: (r).k - (l).k;                \
 })
 
-static inline void extent_sort_sift(struct btree_node_iter *iter,
+static inline void extent_sort_sift(struct btree_node_iter_large *iter,
                                    struct btree *b, size_t i)
 {
        heap_sift_down(iter, i, extent_sort_cmp);
 }
 
-static inline void extent_sort_next(struct btree_node_iter *iter,
+static inline void extent_sort_next(struct btree_node_iter_large *iter,
                                    struct btree *b,
                                    struct btree_node_iter_set *i)
 {
@@ -938,7 +952,7 @@ static void extent_sort_append(struct bch_fs *c,
 struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
                                        struct bset *dst,
                                        struct btree *b,
-                                       struct btree_node_iter *iter)
+                                       struct btree_node_iter_large *iter)
 {
        struct bkey_format *f = &b->format;
        struct btree_node_iter_set *_l = iter->data, *_r;
@@ -951,7 +965,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
        heap_resort(iter, extent_sort_cmp);
 
-       while (!bch2_btree_node_iter_end(iter)) {
+       while (!bch2_btree_node_iter_large_end(iter)) {
                lk = __btree_node_offset_to_key(b, _l->k);
 
                if (iter->used == 1) {
@@ -1766,7 +1780,6 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
        unsigned seq, stale;
        char buf[160];
        bool bad;
-       unsigned ptrs_per_tier[BCH_TIER_MAX];
        unsigned replicas = 0;
 
        /*
@@ -1778,12 +1791,9 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
         * going to get overwritten during replay)
         */
 
-       memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
-
        extent_for_each_ptr(e, ptr) {
                ca = bch_dev_bkey_exists(c, ptr->dev);
                replicas++;
-               ptrs_per_tier[ca->mi.tier]++;
 
                /*
                 * If journal replay hasn't finished, we might be seeing keys
@@ -1834,7 +1844,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
        }
 
        if (!bkey_extent_is_cached(e.k) &&
-           !bch2_sb_has_replicas(c, BCH_DATA_USER, bch2_extent_devs(e))) {
+           !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
                bch2_bkey_val_to_text(c, btree_node_type(b),
                                     buf, sizeof(buf), e.s_c);
                bch2_fs_bug(c,
@@ -1886,12 +1896,6 @@ static void bch2_extent_to_text(struct bch_fs *c, char *buf,
 #undef p
 }
 
-static unsigned PTR_TIER(struct bch_fs *c,
-                        const struct bch_extent_ptr *ptr)
-{
-       return bch_dev_bkey_exists(c, ptr->dev)->mi.tier;
-}
-
 static void bch2_extent_crc_init(union bch_extent_crc *crc,
                                 struct bch_extent_crc_unpacked new)
 {
@@ -2013,45 +2017,40 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 }
 
 void bch2_extent_mark_replicas_cached(struct bch_fs *c,
-                                     struct bkey_s_extent e)
+                                     struct bkey_s_extent e,
+                                     unsigned nr_desired_replicas,
+                                     unsigned target)
 {
        struct bch_extent_ptr *ptr;
-       unsigned tier = 0, nr_cached = 0;
-       unsigned nr_good = bch2_extent_nr_good_ptrs(c, e.c);
-       bool have_higher_tier;
+       int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas;
 
-       if (nr_good <= c->opts.data_replicas)
+       if (extra <= 0)
                return;
 
-       nr_cached = nr_good - c->opts.data_replicas;
+       extent_for_each_ptr(e, ptr) {
+               int n = bch2_extent_ptr_durability(c, ptr);
 
-       do {
-               have_higher_tier = false;
+               if (n && n <= extra &&
+                   !dev_in_target(c->devs[ptr->dev], target)) {
+                       ptr->cached = true;
+                       extra -= n;
+               }
+       }
 
-               extent_for_each_ptr(e, ptr) {
-                       if (!ptr->cached &&
-                           PTR_TIER(c, ptr) == tier) {
-                               ptr->cached = true;
-                               nr_cached--;
-                               if (!nr_cached)
-                                       return;
-                       }
+       extent_for_each_ptr(e, ptr) {
+               int n = bch2_extent_ptr_durability(c, ptr);
 
-                       if (PTR_TIER(c, ptr) > tier)
-                               have_higher_tier = true;
+               if (n && n <= extra) {
+                       ptr->cached = true;
+                       extra -= n;
                }
-
-               tier++;
-       } while (have_higher_tier);
+       }
 }
 
 /*
- * This picks a non-stale pointer, preferabbly from a device other than
- * avoid.  Avoid can be NULL, meaning pick any.  If there are no non-stale
- * pointers to other devices, it will still pick a pointer from avoid.
- * Note that it prefers lowered-numbered pointers to higher-numbered pointers
- * as the pointers are sorted by tier, hence preferring pointers to tier 0
- * rather than pointers to tier 1.
+ * This picks a non-stale pointer, preferably from a device other than @avoid.
+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
+ * other devices, it will still pick a pointer from avoid.
  */
 void bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
                          struct bch_devs_mask *avoid,