diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index dcbe04040a39c913e199e33f97e27057b05ef881..412fed47948278060b77516e6c5a4801df124a78 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1,57 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Primary bucket allocation code
- *
  * Copyright 2012 Google, Inc.
  *
- * Allocation in bcache is done in terms of buckets:
- *
- * Each bucket has associated an 8 bit gen; this gen corresponds to the gen in
- * btree pointers - they must match for the pointer to be considered valid.
- *
- * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a
- * bucket simply by incrementing its gen.
- *
- * The gens (along with the priorities; it's really the gens are important but
- * the code is named as if it's the priorities) are written in an arbitrary list
- * of buckets on disk, with a pointer to them in the journal header.
- *
- * When we invalidate a bucket, we have to write its new gen to disk and wait
- * for that write to complete before we use it - otherwise after a crash we
- * could have pointers that appeared to be good but pointed to data that had
- * been overwritten.
- *
- * Since the gens and priorities are all stored contiguously on disk, we can
- * batch this up: We fill up the free_inc list with freshly invalidated buckets,
- * call prio_write(), and when prio_write() finishes we pull buckets off the
- * free_inc list and optionally discard them.
- *
- * free_inc isn't the only freelist - if it was, we'd often have to sleep while
- * priorities and gens were being written before we could allocate. c->free is a
- * smaller freelist, and buckets on that list are always ready to be used.
- *
- * If we've got discards enabled, that happens when a bucket moves from the
- * free_inc list to the free list.
- *
- * It's important to ensure that gens don't wrap around - with respect to
- * either the oldest gen in the btree or the gen on disk. This is quite
- * difficult to do in practice, but we explicitly guard against it anyways - if
- * a bucket is in danger of wrapping around we simply skip invalidating it that
- * time around, and we garbage collect or rewrite the priorities sooner than we
- * would have otherwise.
+ * Foreground allocator code: allocate buckets from freelist, and allocate in
+ * sector granularity from writepoints.
  *
  * bch2_bucket_alloc() allocates a single bucket from a specific device.
  *
  * bch2_bucket_alloc_set() allocates one or more buckets from different devices
  * in a given filesystem.
- *
- * invalidate_buckets() drives all the processes described above. It's called
- * from bch2_bucket_alloc() and a few other places that need to make sure free
- * buckets are ready.
- *
- * invalidate_buckets_(lru|fifo)() find buckets that are available to be
- * invalidated, and then invalidate them and stick them on the free_inc list -
- * in either lru or fifo order.
  */
 
 #include "bcachefs.h"
@@ -98,8 +55,7 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
        percpu_down_read(&c->mark_lock);
        spin_lock(&ob->lock);
 
-       bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
-                              false, gc_pos_alloc(c, ob), 0);
+       bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr), false);
        ob->valid = false;
        ob->type = 0;
 
@@ -109,7 +65,9 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
        spin_lock(&c->freelist_lock);
        ob->freelist = c->open_buckets_freelist;
        c->open_buckets_freelist = ob - c->open_buckets;
+
        c->open_buckets_nr_free++;
+       ca->nr_open_buckets--;
        spin_unlock(&c->freelist_lock);
 
        closure_wake_up(&c->open_buckets_wait);
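
The freelist push in this hunk is an intrusive, index-linked list: each
open_bucket stores the array index of the next free entry, and the list head
is itself just an index (ob - c->open_buckets). A toy model of the put path,
with the spinlock and per-device accounting reduced to plain counters and all
names standing in for the real ones:

#include <stddef.h>
#include <stdio.h>

struct open_bucket { size_t freelist; int valid; };

#define NR_OPEN_BUCKETS 8
static struct open_bucket open_buckets[NR_OPEN_BUCKETS];
static size_t open_buckets_freelist;   /* index of first free entry */
static unsigned open_buckets_nr_free;

static void open_bucket_put(struct open_bucket *ob)
{
        ob->valid = 0;
        /* Push onto the freelist: link to the old head, then point the
         * head at this entry's own array index. */
        ob->freelist = open_buckets_freelist;
        open_buckets_freelist = ob - open_buckets;
        open_buckets_nr_free++;
}

int main(void)
{
        open_bucket_put(&open_buckets[3]);
        open_bucket_put(&open_buckets[5]);
        printf("head %zu, nr_free %u\n", open_buckets_freelist,
               open_buckets_nr_free);  /* head 5, nr_free 2 */
        return 0;
}
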
@@ -192,8 +150,9 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
        rcu_read_lock();
        buckets = bucket_array(ca);
 
-       for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
-               if (is_available_bucket(buckets->b[b].mark))
+       for (b = buckets->first_bucket; b < buckets->nbuckets; b++)
+               if (is_available_bucket(buckets->b[b].mark) &&
+                   !buckets->b[b].mark.owned_by_allocator)
                        goto success;
        b = -1;
 success:
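
bch2_bucket_alloc_new_fs() is, per its name, the bring-up path used before the
normal freelists exist: a linear scan for the first bucket that is available
and, after this change, not already owned by the allocator, with -1 signalling
that none was found. Roughly, with invented types:

#include <stdbool.h>
#include <stdio.h>

/* Simplified bucket state; the real checks live in is_available_bucket()
 * and the owned_by_allocator mark bit. */
struct bucket_mark { bool dirty; bool owned_by_allocator; };

static bool is_available(struct bucket_mark m)
{
        return !m.dirty && !m.owned_by_allocator;
}

/* Return the first usable bucket index in [first_bucket, nbuckets),
 * or -1 if there is none. */
static long scan_for_bucket(const struct bucket_mark *marks,
                            long first_bucket, long nbuckets)
{
        for (long b = first_bucket; b < nbuckets; b++)
                if (is_available(marks[b]))
                        return b;
        return -1;
}

int main(void)
{
        struct bucket_mark marks[6] = {
                [0] = { .dirty = true },
                [1] = { .owned_by_allocator = true },
        };
        printf("first free bucket: %ld\n", scan_for_bucket(marks, 0, 6)); /* 2 */
        return 0;
}
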
@@ -224,9 +183,8 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
                                      bool may_alloc_partial,
                                      struct closure *cl)
 {
-       struct bucket_array *buckets;
        struct open_bucket *ob;
-       long bucket = 0;
+       long b = 0;
 
        spin_lock(&c->freelist_lock);
 
@@ -260,13 +218,13 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
                return ERR_PTR(-OPEN_BUCKETS_EMPTY);
        }
 
-       if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket)))
+       if (likely(fifo_pop(&ca->free[RESERVE_NONE], b)))
                goto out;
 
        switch (reserve) {
        case RESERVE_BTREE_MOVINGGC:
        case RESERVE_MOVINGGC:
-               if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket))
+               if (fifo_pop(&ca->free[RESERVE_MOVINGGC], b))
                        goto out;
                break;
        default:
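
The reserve fallback here is worth spelling out: every caller first tries the
general RESERVE_NONE fifo, and only the btree/copygc reserves may dip into the
buckets held back in RESERVE_MOVINGGC; everyone else fails with
-FREELIST_EMPTY. A compilable sketch with a toy ring-buffer fifo (no
wraparound checking or locking, unlike the real fifos, and only one fallback
reserve shown):

#include <stdbool.h>
#include <stdio.h>

struct fifo { long data[16]; unsigned front, back; };

static bool fifo_pop(struct fifo *f, long *out)
{
        if (f->front == f->back)
                return false;
        *out = f->data[f->front++ % 16];
        return true;
}

enum alloc_reserve { RESERVE_MOVINGGC, RESERVE_NONE, RESERVE_NR };

/* Fallback order mirroring the hunk above: try the general freelist
 * first; only the movinggc-class reserves may fall back to the buckets
 * held back for copygc. */
static bool alloc_bucket(struct fifo *freelists, enum alloc_reserve reserve,
                         long *b)
{
        if (fifo_pop(&freelists[RESERVE_NONE], b))
                return true;

        switch (reserve) {
        case RESERVE_MOVINGGC:
                return fifo_pop(&freelists[RESERVE_MOVINGGC], b);
        default:
                return false;   /* caller sees -FREELIST_EMPTY */
        }
}

int main(void)
{
        struct fifo freelists[RESERVE_NR] = {0};
        struct fifo *gc = &freelists[RESERVE_MOVINGGC];
        long b = -1;

        gc->data[gc->back++ % 16] = 42;

        printf("NONE:     ok=%d\n", alloc_bucket(freelists, RESERVE_NONE, &b));
        bool ok = alloc_bucket(freelists, RESERVE_MOVINGGC, &b);
        printf("MOVINGGC: ok=%d b=%ld\n", ok, b);
        return 0;
}
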
@@ -284,20 +242,19 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
        trace_bucket_alloc_fail(ca, reserve);
        return ERR_PTR(-FREELIST_EMPTY);
 out:
-       verify_not_on_freelist(c, ca, bucket);
+       verify_not_on_freelist(c, ca, b);
 
        ob = bch2_open_bucket_alloc(c);
 
        spin_lock(&ob->lock);
-       buckets = bucket_array(ca);
 
        ob->valid       = true;
        ob->sectors_free = ca->mi.bucket_size;
        ob->alloc_reserve = reserve;
        ob->ptr         = (struct bch_extent_ptr) {
                .type   = 1 << BCH_EXTENT_ENTRY_ptr,
-               .gen    = buckets->b[bucket].mark.gen,
-               .offset = bucket_to_sector(ca, bucket),
+               .gen    = bucket(ca, b)->mark.gen,
+               .offset = bucket_to_sector(ca, b),
                .dev    = ca->dev_idx,
        };
 
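
The pointer filled in above is self-describing: dev and gen identify the
device and the bucket's current generation, and the offset is derived from the
bucket index. bucket_to_sector() amounts to the index scaled by the bucket
size; assuming 512-byte sectors and an invented bucket size:

#include <stdint.h>
#include <stdio.h>

/* bucket_to_sector(): bucket index times bucket size in sectors.
 * The numbers below are made up for illustration. */
static uint64_t bucket_to_sector(uint64_t bucket, uint16_t bucket_size)
{
        return bucket * bucket_size;
}

int main(void)
{
        /* A 512 KiB bucket is 1024 512-byte sectors, so bucket 3
         * starts at sector 3072: */
        printf("%llu\n", (unsigned long long)bucket_to_sector(3, 1024));
        return 0;
}
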
@@ -317,6 +274,7 @@ out:
                c->blocked_allocate = 0;
        }
 
+       ca->nr_open_buckets++;
        spin_unlock(&c->freelist_lock);
 
        bch2_wake_allocator(ca);
@@ -352,7 +310,7 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
                               struct dev_stripe_state *stripe)
 {
        u64 *v = stripe->next_alloc + ca->dev_idx;
-       u64 free_space = dev_buckets_free(ca);
+       u64 free_space = dev_buckets_available(ca);
        u64 free_space_inv = free_space
                ? div64_u64(1ULL << 48, free_space)
                : 1ULL << 48;
@@ -489,16 +447,20 @@ bucket_alloc_from_stripe(struct bch_fs *c,
        devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
 
        for (i = 0; i < devs_sorted.nr; i++)
-               open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
+               for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
+                       if (!h->s->blocks[ec_idx])
+                               continue;
+
+                       ob = c->open_buckets + h->s->blocks[ec_idx];
                        if (ob->ptr.dev == devs_sorted.devs[i] &&
-                           !test_and_set_bit(h->s->data_block_idx[ec_idx],
-                                             h->s->blocks_allocated))
+                           !test_and_set_bit(ec_idx, h->s->blocks_allocated))
                                goto got_bucket;
+               }
        goto out_put_head;
 got_bucket:
        ca = bch_dev_bkey_exists(c, ob->ptr.dev);
 
-       ob->ec_idx      = h->s->data_block_idx[ec_idx];
+       ob->ec_idx      = ec_idx;
        ob->ec          = h->s;
 
        add_new_bucket(c, ptrs, devs_may_alloc,
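
The rewritten loop reflects the new stripe layout: h->s->blocks[] now holds
open-bucket indices directly, with 0 marking an empty slot (hence the
!h->s->blocks[ec_idx] check), and blocks_allocated is indexed by ec_idx itself
rather than through data_block_idx. The claim is an atomic test_and_set_bit,
so two racing allocations cannot take the same stripe block. A userspace
stand-in for that primitive:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Userspace model of the kernel's test_and_set_bit(): atomically set
 * bit nr in the bitmap and return its previous value. */
static bool test_and_set_bit(unsigned nr, atomic_ulong *bitmap)
{
        unsigned long mask = 1UL << (nr % (8 * sizeof(unsigned long)));
        return atomic_fetch_or(&bitmap[nr / (8 * sizeof(unsigned long))],
                               mask) & mask;
}

int main(void)
{
        atomic_ulong blocks_allocated[1] = {0};

        /* First claimant of block 2 wins; a second attempt sees the bit
         * already set and must look elsewhere, which is how the loop
         * above avoids handing out the same stripe block twice. */
        printf("%d\n", test_and_set_bit(2, blocks_allocated)); /* 0: claimed */
        printf("%d\n", test_and_set_bit(2, blocks_allocated)); /* 1: taken  */
        return 0;
}
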
@@ -636,10 +598,13 @@ void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
 
                if (!drop && ob->ec) {
                        mutex_lock(&ob->ec->lock);
-                       open_bucket_for_each(c, &ob->ec->blocks, ob2, j)
-                               drop |= ob2->ptr.dev == ca->dev_idx;
-                       open_bucket_for_each(c, &ob->ec->parity, ob2, j)
+                       for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
+                               if (!ob->ec->blocks[j])
+                                       continue;
+
+                               ob2 = c->open_buckets + ob->ec->blocks[j];
                                drop |= ob2->ptr.dev == ca->dev_idx;
+                       }
                        mutex_unlock(&ob->ec->lock);
                }
 
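
Same convention in this hunk: ob->ec->blocks[] stores open-bucket indices for
the whole stripe (new_stripe.key.v.nr_blocks covers data and parity), and 0
again means an empty slot. A small sketch of the scan, with invented names:

#include <stdbool.h>
#include <stdio.h>

/* blocks[] holds indices into a global open_buckets array; index 0 is
 * treated as "no bucket in this slot", matching the !blocks[j] check
 * in the loop above. */
struct open_bucket { unsigned dev; };
static struct open_bucket open_buckets[8] = { [3] = { .dev = 1 } };

static bool stripe_uses_dev(const unsigned *blocks, unsigned nr_blocks,
                            unsigned dev_idx)
{
        for (unsigned j = 0; j < nr_blocks; j++) {
                if (!blocks[j])
                        continue;       /* empty slot */
                if (open_buckets[blocks[j]].dev == dev_idx)
                        return true;
        }
        return false;
}

int main(void)
{
        unsigned blocks[4] = { 0, 3, 0, 0 };
        printf("%d\n", stripe_uses_dev(blocks, 4, 1)); /* 1 */
        return 0;
}
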
@@ -674,11 +639,14 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
 {
        struct write_point *wp;
 
+       rcu_read_lock();
        hlist_for_each_entry_rcu(wp, head, node)
                if (wp->write_point == write_point)
-                       return wp;
-
-       return NULL;
+                       goto out;
+       wp = NULL;
+out:
+       rcu_read_unlock();
+       return wp;
 }
 
 static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
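
The change to __writepoint_find() moves the RCU read-side critical section
inside the function and gives it a single unlock path: instead of returning
out of the loop, a match jumps to a common out label so rcu_read_unlock()
always runs. The same shape, with the RCU primitives stubbed out as no-ops so
the sketch compiles standalone:

#include <stddef.h>
#include <stdio.h>

/* Single-threaded stand-ins: the real rcu_read_lock()/rcu_read_unlock()
 * delimit a read-side critical section; here they are no-ops. */
#define rcu_read_lock()   ((void)0)
#define rcu_read_unlock() ((void)0)

struct write_point { unsigned long write_point; struct write_point *next; };

/* Walk the chain under the read lock; every exit goes through the one
 * unlock at the out label, matching the hunk above. */
static struct write_point *writepoint_find(struct write_point *head,
                                           unsigned long write_point)
{
        struct write_point *wp;

        rcu_read_lock();
        for (wp = head; wp; wp = wp->next)
                if (wp->write_point == write_point)
                        goto out;
        wp = NULL;
out:
        rcu_read_unlock();
        return wp;
}

int main(void)
{
        struct write_point b = { .write_point = 2, .next = NULL };
        struct write_point a = { .write_point = 1, .next = &b };

        printf("%p\n", (void *)writepoint_find(&a, 2)); /* address of b */
        return 0;
}
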