git.sesse.net Git - bcachefs-tools-debian/commitdiff
Update bcachefs sources to d7f6da1d60 bcachefs: fix missing include
author    Kent Overstreet <kent.overstreet@gmail.com>
          Fri, 12 Oct 2018 18:55:27 +0000 (14:55 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Fri, 12 Oct 2018 19:10:26 +0000 (15:10 -0400)
33 files changed:
.bcachefs_revision
cmd_debug.c
cmd_migrate.c
include/linux/freezer.h
libbcachefs/acl.c
libbcachefs/alloc.h [deleted file]
libbcachefs/alloc_background.c [moved from libbcachefs/alloc.c with 63% similarity]
libbcachefs/alloc_background.h [new file with mode: 0644]
libbcachefs/alloc_foreground.c [new file with mode: 0644]
libbcachefs/alloc_foreground.h [new file with mode: 0644]
libbcachefs/alloc_types.h
libbcachefs/bkey_methods.c
libbcachefs/btree_gc.c
libbcachefs/btree_types.h
libbcachefs/btree_update_interior.c
libbcachefs/btree_update_interior.h
libbcachefs/buckets.c
libbcachefs/chardev.c
libbcachefs/fs-io.c
libbcachefs/fsck.c
libbcachefs/io.c
libbcachefs/io.h
libbcachefs/io_types.h
libbcachefs/journal.c
libbcachefs/journal_io.c
libbcachefs/move.c
libbcachefs/movinggc.c
libbcachefs/rebalance.c
libbcachefs/recovery.c
libbcachefs/super.c
libbcachefs/sysfs.c
libbcachefs/util.c
libbcachefs/util.h

index 697c474c036e49a1897b7b58e4e6d525be2a94a3..14085745156da766186b9bc29d4bdb7bc9c60873 100644 (file)
@@ -1 +1 @@
-446219cb11af8b6a4c6b837e336bac45f43854c9
+d7f6da1d60ec24266301231538ff6f09716537ed
index 51099f1a6dc5c487b396d51560476897dcd9d361..d27e95554e9e863c8f13f7e7c6b86cbb54028259 100644 (file)
@@ -9,7 +9,6 @@
 #include "tools-util.h"
 
 #include "libbcachefs/bcachefs.h"
-#include "libbcachefs/alloc.h"
 #include "libbcachefs/bset.h"
 #include "libbcachefs/btree_cache.h"
 #include "libbcachefs/btree_iter.h"
index 177884da197b141ab76634fcb2fef9107aab2892..352f7403ba47f9e2bd537510af0a00642df13c8d 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/generic-radix-tree.h>
 #include <linux/xattr.h>
 #include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_background.h"
+#include "libbcachefs/alloc_foreground.h"
 #include "libbcachefs/btree_update.h"
 #include "libbcachefs/buckets.h"
 #include "libbcachefs/dirent.h"
index 2b76d8c87f1ee82bdb6d96d4317f75cb2e4265c5..1af94d5b6a07fe113bc7cd172682695b02176d57 100644 (file)
@@ -3,5 +3,6 @@
 
 #define try_to_freeze()
 #define set_freezable()
+#define freezing(task)         false
 
 #endif /* __TOOLS_LINUX_FREEZER_H */
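
A minimal sketch (not part of this commit; the thread body and shim behaviour are assumed) of the kernel-style loop that the new freezing() stub lets the userspace tools compile unchanged:

/* Sketch only: kernel-side threads poll freezing(current) in their run
 * loops; with the userspace stub above it always evaluates to false,
 * so the same loop builds in the tools and simply never freezes. */
static int example_thread(void *arg)
{
	while (!kthread_should_stop()) {
		if (freezing(current))
			try_to_freeze();	/* no-op under the tools' shims */

		/* ... allocator / journal work would go here ... */
	}
	return 0;
}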
index 5dd666ec42d271ef23e4852bd63730139f251d6f..741e44ee9d600ff77d3c25e6789680f6819c918a 100644 (file)
@@ -373,6 +373,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
                goto err;
        }
 
+       new->k.p = iter->pos;
        bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new->k_i));
        *new_acl = acl;
        acl = NULL;
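
A rough sketch of what the one-line bch2_acl_chmod() fix above restores (illustrative only; the xattr setup around it is assumed): a freshly built key carries no position until one is assigned, and the transactional update inserts at the key's own position, so it has to be copied from the iterator before the entry is queued.

/* Illustrative sketch, not from the commit: positioning a freshly
 * built key before queueing it in a btree transaction. */
struct bkey_i_xattr *new;	/* assumed: just allocated, k.p still POS_MIN */

new->k.p = iter->pos;		/* place the key where the iterator points */
bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new->k_i));
/* ...the caller commits the transaction later... */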
diff --git a/libbcachefs/alloc.h b/libbcachefs/alloc.h
deleted file mode 100644 (file)
index 739df23..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-#ifndef _BCACHEFS_ALLOC_H
-#define _BCACHEFS_ALLOC_H
-
-#include "bcachefs.h"
-#include "alloc_types.h"
-
-struct bkey;
-struct bch_dev;
-struct bch_fs;
-struct bch_devs_List;
-
-#define ALLOC_SCAN_BATCH(ca)           ((ca)->mi.nbuckets >> 9)
-
-const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
-int bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
-
-#define bch2_bkey_alloc_ops (struct bkey_ops) {                \
-       .key_invalid    = bch2_alloc_invalid,           \
-       .val_to_text    = bch2_alloc_to_text,           \
-}
-
-struct dev_alloc_list {
-       unsigned        nr;
-       u8              devs[BCH_SB_MEMBERS_MAX];
-};
-
-struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *,
-                                        struct write_point *,
-                                        struct bch_devs_mask *);
-void bch2_wp_rescale(struct bch_fs *, struct bch_dev *,
-                    struct write_point *);
-
-int bch2_alloc_read(struct bch_fs *, struct list_head *);
-int bch2_alloc_replay_key(struct bch_fs *, struct bpos);
-
-enum bucket_alloc_ret {
-       ALLOC_SUCCESS           = 0,
-       OPEN_BUCKETS_EMPTY      = -1,
-       FREELIST_EMPTY          = -2,   /* Allocator thread not keeping up */
-       NO_DEVICES              = -3,   /* -EROFS */
-};
-
-long bch2_bucket_alloc_new_fs(struct bch_dev *);
-
-int bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, enum alloc_reserve, bool,
-                     struct closure *);
-
-#define __writepoint_for_each_ptr(_wp, _ob, _i, _start)                        \
-       for ((_i) = (_start);                                           \
-            (_i) < (_wp)->nr_ptrs && ((_ob) = (_wp)->ptrs[_i], true);  \
-            (_i)++)
-
-#define writepoint_for_each_ptr_all(_wp, _ob, _i)                      \
-       __writepoint_for_each_ptr(_wp, _ob, _i, 0)
-
-#define writepoint_for_each_ptr(_wp, _ob, _i)                          \
-       __writepoint_for_each_ptr(_wp, _ob, _i, wp->first_ptr)
-
-void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
-
-static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
-{
-       if (atomic_dec_and_test(&ob->pin))
-               __bch2_open_bucket_put(c, ob);
-}
-
-static inline void bch2_open_bucket_put_refs(struct bch_fs *c, u8 *nr, u8 *refs)
-{
-       unsigned i;
-
-       for (i = 0; i < *nr; i++)
-               bch2_open_bucket_put(c, c->open_buckets + refs[i]);
-
-       *nr = 0;
-}
-
-static inline void bch2_open_bucket_get(struct bch_fs *c,
-                                       struct write_point *wp,
-                                       u8 *nr, u8 *refs)
-{
-       struct open_bucket *ob;
-       unsigned i;
-
-       writepoint_for_each_ptr(wp, ob, i) {
-               atomic_inc(&ob->pin);
-               refs[(*nr)++] = ob - c->open_buckets;
-       }
-}
-
-struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
-                                            unsigned,
-                                            struct write_point_specifier,
-                                            struct bch_devs_list *,
-                                            unsigned, unsigned,
-                                            enum alloc_reserve,
-                                            unsigned,
-                                            struct closure *);
-
-void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
-                                   struct bkey_i_extent *, unsigned);
-void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
-
-static inline void bch2_wake_allocator(struct bch_dev *ca)
-{
-       struct task_struct *p;
-
-       rcu_read_lock();
-       p = rcu_dereference(ca->alloc_thread);
-       if (p)
-               wake_up_process(p);
-       rcu_read_unlock();
-}
-
-static inline struct write_point_specifier writepoint_hashed(unsigned long v)
-{
-       return (struct write_point_specifier) { .v = v | 1 };
-}
-
-static inline struct write_point_specifier writepoint_ptr(struct write_point *wp)
-{
-       return (struct write_point_specifier) { .v = (unsigned long) wp };
-}
-
-void bch2_recalc_capacity(struct bch_fs *);
-
-void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
-void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
-
-void bch2_dev_allocator_stop(struct bch_dev *);
-int bch2_dev_allocator_start(struct bch_dev *);
-
-static inline void writepoint_init(struct write_point *wp,
-                                  enum bch_data_type type)
-{
-       mutex_init(&wp->lock);
-       wp->type = type;
-}
-
-int bch2_alloc_write(struct bch_fs *);
-int bch2_fs_allocator_start(struct bch_fs *);
-void bch2_fs_allocator_init(struct bch_fs *);
-
-#endif /* _BCACHEFS_ALLOC_H */
similarity index 63%
rename from libbcachefs/alloc.c
rename to libbcachefs/alloc_background.c
index d303ecee80d80490027b3b71bced43f7c6fef352..7ba20c87caada672c25fdd91ce85c48ce9d23503 100644 (file)
@@ -1,78 +1,17 @@
-/*
- * Primary bucket allocation code
- *
- * Copyright 2012 Google, Inc.
- *
- * Allocation in bcache is done in terms of buckets:
- *
- * Each bucket has associated an 8 bit gen; this gen corresponds to the gen in
- * btree pointers - they must match for the pointer to be considered valid.
- *
- * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a
- * bucket simply by incrementing its gen.
- *
- * The gens (along with the priorities; it's really the gens are important but
- * the code is named as if it's the priorities) are written in an arbitrary list
- * of buckets on disk, with a pointer to them in the journal header.
- *
- * When we invalidate a bucket, we have to write its new gen to disk and wait
- * for that write to complete before we use it - otherwise after a crash we
- * could have pointers that appeared to be good but pointed to data that had
- * been overwritten.
- *
- * Since the gens and priorities are all stored contiguously on disk, we can
- * batch this up: We fill up the free_inc list with freshly invalidated buckets,
- * call prio_write(), and when prio_write() finishes we pull buckets off the
- * free_inc list and optionally discard them.
- *
- * free_inc isn't the only freelist - if it was, we'd often have to sleep while
- * priorities and gens were being written before we could allocate. c->free is a
- * smaller freelist, and buckets on that list are always ready to be used.
- *
- * If we've got discards enabled, that happens when a bucket moves from the
- * free_inc list to the free list.
- *
- * It's important to ensure that gens don't wrap around - with respect to
- * either the oldest gen in the btree or the gen on disk. This is quite
- * difficult to do in practice, but we explicitly guard against it anyways - if
- * a bucket is in danger of wrapping around we simply skip invalidating it that
- * time around, and we garbage collect or rewrite the priorities sooner than we
- * would have otherwise.
- *
- * bch2_bucket_alloc() allocates a single bucket from a specific device.
- *
- * bch2_bucket_alloc_set() allocates one or more buckets from different devices
- * in a given filesystem.
- *
- * invalidate_buckets() drives all the processes described above. It's called
- * from bch2_bucket_alloc() and a few other places that need to make sure free
- * buckets are ready.
- *
- * invalidate_buckets_(lru|fifo)() find buckets that are available to be
- * invalidated, and then invalidate them and stick them on the free_inc list -
- * in either lru or fifo order.
- */
-
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
 #include "btree_cache.h"
 #include "btree_io.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
-#include "checksum.h"
 #include "clock.h"
 #include "debug.h"
-#include "disk_groups.h"
 #include "error.h"
-#include "extents.h"
-#include "io.h"
-#include "journal.h"
 #include "journal_io.h"
-#include "super-io.h"
 
-#include <linux/blkdev.h>
 #include <linux/kthread.h>
 #include <linux/math64.h>
 #include <linux/random.h>
@@ -496,23 +435,6 @@ static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
  * commands to the newly free buckets, then puts them on the various freelists.
  */
 
-static void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
-                                  size_t bucket)
-{
-       if (expensive_debug_checks(c) &&
-           test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
-               size_t iter;
-               long i;
-               unsigned j;
-
-               for (j = 0; j < RESERVE_NR; j++)
-                       fifo_for_each_entry(i, &ca->free[j], iter)
-                               BUG_ON(i == bucket);
-               fifo_for_each_entry(i, &ca->free_inc, iter)
-                       BUG_ON(i == bucket);
-       }
-}
-
 #define BUCKET_GC_GEN_MAX      96U
 
 /**
@@ -1044,668 +966,6 @@ stop:
        return 0;
 }
 
-/* Allocation */
-
-/*
- * Open buckets represent a bucket that's currently being allocated from.  They
- * serve two purposes:
- *
- *  - They track buckets that have been partially allocated, allowing for
- *    sub-bucket sized allocations - they're used by the sector allocator below
- *
- *  - They provide a reference to the buckets they own that mark and sweep GC
- *    can find, until the new allocation has a pointer to it inserted into the
- *    btree
- *
- * When allocating some space with the sector allocator, the allocation comes
- * with a reference to an open bucket - the caller is required to put that
- * reference _after_ doing the index update that makes its allocation reachable.
- */
-
-void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
-{
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-       percpu_down_read_preempt_disable(&c->usage_lock);
-       spin_lock(&ob->lock);
-
-       bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
-                              false, gc_pos_alloc(c, ob), 0);
-       ob->valid = false;
-
-       spin_unlock(&ob->lock);
-       percpu_up_read_preempt_enable(&c->usage_lock);
-
-       spin_lock(&c->freelist_lock);
-       ob->freelist = c->open_buckets_freelist;
-       c->open_buckets_freelist = ob - c->open_buckets;
-       c->open_buckets_nr_free++;
-       spin_unlock(&c->freelist_lock);
-
-       closure_wake_up(&c->open_buckets_wait);
-}
-
-static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
-{
-       struct open_bucket *ob;
-
-       BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
-
-       ob = c->open_buckets + c->open_buckets_freelist;
-       c->open_buckets_freelist = ob->freelist;
-       atomic_set(&ob->pin, 1);
-
-       c->open_buckets_nr_free--;
-       return ob;
-}
-
-/* _only_ for allocating the journal on a new device: */
-long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
-{
-       struct bucket_array *buckets;
-       ssize_t b;
-
-       rcu_read_lock();
-       buckets = bucket_array(ca);
-
-       for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
-               if (is_available_bucket(buckets->b[b].mark))
-                       goto success;
-       b = -1;
-success:
-       rcu_read_unlock();
-       return b;
-}
-
-static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
-{
-       switch (reserve) {
-       case RESERVE_ALLOC:
-               return 0;
-       case RESERVE_BTREE:
-               return BTREE_NODE_RESERVE / 2;
-       default:
-               return BTREE_NODE_RESERVE;
-       }
-}
-
-/**
- * bch_bucket_alloc - allocate a single bucket from a specific device
- *
- * Returns index of bucket on success, 0 on failure
- * */
-int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
-                     enum alloc_reserve reserve,
-                     bool may_alloc_partial,
-                     struct closure *cl)
-{
-       struct bucket_array *buckets;
-       struct open_bucket *ob;
-       long bucket;
-
-       spin_lock(&c->freelist_lock);
-
-       if (may_alloc_partial &&
-           ca->open_buckets_partial_nr) {
-               int ret = ca->open_buckets_partial[--ca->open_buckets_partial_nr];
-               c->open_buckets[ret].on_partial_list = false;
-               spin_unlock(&c->freelist_lock);
-               return ret;
-       }
-
-       if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
-               if (cl)
-                       closure_wait(&c->open_buckets_wait, cl);
-               spin_unlock(&c->freelist_lock);
-               trace_open_bucket_alloc_fail(ca, reserve);
-               return OPEN_BUCKETS_EMPTY;
-       }
-
-       if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket)))
-               goto out;
-
-       switch (reserve) {
-       case RESERVE_ALLOC:
-               if (fifo_pop(&ca->free[RESERVE_BTREE], bucket))
-                       goto out;
-               break;
-       case RESERVE_BTREE:
-               if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >=
-                   ca->free[RESERVE_BTREE].size &&
-                   fifo_pop(&ca->free[RESERVE_BTREE], bucket))
-                       goto out;
-               break;
-       case RESERVE_MOVINGGC:
-               if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket))
-                       goto out;
-               break;
-       default:
-               break;
-       }
-
-       if (cl)
-               closure_wait(&c->freelist_wait, cl);
-
-       spin_unlock(&c->freelist_lock);
-
-       trace_bucket_alloc_fail(ca, reserve);
-       return FREELIST_EMPTY;
-out:
-       verify_not_on_freelist(c, ca, bucket);
-
-       ob = bch2_open_bucket_alloc(c);
-
-       spin_lock(&ob->lock);
-       buckets = bucket_array(ca);
-
-       ob->valid       = true;
-       ob->sectors_free = ca->mi.bucket_size;
-       ob->ptr         = (struct bch_extent_ptr) {
-               .gen    = buckets->b[bucket].mark.gen,
-               .offset = bucket_to_sector(ca, bucket),
-               .dev    = ca->dev_idx,
-       };
-
-       bucket_io_clock_reset(c, ca, bucket, READ);
-       bucket_io_clock_reset(c, ca, bucket, WRITE);
-       spin_unlock(&ob->lock);
-
-       spin_unlock(&c->freelist_lock);
-
-       bch2_wake_allocator(ca);
-
-       trace_bucket_alloc(ca, reserve);
-       return ob - c->open_buckets;
-}
-
-static int __dev_alloc_cmp(struct write_point *wp,
-                          unsigned l, unsigned r)
-{
-       return ((wp->next_alloc[l] > wp->next_alloc[r]) -
-               (wp->next_alloc[l] < wp->next_alloc[r]));
-}
-
-#define dev_alloc_cmp(l, r) __dev_alloc_cmp(wp, l, r)
-
-struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *c,
-                                        struct write_point *wp,
-                                        struct bch_devs_mask *devs)
-{
-       struct dev_alloc_list ret = { .nr = 0 };
-       struct bch_dev *ca;
-       unsigned i;
-
-       for_each_member_device_rcu(ca, c, i, devs)
-               ret.devs[ret.nr++] = i;
-
-       bubble_sort(ret.devs, ret.nr, dev_alloc_cmp);
-       return ret;
-}
-
-void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
-                    struct write_point *wp)
-{
-       u64 *v = wp->next_alloc + ca->dev_idx;
-       u64 free_space = dev_buckets_free(c, ca);
-       u64 free_space_inv = free_space
-               ? div64_u64(1ULL << 48, free_space)
-               : 1ULL << 48;
-       u64 scale = *v / 4;
-
-       if (*v + free_space_inv >= *v)
-               *v += free_space_inv;
-       else
-               *v = U64_MAX;
-
-       for (v = wp->next_alloc;
-            v < wp->next_alloc + ARRAY_SIZE(wp->next_alloc); v++)
-               *v = *v < scale ? 0 : *v - scale;
-}
-
-static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
-                                       struct write_point *wp,
-                                       unsigned nr_replicas,
-                                       enum alloc_reserve reserve,
-                                       struct bch_devs_mask *devs,
-                                       struct closure *cl)
-{
-       enum bucket_alloc_ret ret = NO_DEVICES;
-       struct dev_alloc_list devs_sorted;
-       struct bch_dev *ca;
-       unsigned i, nr_ptrs_effective = 0;
-       bool have_cache_dev = false;
-
-       BUG_ON(nr_replicas > ARRAY_SIZE(wp->ptrs));
-
-       for (i = wp->first_ptr; i < wp->nr_ptrs; i++) {
-               ca = bch_dev_bkey_exists(c, wp->ptrs[i]->ptr.dev);
-
-               nr_ptrs_effective += ca->mi.durability;
-               have_cache_dev |= !ca->mi.durability;
-       }
-
-       if (nr_ptrs_effective >= nr_replicas)
-               return ALLOC_SUCCESS;
-
-       devs_sorted = bch2_wp_alloc_list(c, wp, devs);
-
-       for (i = 0; i < devs_sorted.nr; i++) {
-               int ob;
-
-               ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
-               if (!ca)
-                       continue;
-
-               if (!ca->mi.durability &&
-                   (have_cache_dev ||
-                    wp->type != BCH_DATA_USER))
-                       continue;
-
-               ob = bch2_bucket_alloc(c, ca, reserve,
-                                      wp->type == BCH_DATA_USER, cl);
-               if (ob < 0) {
-                       ret = ob;
-                       if (ret == OPEN_BUCKETS_EMPTY)
-                               break;
-                       continue;
-               }
-
-               BUG_ON(ob <= 0 || ob > U8_MAX);
-               BUG_ON(wp->nr_ptrs >= ARRAY_SIZE(wp->ptrs));
-
-               wp->ptrs[wp->nr_ptrs++] = c->open_buckets + ob;
-
-               bch2_wp_rescale(c, ca, wp);
-
-               nr_ptrs_effective += ca->mi.durability;
-               have_cache_dev |= !ca->mi.durability;
-
-               __clear_bit(ca->dev_idx, devs->d);
-
-               if (nr_ptrs_effective >= nr_replicas) {
-                       ret = ALLOC_SUCCESS;
-                       break;
-               }
-       }
-
-       EBUG_ON(reserve == RESERVE_MOVINGGC &&
-               ret != ALLOC_SUCCESS &&
-               ret != OPEN_BUCKETS_EMPTY);
-
-       switch (ret) {
-       case ALLOC_SUCCESS:
-               return 0;
-       case NO_DEVICES:
-               return -EROFS;
-       case FREELIST_EMPTY:
-       case OPEN_BUCKETS_EMPTY:
-               return cl ? -EAGAIN : -ENOSPC;
-       default:
-               BUG();
-       }
-}
-
-/* Sector allocator */
-
-static void writepoint_drop_ptr(struct bch_fs *c,
-                               struct write_point *wp,
-                               unsigned i)
-{
-       struct open_bucket *ob = wp->ptrs[i];
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-       BUG_ON(ca->open_buckets_partial_nr >=
-              ARRAY_SIZE(ca->open_buckets_partial));
-
-       if (wp->type == BCH_DATA_USER) {
-               spin_lock(&c->freelist_lock);
-               ob->on_partial_list = true;
-               ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
-                       ob - c->open_buckets;
-               spin_unlock(&c->freelist_lock);
-
-               closure_wake_up(&c->open_buckets_wait);
-               closure_wake_up(&c->freelist_wait);
-       } else {
-               bch2_open_bucket_put(c, ob);
-       }
-
-       array_remove_item(wp->ptrs, wp->nr_ptrs, i);
-
-       if (i < wp->first_ptr)
-               wp->first_ptr--;
-}
-
-static void writepoint_drop_ptrs(struct bch_fs *c,
-                                struct write_point *wp,
-                                u16 target, bool in_target)
-{
-       int i;
-
-       for (i = wp->first_ptr - 1; i >= 0; --i)
-               if (bch2_dev_in_target(c, wp->ptrs[i]->ptr.dev,
-                                      target) == in_target)
-                       writepoint_drop_ptr(c, wp, i);
-}
-
-static void verify_not_stale(struct bch_fs *c, const struct write_point *wp)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
-       struct open_bucket *ob;
-       unsigned i;
-
-       writepoint_for_each_ptr_all(wp, ob, i) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-               BUG_ON(ptr_stale(ca, &ob->ptr));
-       }
-#endif
-}
-
-static int open_bucket_add_buckets(struct bch_fs *c,
-                                  u16 target,
-                                  struct write_point *wp,
-                                  struct bch_devs_list *devs_have,
-                                  unsigned nr_replicas,
-                                  enum alloc_reserve reserve,
-                                  struct closure *cl)
-{
-       struct bch_devs_mask devs = c->rw_devs[wp->type];
-       const struct bch_devs_mask *t;
-       struct open_bucket *ob;
-       unsigned i;
-       int ret;
-
-       percpu_down_read_preempt_disable(&c->usage_lock);
-       rcu_read_lock();
-
-       /* Don't allocate from devices we already have pointers to: */
-       for (i = 0; i < devs_have->nr; i++)
-               __clear_bit(devs_have->devs[i], devs.d);
-
-       writepoint_for_each_ptr_all(wp, ob, i)
-               __clear_bit(ob->ptr.dev, devs.d);
-
-       t = bch2_target_to_mask(c, target);
-       if (t)
-               bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
-
-       ret = bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
-
-       rcu_read_unlock();
-       percpu_up_read_preempt_enable(&c->usage_lock);
-
-       return ret;
-}
-
-static struct write_point *__writepoint_find(struct hlist_head *head,
-                                            unsigned long write_point)
-{
-       struct write_point *wp;
-
-       hlist_for_each_entry_rcu(wp, head, node)
-               if (wp->write_point == write_point)
-                       return wp;
-
-       return NULL;
-}
-
-static struct hlist_head *writepoint_hash(struct bch_fs *c,
-                                         unsigned long write_point)
-{
-       unsigned hash =
-               hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
-
-       return &c->write_points_hash[hash];
-}
-
-static struct write_point *writepoint_find(struct bch_fs *c,
-                                          unsigned long write_point)
-{
-       struct write_point *wp, *oldest;
-       struct hlist_head *head;
-
-       if (!(write_point & 1UL)) {
-               wp = (struct write_point *) write_point;
-               mutex_lock(&wp->lock);
-               return wp;
-       }
-
-       head = writepoint_hash(c, write_point);
-restart_find:
-       wp = __writepoint_find(head, write_point);
-       if (wp) {
-lock_wp:
-               mutex_lock(&wp->lock);
-               if (wp->write_point == write_point)
-                       goto out;
-               mutex_unlock(&wp->lock);
-               goto restart_find;
-       }
-
-       oldest = NULL;
-       for (wp = c->write_points;
-            wp < c->write_points + ARRAY_SIZE(c->write_points);
-            wp++)
-               if (!oldest || time_before64(wp->last_used, oldest->last_used))
-                       oldest = wp;
-
-       mutex_lock(&oldest->lock);
-       mutex_lock(&c->write_points_hash_lock);
-       wp = __writepoint_find(head, write_point);
-       if (wp && wp != oldest) {
-               mutex_unlock(&c->write_points_hash_lock);
-               mutex_unlock(&oldest->lock);
-               goto lock_wp;
-       }
-
-       wp = oldest;
-       hlist_del_rcu(&wp->node);
-       wp->write_point = write_point;
-       hlist_add_head_rcu(&wp->node, head);
-       mutex_unlock(&c->write_points_hash_lock);
-out:
-       wp->last_used = sched_clock();
-       return wp;
-}
-
-/*
- * Get us an open_bucket we can allocate from, return with it locked:
- */
-struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
-                               unsigned target,
-                               struct write_point_specifier write_point,
-                               struct bch_devs_list *devs_have,
-                               unsigned nr_replicas,
-                               unsigned nr_replicas_required,
-                               enum alloc_reserve reserve,
-                               unsigned flags,
-                               struct closure *cl)
-{
-       struct write_point *wp;
-       struct open_bucket *ob;
-       struct bch_dev *ca;
-       unsigned nr_ptrs_have, nr_ptrs_effective;
-       int ret, i, cache_idx = -1;
-
-       BUG_ON(!nr_replicas || !nr_replicas_required);
-
-       wp = writepoint_find(c, write_point.v);
-
-       wp->first_ptr = 0;
-
-       /* does writepoint have ptrs we can't use? */
-       writepoint_for_each_ptr(wp, ob, i)
-               if (bch2_dev_list_has_dev(*devs_have, ob->ptr.dev)) {
-                       swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                       wp->first_ptr++;
-               }
-
-       nr_ptrs_have = wp->first_ptr;
-
-       /* does writepoint have ptrs we don't want to use? */
-       if (target)
-               writepoint_for_each_ptr(wp, ob, i)
-                       if (!bch2_dev_in_target(c, ob->ptr.dev, target)) {
-                               swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                               wp->first_ptr++;
-                       }
-
-       if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) {
-               ret = open_bucket_add_buckets(c, target, wp, devs_have,
-                                             nr_replicas, reserve, cl);
-       } else {
-               ret = open_bucket_add_buckets(c, target, wp, devs_have,
-                                             nr_replicas, reserve, NULL);
-               if (!ret)
-                       goto alloc_done;
-
-               wp->first_ptr = nr_ptrs_have;
-
-               ret = open_bucket_add_buckets(c, 0, wp, devs_have,
-                                             nr_replicas, reserve, cl);
-       }
-
-       if (ret && ret != -EROFS)
-               goto err;
-alloc_done:
-       /* check for more than one cache: */
-       for (i = wp->nr_ptrs - 1; i >= wp->first_ptr; --i) {
-               ca = bch_dev_bkey_exists(c, wp->ptrs[i]->ptr.dev);
-
-               if (ca->mi.durability)
-                       continue;
-
-               /*
-                * if we ended up with more than one cache device, prefer the
-                * one in the target we want:
-                */
-               if (cache_idx >= 0) {
-                       if (!bch2_dev_in_target(c, wp->ptrs[i]->ptr.dev,
-                                               target)) {
-                               writepoint_drop_ptr(c, wp, i);
-                       } else {
-                               writepoint_drop_ptr(c, wp, cache_idx);
-                               cache_idx = i;
-                       }
-               } else {
-                       cache_idx = i;
-               }
-       }
-
-       /* we might have more effective replicas than required: */
-       nr_ptrs_effective = 0;
-       writepoint_for_each_ptr(wp, ob, i) {
-               ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-               nr_ptrs_effective += ca->mi.durability;
-       }
-
-       if (ret == -EROFS &&
-           nr_ptrs_effective >= nr_replicas_required)
-               ret = 0;
-
-       if (ret)
-               goto err;
-
-       if (nr_ptrs_effective > nr_replicas) {
-               writepoint_for_each_ptr(wp, ob, i) {
-                       ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-                       if (ca->mi.durability &&
-                           ca->mi.durability <= nr_ptrs_effective - nr_replicas &&
-                           !bch2_dev_in_target(c, ob->ptr.dev, target)) {
-                               swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                               wp->first_ptr++;
-                               nr_ptrs_effective -= ca->mi.durability;
-                       }
-               }
-       }
-
-       if (nr_ptrs_effective > nr_replicas) {
-               writepoint_for_each_ptr(wp, ob, i) {
-                       ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-                       if (ca->mi.durability &&
-                           ca->mi.durability <= nr_ptrs_effective - nr_replicas) {
-                               swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                               wp->first_ptr++;
-                               nr_ptrs_effective -= ca->mi.durability;
-                       }
-               }
-       }
-
-       /* Remove pointers we don't want to use: */
-       if (target)
-               writepoint_drop_ptrs(c, wp, target, false);
-
-       BUG_ON(wp->first_ptr >= wp->nr_ptrs);
-       BUG_ON(nr_ptrs_effective < nr_replicas_required);
-
-       wp->sectors_free = UINT_MAX;
-
-       writepoint_for_each_ptr(wp, ob, i)
-               wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
-
-       BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
-
-       verify_not_stale(c, wp);
-
-       return wp;
-err:
-       mutex_unlock(&wp->lock);
-       return ERR_PTR(ret);
-}
-
-/*
- * Append pointers to the space we just allocated to @k, and mark @sectors space
- * as allocated out of @ob
- */
-void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
-                                   struct bkey_i_extent *e, unsigned sectors)
-{
-       struct open_bucket *ob;
-       unsigned i;
-
-       BUG_ON(sectors > wp->sectors_free);
-       wp->sectors_free -= sectors;
-
-       writepoint_for_each_ptr(wp, ob, i) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-               struct bch_extent_ptr tmp = ob->ptr;
-
-               EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev));
-
-               tmp.cached = bkey_extent_is_cached(&e->k) ||
-                       (!ca->mi.durability && wp->type == BCH_DATA_USER);
-
-               tmp.offset += ca->mi.bucket_size - ob->sectors_free;
-               extent_ptr_append(e, tmp);
-
-               BUG_ON(sectors > ob->sectors_free);
-               ob->sectors_free -= sectors;
-       }
-}
-
-/*
- * Append pointers to the space we just allocated to @k, and mark @sectors space
- * as allocated out of @ob
- */
-void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
-{
-       int i;
-
-       for (i = wp->nr_ptrs - 1; i >= 0; --i) {
-               struct open_bucket *ob = wp->ptrs[i];
-
-               if (!ob->sectors_free) {
-                       array_remove_item(wp->ptrs, wp->nr_ptrs, i);
-                       bch2_open_bucket_put(c, ob);
-               }
-       }
-
-       mutex_unlock(&wp->lock);
-}
-
 /* Startup/shutdown (ro/rw): */
 
 void bch2_recalc_capacity(struct bch_fs *c)
@@ -1792,19 +1052,6 @@ void bch2_recalc_capacity(struct bch_fs *c)
        closure_wake_up(&c->freelist_wait);
 }
 
-static void bch2_stop_write_point(struct bch_fs *c, struct bch_dev *ca,
-                                 struct write_point *wp)
-{
-       struct bch_devs_mask not_self;
-
-       bitmap_complement(not_self.d, ca->self.d, BCH_SB_MEMBERS_MAX);
-
-       mutex_lock(&wp->lock);
-       wp->first_ptr = wp->nr_ptrs;
-       writepoint_drop_ptrs(c, wp, dev_to_target(ca->dev_idx), true);
-       mutex_unlock(&wp->lock);
-}
-
 static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
 {
        struct open_bucket *ob;
@@ -1842,18 +1089,18 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
 
        /* Next, close write points that point to this device... */
        for (i = 0; i < ARRAY_SIZE(c->write_points); i++)
-               bch2_stop_write_point(c, ca, &c->write_points[i]);
+               bch2_writepoint_stop(c, ca, &c->write_points[i]);
 
-       bch2_stop_write_point(c, ca, &ca->copygc_write_point);
-       bch2_stop_write_point(c, ca, &c->rebalance_write_point);
-       bch2_stop_write_point(c, ca, &c->btree_write_point);
+       bch2_writepoint_stop(c, ca, &ca->copygc_write_point);
+       bch2_writepoint_stop(c, ca, &c->rebalance_write_point);
+       bch2_writepoint_stop(c, ca, &c->btree_write_point);
 
        mutex_lock(&c->btree_reserve_cache_lock);
        while (c->btree_reserve_cache_nr) {
                struct btree_alloc *a =
                        &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
 
-               bch2_open_bucket_put_refs(c, &a->ob.nr, a->ob.refs);
+               bch2_open_buckets_put(c, &a->ob);
        }
        mutex_unlock(&c->btree_reserve_cache_lock);
 
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
new file mode 100644 (file)
index 0000000..b5dbf7e
--- /dev/null
@@ -0,0 +1,61 @@
+#ifndef _BCACHEFS_ALLOC_BACKGROUND_H
+#define _BCACHEFS_ALLOC_BACKGROUND_H
+
+#include "bcachefs.h"
+#include "alloc_types.h"
+#include "debug.h"
+
+#define ALLOC_SCAN_BATCH(ca)           ((ca)->mi.nbuckets >> 9)
+
+const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c);
+int bch2_alloc_to_text(struct bch_fs *, char *, size_t, struct bkey_s_c);
+
+#define bch2_bkey_alloc_ops (struct bkey_ops) {                \
+       .key_invalid    = bch2_alloc_invalid,           \
+       .val_to_text    = bch2_alloc_to_text,           \
+}
+
+int bch2_alloc_read(struct bch_fs *, struct list_head *);
+int bch2_alloc_replay_key(struct bch_fs *, struct bpos);
+
+static inline void bch2_wake_allocator(struct bch_dev *ca)
+{
+       struct task_struct *p;
+
+       rcu_read_lock();
+       p = rcu_dereference(ca->alloc_thread);
+       if (p)
+               wake_up_process(p);
+       rcu_read_unlock();
+}
+
+static inline void verify_not_on_freelist(struct bch_fs *c, struct bch_dev *ca,
+                                         size_t bucket)
+{
+       if (expensive_debug_checks(c) &&
+           test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
+               size_t iter;
+               long i;
+               unsigned j;
+
+               for (j = 0; j < RESERVE_NR; j++)
+                       fifo_for_each_entry(i, &ca->free[j], iter)
+                               BUG_ON(i == bucket);
+               fifo_for_each_entry(i, &ca->free_inc, iter)
+                       BUG_ON(i == bucket);
+       }
+}
+
+void bch2_recalc_capacity(struct bch_fs *);
+
+void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
+void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
+
+void bch2_dev_allocator_stop(struct bch_dev *);
+int bch2_dev_allocator_start(struct bch_dev *);
+
+int bch2_alloc_write(struct bch_fs *);
+int bch2_fs_allocator_start(struct bch_fs *);
+void bch2_fs_allocator_init(struct bch_fs *);
+
+#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
new file mode 100644 (file)
index 0000000..be94196
--- /dev/null
@@ -0,0 +1,689 @@
+/*
+ * Primary bucket allocation code
+ *
+ * Copyright 2012 Google, Inc.
+ *
+ * Allocation in bcache is done in terms of buckets:
+ *
+ * Each bucket has associated an 8 bit gen; this gen corresponds to the gen in
+ * btree pointers - they must match for the pointer to be considered valid.
+ *
+ * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a
+ * bucket simply by incrementing its gen.
+ *
+ * The gens (along with the priorities; it's really the gens are important but
+ * the code is named as if it's the priorities) are written in an arbitrary list
+ * of buckets on disk, with a pointer to them in the journal header.
+ *
+ * When we invalidate a bucket, we have to write its new gen to disk and wait
+ * for that write to complete before we use it - otherwise after a crash we
+ * could have pointers that appeared to be good but pointed to data that had
+ * been overwritten.
+ *
+ * Since the gens and priorities are all stored contiguously on disk, we can
+ * batch this up: We fill up the free_inc list with freshly invalidated buckets,
+ * call prio_write(), and when prio_write() finishes we pull buckets off the
+ * free_inc list and optionally discard them.
+ *
+ * free_inc isn't the only freelist - if it was, we'd often have to sleep while
+ * priorities and gens were being written before we could allocate. c->free is a
+ * smaller freelist, and buckets on that list are always ready to be used.
+ *
+ * If we've got discards enabled, that happens when a bucket moves from the
+ * free_inc list to the free list.
+ *
+ * It's important to ensure that gens don't wrap around - with respect to
+ * either the oldest gen in the btree or the gen on disk. This is quite
+ * difficult to do in practice, but we explicitly guard against it anyways - if
+ * a bucket is in danger of wrapping around we simply skip invalidating it that
+ * time around, and we garbage collect or rewrite the priorities sooner than we
+ * would have otherwise.
+ *
+ * bch2_bucket_alloc() allocates a single bucket from a specific device.
+ *
+ * bch2_bucket_alloc_set() allocates one or more buckets from different devices
+ * in a given filesystem.
+ *
+ * invalidate_buckets() drives all the processes described above. It's called
+ * from bch2_bucket_alloc() and a few other places that need to make sure free
+ * buckets are ready.
+ *
+ * invalidate_buckets_(lru|fifo)() find buckets that are available to be
+ * invalidated, and then invalidate them and stick them on the free_inc list -
+ * in either lru or fifo order.
+ */
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
+#include "btree_gc.h"
+#include "buckets.h"
+#include "clock.h"
+#include "debug.h"
+#include "disk_groups.h"
+#include "io.h"
+
+#include <linux/math64.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <trace/events/bcachefs.h>
+
+enum bucket_alloc_ret {
+       ALLOC_SUCCESS,
+       OPEN_BUCKETS_EMPTY,
+       FREELIST_EMPTY,         /* Allocator thread not keeping up */
+};
+
+/*
+ * Open buckets represent a bucket that's currently being allocated from.  They
+ * serve two purposes:
+ *
+ *  - They track buckets that have been partially allocated, allowing for
+ *    sub-bucket sized allocations - they're used by the sector allocator below
+ *
+ *  - They provide a reference to the buckets they own that mark and sweep GC
+ *    can find, until the new allocation has a pointer to it inserted into the
+ *    btree
+ *
+ * When allocating some space with the sector allocator, the allocation comes
+ * with a reference to an open bucket - the caller is required to put that
+ * reference _after_ doing the index update that makes its allocation reachable.
+ */
+
+void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+       percpu_down_read_preempt_disable(&c->usage_lock);
+       spin_lock(&ob->lock);
+
+       bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
+                              false, gc_pos_alloc(c, ob), 0);
+       ob->valid = false;
+
+       spin_unlock(&ob->lock);
+       percpu_up_read_preempt_enable(&c->usage_lock);
+
+       spin_lock(&c->freelist_lock);
+       ob->freelist = c->open_buckets_freelist;
+       c->open_buckets_freelist = ob - c->open_buckets;
+       c->open_buckets_nr_free++;
+       spin_unlock(&c->freelist_lock);
+
+       closure_wake_up(&c->open_buckets_wait);
+}
+
+static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
+{
+       struct open_bucket *ob;
+
+       BUG_ON(!c->open_buckets_freelist || !c->open_buckets_nr_free);
+
+       ob = c->open_buckets + c->open_buckets_freelist;
+       c->open_buckets_freelist = ob->freelist;
+       atomic_set(&ob->pin, 1);
+
+       c->open_buckets_nr_free--;
+       return ob;
+}
+
+static void open_bucket_free_unused(struct bch_fs *c,
+                                   struct write_point *wp,
+                                   struct open_bucket *ob)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+       BUG_ON(ca->open_buckets_partial_nr >=
+              ARRAY_SIZE(ca->open_buckets_partial));
+
+       if (wp->type == BCH_DATA_USER) {
+               spin_lock(&c->freelist_lock);
+               ob->on_partial_list = true;
+               ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
+                       ob - c->open_buckets;
+               spin_unlock(&c->freelist_lock);
+
+               closure_wake_up(&c->open_buckets_wait);
+               closure_wake_up(&c->freelist_wait);
+       } else {
+               bch2_open_bucket_put(c, ob);
+       }
+}
+
+static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, obs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+               BUG_ON(ptr_stale(ca, &ob->ptr));
+       }
+#endif
+}
+
+/* _only_ for allocating the journal on a new device: */
+long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
+{
+       struct bucket_array *buckets;
+       ssize_t b;
+
+       rcu_read_lock();
+       buckets = bucket_array(ca);
+
+       for (b = ca->mi.first_bucket; b < ca->mi.nbuckets; b++)
+               if (is_available_bucket(buckets->b[b].mark))
+                       goto success;
+       b = -1;
+success:
+       rcu_read_unlock();
+       return b;
+}
+
+static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
+{
+       switch (reserve) {
+       case RESERVE_ALLOC:
+               return 0;
+       case RESERVE_BTREE:
+               return BTREE_NODE_RESERVE / 2;
+       default:
+               return BTREE_NODE_RESERVE;
+       }
+}
+
+/**
+ * bch_bucket_alloc - allocate a single bucket from a specific device
+ *
+ * Returns index of bucket on success, 0 on failure
+ * */
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+                                     enum alloc_reserve reserve,
+                                     bool may_alloc_partial,
+                                     struct closure *cl)
+{
+       struct bucket_array *buckets;
+       struct open_bucket *ob;
+       long bucket = 0;
+
+       spin_lock(&c->freelist_lock);
+
+       if (may_alloc_partial &&
+           ca->open_buckets_partial_nr) {
+               ob = c->open_buckets +
+                       ca->open_buckets_partial[--ca->open_buckets_partial_nr];
+               ob->on_partial_list = false;
+               spin_unlock(&c->freelist_lock);
+               return ob;
+       }
+
+       if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
+               if (cl)
+                       closure_wait(&c->open_buckets_wait, cl);
+               spin_unlock(&c->freelist_lock);
+               trace_open_bucket_alloc_fail(ca, reserve);
+               return ERR_PTR(-OPEN_BUCKETS_EMPTY);
+       }
+
+       if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket)))
+               goto out;
+
+       switch (reserve) {
+       case RESERVE_ALLOC:
+               if (fifo_pop(&ca->free[RESERVE_BTREE], bucket))
+                       goto out;
+               break;
+       case RESERVE_BTREE:
+               if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >=
+                   ca->free[RESERVE_BTREE].size &&
+                   fifo_pop(&ca->free[RESERVE_BTREE], bucket))
+                       goto out;
+               break;
+       case RESERVE_MOVINGGC:
+               if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket))
+                       goto out;
+               break;
+       default:
+               break;
+       }
+
+       if (cl)
+               closure_wait(&c->freelist_wait, cl);
+
+       spin_unlock(&c->freelist_lock);
+
+       trace_bucket_alloc_fail(ca, reserve);
+       return ERR_PTR(-FREELIST_EMPTY);
+out:
+       verify_not_on_freelist(c, ca, bucket);
+
+       ob = bch2_open_bucket_alloc(c);
+
+       spin_lock(&ob->lock);
+       buckets = bucket_array(ca);
+
+       ob->valid       = true;
+       ob->sectors_free = ca->mi.bucket_size;
+       ob->ptr         = (struct bch_extent_ptr) {
+               .gen    = buckets->b[bucket].mark.gen,
+               .offset = bucket_to_sector(ca, bucket),
+               .dev    = ca->dev_idx,
+       };
+
+       bucket_io_clock_reset(c, ca, bucket, READ);
+       bucket_io_clock_reset(c, ca, bucket, WRITE);
+       spin_unlock(&ob->lock);
+
+       spin_unlock(&c->freelist_lock);
+
+       bch2_wake_allocator(ca);
+
+       trace_bucket_alloc(ca, reserve);
+       return ob;
+}
+
+static int __dev_alloc_cmp(struct write_point *wp,
+                          unsigned l, unsigned r)
+{
+       return ((wp->next_alloc[l] > wp->next_alloc[r]) -
+               (wp->next_alloc[l] < wp->next_alloc[r]));
+}
+
+#define dev_alloc_cmp(l, r) __dev_alloc_cmp(wp, l, r)
+
+struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *c,
+                                        struct write_point *wp,
+                                        struct bch_devs_mask *devs)
+{
+       struct dev_alloc_list ret = { .nr = 0 };
+       struct bch_dev *ca;
+       unsigned i;
+
+       for_each_member_device_rcu(ca, c, i, devs)
+               ret.devs[ret.nr++] = i;
+
+       bubble_sort(ret.devs, ret.nr, dev_alloc_cmp);
+       return ret;
+}
+
+void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
+                    struct write_point *wp)
+{
+       u64 *v = wp->next_alloc + ca->dev_idx;
+       u64 free_space = dev_buckets_free(c, ca);
+       u64 free_space_inv = free_space
+               ? div64_u64(1ULL << 48, free_space)
+               : 1ULL << 48;
+       u64 scale = *v / 4;
+
+       if (*v + free_space_inv >= *v)
+               *v += free_space_inv;
+       else
+               *v = U64_MAX;
+
+       for (v = wp->next_alloc;
+            v < wp->next_alloc + ARRAY_SIZE(wp->next_alloc); v++)
+               *v = *v < scale ? 0 : *v - scale;
+}
+
+static int bch2_bucket_alloc_set(struct bch_fs *c,
+                                struct open_buckets *ptrs,
+                                struct write_point *wp,
+                                struct bch_devs_mask *devs_may_alloc,
+                                unsigned nr_replicas,
+                                unsigned *nr_effective,
+                                bool *have_cache,
+                                enum alloc_reserve reserve,
+                                struct closure *cl)
+{
+       struct dev_alloc_list devs_sorted =
+               bch2_wp_alloc_list(c, wp, devs_may_alloc);
+       struct bch_dev *ca;
+       bool alloc_failure = false;
+       unsigned i;
+
+       BUG_ON(*nr_effective >= nr_replicas);
+
+       for (i = 0; i < devs_sorted.nr; i++) {
+               struct open_bucket *ob;
+
+               ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
+               if (!ca)
+                       continue;
+
+               if (!ca->mi.durability &&
+                   (*have_cache ||
+                    wp->type != BCH_DATA_USER))
+                       continue;
+
+               ob = bch2_bucket_alloc(c, ca, reserve,
+                                      wp->type == BCH_DATA_USER, cl);
+               if (IS_ERR(ob)) {
+                       enum bucket_alloc_ret ret = -PTR_ERR(ob);
+
+                       WARN_ON(reserve == RESERVE_MOVINGGC &&
+                               ret != OPEN_BUCKETS_EMPTY);
+
+                       if (cl)
+                               return -EAGAIN;
+                       if (ret == OPEN_BUCKETS_EMPTY)
+                               return -ENOSPC;
+                       alloc_failure = true;
+                       continue;
+               }
+
+               __clear_bit(ca->dev_idx, devs_may_alloc->d);
+               *nr_effective   += ca->mi.durability;
+               *have_cache     |= !ca->mi.durability;
+
+               ob_push(c, ptrs, ob);
+
+               bch2_wp_rescale(c, ca, wp);
+
+               if (*nr_effective >= nr_replicas)
+                       return 0;
+       }
+
+       return alloc_failure ? -ENOSPC : -EROFS;
+}
+
+/* Sector allocator */
+
+static int get_buckets_from_writepoint(struct bch_fs *c,
+                                      struct open_buckets *ptrs,
+                                      struct write_point *wp,
+                                      struct bch_devs_mask *devs_may_alloc,
+                                      unsigned nr_replicas,
+                                      unsigned *nr_effective,
+                                      bool *have_cache)
+{
+       struct open_buckets ptrs_skip = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+               if (*nr_effective < nr_replicas &&
+                   test_bit(ob->ptr.dev, devs_may_alloc->d) &&
+                   (ca->mi.durability ||
+                    (wp->type == BCH_DATA_USER && !*have_cache))) {
+                       __clear_bit(ob->ptr.dev, devs_may_alloc->d);
+                       *nr_effective   += ca->mi.durability;
+                       *have_cache     |= !ca->mi.durability;
+
+                       ob_push(c, ptrs, ob);
+               } else {
+                       ob_push(c, &ptrs_skip, ob);
+               }
+       }
+       wp->ptrs = ptrs_skip;
+
+       return *nr_effective < nr_replicas ? -ENOSPC : 0;
+}
+
+static int open_bucket_add_buckets(struct bch_fs *c,
+                                  struct open_buckets *ptrs,
+                                  struct write_point *wp,
+                                  struct bch_devs_list *devs_have,
+                                  u16 target,
+                                  unsigned nr_replicas,
+                                  unsigned *nr_effective,
+                                  bool *have_cache,
+                                  enum alloc_reserve reserve,
+                                  struct closure *cl)
+{
+       struct bch_devs_mask devs;
+       const struct bch_devs_mask *t;
+       struct open_bucket *ob;
+       unsigned i;
+       int ret;
+
+       percpu_down_read_preempt_disable(&c->usage_lock);
+       rcu_read_lock();
+
+       devs = c->rw_devs[wp->type];
+
+       /* Don't allocate from devices we already have pointers to: */
+       for (i = 0; i < devs_have->nr; i++)
+               __clear_bit(devs_have->devs[i], devs.d);
+
+       open_bucket_for_each(c, ptrs, ob, i)
+               __clear_bit(ob->ptr.dev, devs.d);
+
+       t = bch2_target_to_mask(c, target);
+       if (t)
+               bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
+
+       ret = get_buckets_from_writepoint(c, ptrs, wp, &devs,
+                               nr_replicas, nr_effective, have_cache);
+       if (!ret)
+               goto out;
+
+       /*
+        * Try nonblocking first, so that if one device is full we'll try from
+        * other devices:
+        */
+       ret = bch2_bucket_alloc_set(c, ptrs, wp, &devs,
+                               nr_replicas, nr_effective, have_cache,
+                               reserve, NULL);
+       if (!ret || ret == -EROFS || !cl)
+               goto out;
+
+       ret = bch2_bucket_alloc_set(c, ptrs, wp, &devs,
+                               nr_replicas, nr_effective, have_cache,
+                               reserve, cl);
+out:
+       rcu_read_unlock();
+       percpu_up_read_preempt_enable(&c->usage_lock);
+
+       return ret;
+}
+
+void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
+                         struct write_point *wp)
+{
+       struct open_buckets ptrs = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
+
+       mutex_lock(&wp->lock);
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               if (ob->ptr.dev == ca->dev_idx)
+                       open_bucket_free_unused(c, wp, ob);
+               else
+                       ob_push(c, &ptrs, ob);
+
+       wp->ptrs = ptrs;
+       mutex_unlock(&wp->lock);
+}
+
+static struct write_point *__writepoint_find(struct hlist_head *head,
+                                            unsigned long write_point)
+{
+       struct write_point *wp;
+
+       hlist_for_each_entry_rcu(wp, head, node)
+               if (wp->write_point == write_point)
+                       return wp;
+
+       return NULL;
+}
+
+static struct write_point *writepoint_find(struct bch_fs *c,
+                                          unsigned long write_point)
+{
+       struct write_point *wp, *oldest;
+       struct hlist_head *head;
+
+       if (!(write_point & 1UL)) {
+               wp = (struct write_point *) write_point;
+               mutex_lock(&wp->lock);
+               return wp;
+       }
+
+       head = writepoint_hash(c, write_point);
+restart_find:
+       wp = __writepoint_find(head, write_point);
+       if (wp) {
+lock_wp:
+               mutex_lock(&wp->lock);
+               if (wp->write_point == write_point)
+                       goto out;
+               mutex_unlock(&wp->lock);
+               goto restart_find;
+       }
+
+       oldest = NULL;
+       for (wp = c->write_points;
+            wp < c->write_points + ARRAY_SIZE(c->write_points);
+            wp++)
+               if (!oldest || time_before64(wp->last_used, oldest->last_used))
+                       oldest = wp;
+
+       mutex_lock(&oldest->lock);
+       mutex_lock(&c->write_points_hash_lock);
+       wp = __writepoint_find(head, write_point);
+       if (wp && wp != oldest) {
+               mutex_unlock(&c->write_points_hash_lock);
+               mutex_unlock(&oldest->lock);
+               goto lock_wp;
+       }
+
+       wp = oldest;
+       hlist_del_rcu(&wp->node);
+       wp->write_point = write_point;
+       hlist_add_head_rcu(&wp->node, head);
+       mutex_unlock(&c->write_points_hash_lock);
+out:
+       wp->last_used = sched_clock();
+       return wp;
+}
+
+/*
+ * Get us a write point whose open buckets we can allocate from, return with it locked:
+ */
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+                               unsigned target,
+                               struct write_point_specifier write_point,
+                               struct bch_devs_list *devs_have,
+                               unsigned nr_replicas,
+                               unsigned nr_replicas_required,
+                               enum alloc_reserve reserve,
+                               unsigned flags,
+                               struct closure *cl)
+{
+       struct write_point *wp;
+       struct open_bucket *ob;
+       unsigned nr_effective = 0;
+       struct open_buckets ptrs = { .nr = 0 };
+       bool have_cache = false;
+       int ret = 0, i;
+
+       BUG_ON(!nr_replicas || !nr_replicas_required);
+
+       wp = writepoint_find(c, write_point.v);
+
+       if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, target,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve, cl);
+       } else {
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, target,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve, NULL);
+               if (!ret)
+                       goto alloc_done;
+
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, 0,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve, cl);
+       }
+alloc_done:
+       BUG_ON(!ret && nr_effective < nr_replicas);
+
+       if (ret == -EROFS &&
+           nr_effective >= nr_replicas_required)
+               ret = 0;
+
+       if (ret)
+               goto err;
+
+       /* Free buckets we didn't use: */
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               open_bucket_free_unused(c, wp, ob);
+
+       wp->ptrs = ptrs;
+
+       wp->sectors_free = UINT_MAX;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
+
+       BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
+
+       verify_not_stale(c, &wp->ptrs);
+
+       return wp;
+err:
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               if (ptrs.nr < ARRAY_SIZE(ptrs.v))
+                       ob_push(c, &ptrs, ob);
+               else
+                       open_bucket_free_unused(c, wp, ob);
+       wp->ptrs = ptrs;
+
+       mutex_unlock(&wp->lock);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Append pointers to the space we just allocated to @e, and mark @sectors of
+ * that space as allocated out of the write point's open buckets
+ */
+void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
+                                   struct bkey_i_extent *e, unsigned sectors)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       BUG_ON(sectors > wp->sectors_free);
+       wp->sectors_free -= sectors;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+               struct bch_extent_ptr tmp = ob->ptr;
+
+               EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev));
+
+               tmp.cached = bkey_extent_is_cached(&e->k) ||
+                       (!ca->mi.durability && wp->type == BCH_DATA_USER);
+
+               tmp.offset += ca->mi.bucket_size - ob->sectors_free;
+               extent_ptr_append(e, tmp);
+
+               BUG_ON(sectors > ob->sectors_free);
+               ob->sectors_free -= sectors;
+       }
+}
+
+/*
+ * Release the write point: keep the open buckets that still have space on
+ * @wp, unlock it, and put our references to the ones we've filled
+ */
+void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
+{
+       struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+       wp->ptrs = keep;
+
+       mutex_unlock(&wp->lock);
+
+       bch2_open_buckets_put(c, &ptrs);
+}
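
The four calls above (bch2_alloc_sectors_start / bch2_alloc_sectors_append_ptrs / bch2_open_bucket_get / bch2_alloc_sectors_done) are the whole foreground allocation contract for a writer. A minimal caller-side sketch, with hypothetical names and assuming an existing struct bch_fs *c, a closure *cl to wait on, and a bkey_i_extent *e being built for some number of sectors, might look like:

	struct bch_devs_list devs_have = { .nr = 0 };
	struct open_buckets pinned = { .nr = 0 };
	struct write_point *wp;

	wp = bch2_alloc_sectors_start(c, 0 /* any target */,
				      writepoint_hashed((unsigned long) current),
				      &devs_have,
				      2 /* nr_replicas */, 1 /* nr_replicas_required */,
				      RESERVE_NONE, 0 /* flags */, cl);
	if (IS_ERR(wp))
		return PTR_ERR(wp);			/* error pointer on failure */

	sectors = min_t(unsigned, sectors, wp->sectors_free);
	bch2_alloc_sectors_append_ptrs(c, wp, e, sectors);	/* add pointers to @e */
	bch2_open_bucket_get(c, wp, &pinned);			/* pin buckets until the write lands */
	bch2_alloc_sectors_done(c, wp);				/* unlock the write point */

	/* ... submit the write ... */
	bch2_open_buckets_put(c, &pinned);			/* drop the pins once it's on disk */
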
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
new file mode 100644 (file)
index 0000000..ae9844b
--- /dev/null
@@ -0,0 +1,119 @@
+#ifndef _BCACHEFS_ALLOC_FOREGROUND_H
+#define _BCACHEFS_ALLOC_FOREGROUND_H
+
+#include "bcachefs.h"
+#include "alloc_types.h"
+
+#include <linux/hash.h>
+
+struct bkey;
+struct bch_dev;
+struct bch_fs;
+struct bch_devs_list;
+
+struct dev_alloc_list {
+       unsigned        nr;
+       u8              devs[BCH_SB_MEMBERS_MAX];
+};
+
+struct dev_alloc_list bch2_wp_alloc_list(struct bch_fs *,
+                                        struct write_point *,
+                                        struct bch_devs_mask *);
+void bch2_wp_rescale(struct bch_fs *, struct bch_dev *,
+                    struct write_point *);
+
+long bch2_bucket_alloc_new_fs(struct bch_dev *);
+
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
+                                     enum alloc_reserve, bool,
+                                     struct closure *);
+
+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
+                          struct open_bucket *ob)
+{
+       BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
+
+       obs->v[obs->nr++] = ob - c->open_buckets;
+}
+
+#define open_bucket_for_each(_c, _obs, _ob, _i)                                \
+       for ((_i) = 0;                                                  \
+            (_i) < (_obs)->nr &&                                       \
+            ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true);        \
+            (_i)++)
+
+void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
+
+static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
+{
+       if (atomic_dec_and_test(&ob->pin))
+               __bch2_open_bucket_put(c, ob);
+}
+
+static inline void bch2_open_buckets_put(struct bch_fs *c,
+                                        struct open_buckets *ptrs)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, ptrs, ob, i)
+               bch2_open_bucket_put(c, ob);
+       ptrs->nr = 0;
+}
+
+static inline void bch2_open_bucket_get(struct bch_fs *c,
+                                       struct write_point *wp,
+                                       struct open_buckets *ptrs)
+{
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
+               atomic_inc(&ob->pin);
+               ob_push(c, ptrs, ob);
+       }
+}
+
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
+                                            unsigned,
+                                            struct write_point_specifier,
+                                            struct bch_devs_list *,
+                                            unsigned, unsigned,
+                                            enum alloc_reserve,
+                                            unsigned,
+                                            struct closure *);
+
+void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
+                                   struct bkey_i_extent *, unsigned);
+void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
+
+void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
+                         struct write_point *);
+
+static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
+                                                unsigned long write_point)
+{
+       unsigned hash =
+               hash_long(write_point, ilog2(ARRAY_SIZE(c->write_points_hash)));
+
+       return &c->write_points_hash[hash];
+}
+
+static inline struct write_point_specifier writepoint_hashed(unsigned long v)
+{
+       return (struct write_point_specifier) { .v = v | 1 };
+}
+
+static inline struct write_point_specifier writepoint_ptr(struct write_point *wp)
+{
+       return (struct write_point_specifier) { .v = (unsigned long) wp };
+}
+
+static inline void writepoint_init(struct write_point *wp,
+                                  enum bch_data_type type)
+{
+       mutex_init(&wp->lock);
+       wp->type = type;
+}
+
+#endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
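
One detail worth calling out: writepoint_find() in alloc_foreground.c tells the two kinds of specifier apart by bit 0 of the value. writepoint_hashed() ORs in 1, so the value is looked up in c->write_points_hash (stealing the least-recently-used slot on a miss), while writepoint_ptr() passes an even-aligned real pointer that is simply locked and returned. A hedged sketch of constructing each kind (my_wp is a hypothetical caller-owned write point):

	/* Per-task write point, multiplexed through c->write_points_hash: */
	struct write_point_specifier spec_hashed =
		writepoint_hashed((unsigned long) current);

	/* Caller-owned write point, bypassing the hash table entirely: */
	struct write_point my_wp;
	writepoint_init(&my_wp, BCH_DATA_USER);
	struct write_point_specifier spec_direct = writepoint_ptr(&my_wp);
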
index 8a71a37637dee128da0607ff55c787dedcf60e95..94c041d2f3294f314e27b426d4ff77cefd1fade8 100644 (file)
@@ -57,6 +57,13 @@ struct open_bucket {
        struct bch_extent_ptr   ptr;
 };
 
+#define OPEN_BUCKET_LIST_MAX   15
+
+struct open_buckets {
+       u8                      nr;
+       u8                      v[OPEN_BUCKET_LIST_MAX];
+};
+
 struct write_point {
        struct hlist_node       node;
        struct mutex            lock;
@@ -64,13 +71,10 @@ struct write_point {
        unsigned long           write_point;
        enum bch_data_type      type;
 
-       u8                      nr_ptrs;
-       u8                      first_ptr;
-
        /* calculated based on how many pointers we're actually going to use: */
        unsigned                sectors_free;
 
-       struct open_bucket      *ptrs[BCH_REPLICAS_MAX * 2];
+       struct open_buckets     ptrs;
        u64                     next_alloc[BCH_SB_MEMBERS_MAX];
 };
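
struct open_buckets holds u8 indices into the filesystem-wide c->open_buckets array rather than pointers, which is how up to OPEN_BUCKET_LIST_MAX references fit in 16 bytes and why ob_push() and open_bucket_for_each() in alloc_foreground.h translate back and forth. A small hedged sketch of the pattern, assuming ob came from bch2_bucket_alloc():

	struct open_buckets obs = { .nr = 0 };
	struct open_bucket *ob2;
	unsigned i;

	ob_push(c, &obs, ob);			/* stores ob - c->open_buckets as a u8 index */

	open_bucket_for_each(c, &obs, ob2, i)	/* rehydrates each index back to a pointer */
		pr_info("dev %u, %u sectors free\n", ob2->ptr.dev, ob2->sectors_free);
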
 
index bbe9af67bb2dfc61c84c9aae6d5ebb863e857bfa..21883c57fb616df9b3264459fe7ee33e10da1ecc 100644 (file)
@@ -2,7 +2,7 @@
 #include "bcachefs.h"
 #include "bkey_methods.h"
 #include "btree_types.h"
-#include "alloc.h"
+#include "alloc_background.h"
 #include "dirent.h"
 #include "error.h"
 #include "extents.h"
index 7c18d8303aaa05c943bb10b1097f56805b3d0143..b0f9bd75588b41b5ed562ab7a777e23b0df7750d 100644 (file)
@@ -4,7 +4,8 @@
  */
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
@@ -800,7 +801,7 @@ next:
        bch2_btree_iter_node_replace(iter, new_nodes[0]);
 
        for (i = 0; i < nr_new_nodes; i++)
-               bch2_btree_open_bucket_put(c, new_nodes[i]);
+               bch2_open_buckets_put(c, &new_nodes[i]->ob);
 
        /* Free the old nodes and update our sliding window */
        for (i = 0; i < nr_old_nodes; i++) {
index 5f137af4da53e23b8ec6b05f3d6baec1e89cb291..e20dd7a2be8d3e5a57d2649277c0814ad70c4633 100644 (file)
@@ -53,13 +53,8 @@ struct btree_write {
        struct closure_waitlist         wait;
 };
 
-struct btree_ob_ref {
-       u8                      nr;
-       u8                      refs[BCH_REPLICAS_MAX];
-};
-
 struct btree_alloc {
-       struct btree_ob_ref     ob;
+       struct open_buckets     ob;
        BKEY_PADDED(k);
 };
 
@@ -126,7 +121,7 @@ struct btree {
         */
        unsigned long           will_make_reachable;
 
-       struct btree_ob_ref     ob;
+       struct open_buckets     ob;
 
        /* lru list */
        struct list_head        list;
index 9828ad0fb7afa46783e365b2b2cfeb8b6d5acd74..6d3fab8e767ef0214abb811ce1c919839b4f5d02 100644 (file)
@@ -1,6 +1,6 @@
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "btree_cache.h"
 #include "btree_gc.h"
@@ -246,7 +246,7 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)
 
 void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
 {
-       struct btree_ob_ref ob = b->ob;
+       struct open_buckets ob = b->ob;
 
        btree_update_drop_new_node(c, b);
 
@@ -258,7 +258,7 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
        __btree_node_free(c, b);
        six_unlock_write(&b->lock);
 
-       bch2_open_bucket_put_refs(c, &ob.nr, ob.refs);
+       bch2_open_buckets_put(c, &ob);
 }
 
 void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
@@ -299,11 +299,6 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
         */
 }
 
-void bch2_btree_open_bucket_put(struct bch_fs *c, struct btree *b)
-{
-       bch2_open_bucket_put_refs(c, &b->ob.nr, b->ob.refs);
-}
-
 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
                                             struct disk_reservation *res,
                                             struct closure *cl,
@@ -313,7 +308,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
        struct btree *b;
        BKEY_PADDED(k) tmp;
        struct bkey_i_extent *e;
-       struct btree_ob_ref ob;
+       struct open_buckets ob = { .nr = 0 };
        struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
        unsigned nr_reserve;
        enum alloc_reserve alloc_reserve;
@@ -355,7 +350,7 @@ retry:
                struct open_bucket *ob;
                unsigned i;
 
-               writepoint_for_each_ptr(wp, ob, i)
+               open_bucket_for_each(c, &wp->ptrs, ob, i)
                        if (ob->sectors_free < c->opts.btree_node_size)
                                ob->sectors_free = 0;
 
@@ -366,8 +361,7 @@ retry:
        e = bkey_extent_init(&tmp.k);
        bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size);
 
-       ob.nr = 0;
-       bch2_open_bucket_get(c, wp, &ob.nr, ob.refs);
+       bch2_open_bucket_get(c, wp, &ob);
        bch2_alloc_sectors_done(c, wp);
 mem_alloc:
        b = bch2_btree_node_mem_alloc(c);
@@ -488,7 +482,7 @@ static void bch2_btree_reserve_put(struct bch_fs *c, struct btree_reserve *reser
                        b->ob.nr = 0;
                        bkey_copy(&a->k, &b->key);
                } else {
-                       bch2_btree_open_bucket_put(c, b);
+                       bch2_open_buckets_put(c, &b->ob);
                }
 
                btree_node_lock_type(c, b, SIX_LOCK_write);
@@ -1431,11 +1425,11 @@ static void btree_split(struct btree_update *as, struct btree *b,
                bch2_btree_set_root(as, n1, iter);
        }
 
-       bch2_btree_open_bucket_put(c, n1);
+       bch2_open_buckets_put(c, &n1->ob);
        if (n2)
-               bch2_btree_open_bucket_put(c, n2);
+               bch2_open_buckets_put(c, &n2->ob);
        if (n3)
-               bch2_btree_open_bucket_put(c, n3);
+               bch2_open_buckets_put(c, &n3->ob);
 
        /*
         * Note - at this point other linked iterators could still have @b read
@@ -1549,7 +1543,13 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
 
        btree_node_interior_verify(b);
 
-       bch2_foreground_maybe_merge(c, iter, b->level, flags);
+       /*
+        * when called from the btree_split path the new nodes aren't added to
+        * the btree iterator yet, so the merge path's unlock/wait/relock dance
+        * won't work:
+        */
+       bch2_foreground_maybe_merge(c, iter, b->level,
+                                   flags|BTREE_INSERT_NOUNLOCK);
        return;
 split:
        btree_split(as, b, iter, keys, flags);
@@ -1744,7 +1744,7 @@ retry:
 
        bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
 
-       bch2_btree_open_bucket_put(c, n);
+       bch2_open_buckets_put(c, &n->ob);
        bch2_btree_node_free_inmem(c, b, iter);
        bch2_btree_node_free_inmem(c, m, iter);
        bch2_btree_iter_node_replace(iter, n);
@@ -1836,7 +1836,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
                bch2_btree_set_root(as, n, iter);
        }
 
-       bch2_btree_open_bucket_put(c, n);
+       bch2_open_buckets_put(c, &n->ob);
 
        bch2_btree_node_free_inmem(c, b, iter);
 
index fa30809d50f4364ae174b44bb053ddef0abafe0c..6ae17ffa4101932283444132b18a045fcedae1f4 100644 (file)
@@ -131,7 +131,6 @@ struct btree_update {
 void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
                                struct btree_iter *);
 void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
-void bch2_btree_open_bucket_put(struct bch_fs *, struct btree *);
 
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
                                                  struct btree *,
index 801f6c3735028670aeda833df3d8d615a421b8d0..052e8af8708a7f71672164bd5e99179171413328 100644 (file)
@@ -63,7 +63,7 @@
  */
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
 #include "btree_gc.h"
 #include "buckets.h"
 #include "error.h"
index c18079f9c0cb830b07384e3ef0863e490779bde9..808167d5a60ccfd7556a7312c7f76d87232b97e4 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef NO_BCACHEFS_CHARDEV
 
 #include "bcachefs.h"
-#include "alloc.h"
 #include "bcachefs_ioctl.h"
 #include "buckets.h"
 #include "chardev.h"
index d4384303a9a1d9c65ed6eee4f62c94ae6ccd52f1..250dd55fe0fd58fe4dfe538e305f13d273469fe3 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef NO_BCACHEFS_FS
 
 #include "bcachefs.h"
+#include "alloc_foreground.h"
 #include "btree_update.h"
 #include "buckets.h"
 #include "clock.h"
@@ -770,56 +771,86 @@ static void bch2_readpages_end_io(struct bio *bio)
        bio_put(bio);
 }
 
+static inline void page_state_init_for_read(struct page *page)
+{
+       SetPagePrivate(page);
+       page->private = 0;
+}
+
 struct readpages_iter {
        struct address_space    *mapping;
-       struct list_head        pages;
+       struct page             **pages;
        unsigned                nr_pages;
+       unsigned                nr_added;
+       unsigned                idx;
+       pgoff_t                 offset;
 };
 
-static inline void page_state_init_for_read(struct page *page)
+static int readpages_iter_init(struct readpages_iter *iter,
+                              struct address_space *mapping,
+                              struct list_head *pages, unsigned nr_pages)
 {
-       struct bch_page_state *s = page_state(page);
+       memset(iter, 0, sizeof(*iter));
 
-       BUG_ON(s->reserved);
-       s->sectors      = 0;
-       s->compressed   = 0;
-}
+       iter->mapping   = mapping;
+       iter->offset    = list_last_entry(pages, struct page, lru)->index;
 
-static int readpage_add_page(struct readpages_iter *iter, struct page *page)
-{
-       int ret;
+       iter->pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS);
+       if (!iter->pages)
+               return -ENOMEM;
 
-       prefetchw(&page->flags);
+       while (!list_empty(pages)) {
+               struct page *page = list_last_entry(pages, struct page, lru);
 
-       ret = add_to_page_cache_lru(page, iter->mapping,
-                                   page->index, GFP_NOFS);
-       if (!ret)
-               page_state_init_for_read(page);
+               prefetchw(&page->flags);
+               iter->pages[iter->nr_pages++] = page;
+               list_del(&page->lru);
+       }
 
-       put_page(page);
-       return ret;
+       return 0;
 }
 
 static inline struct page *readpage_iter_next(struct readpages_iter *iter)
 {
-       while (iter->nr_pages) {
-               struct page *page =
-                       list_last_entry(&iter->pages, struct page, lru);
+       struct page *page;
+       unsigned i;
+       int ret;
 
-               prefetchw(&page->flags);
-               list_del(&page->lru);
-               iter->nr_pages--;
+       BUG_ON(iter->idx > iter->nr_added);
+       BUG_ON(iter->nr_added > iter->nr_pages);
+
+       if (iter->idx < iter->nr_added)
+               goto out;
+
+       while (1) {
+               if (iter->idx == iter->nr_pages)
+                       return NULL;
+
+               ret = add_to_page_cache_lru_vec(iter->mapping,
+                               iter->pages     + iter->nr_added,
+                               iter->nr_pages  - iter->nr_added,
+                               iter->offset    + iter->nr_added,
+                               GFP_NOFS);
+               if (ret > 0)
+                       break;
 
-               if (!readpage_add_page(iter, page))
-                       return page;
+               page = iter->pages[iter->nr_added];
+               iter->idx++;
+               iter->nr_added++;
+
+               put_page(page);
        }
 
-       return NULL;
-}
+       iter->nr_added += ret;
+
+       for (i = iter->idx; i < iter->nr_added; i++)
+               put_page(iter->pages[i]);
+out:
+       EBUG_ON(iter->pages[iter->idx]->index != iter->offset + iter->idx);
 
-#define for_each_readpage_page(_iter, _page)                           \
-       for (;                                                          \
-            ((_page) = __readpage_next_page(&(_iter)));)               \
+       page_state_init_for_read(iter->pages[iter->idx]);
+       return iter->pages[iter->idx];
+}
 
 static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
 {
@@ -851,22 +882,21 @@ static void readpage_bio_extend(struct readpages_iter *iter,
                                struct bio *bio, u64 offset,
                                bool get_more)
 {
-       struct page *page;
-       pgoff_t page_offset;
-       int ret;
-
        while (bio_end_sector(bio) < offset &&
               bio->bi_vcnt < bio->bi_max_vecs) {
-               page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT;
+               pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT;
+               struct page *page = readpage_iter_next(iter);
+               int ret;
+
+               if (page) {
+                       if (iter->offset + iter->idx != page_offset)
+                               break;
 
-               if (iter->nr_pages) {
-                       page = list_last_entry(&iter->pages, struct page, lru);
-                       if (page->index != page_offset)
+                       iter->idx++;
+               } else {
+                       if (!get_more)
                                break;
 
-                       list_del(&page->lru);
-                       iter->nr_pages--;
-               } else if (get_more) {
                        rcu_read_lock();
                        page = radix_tree_lookup(&iter->mapping->i_pages, page_offset);
                        rcu_read_unlock();
@@ -878,21 +908,21 @@ static void readpage_bio_extend(struct readpages_iter *iter,
                        if (!page)
                                break;
 
-                       page->index = page_offset;
-                       ClearPageReadahead(bio->bi_io_vec[bio->bi_vcnt - 1].bv_page);
-               } else {
-                       break;
-               }
+                       page_state_init_for_read(page);
 
-               ret = readpage_add_page(iter, page);
-               if (ret)
-                       break;
+                       ret = add_to_page_cache_lru(page, iter->mapping,
+                                                   page_offset, GFP_NOFS);
+                       if (ret) {
+                               ClearPagePrivate(page);
+                               put_page(page);
+                               break;
+                       }
+
+                       put_page(page);
+               }
 
                __bio_add_page(bio, page, PAGE_SIZE, 0);
        }
-
-       if (!iter->nr_pages)
-               SetPageReadahead(bio->bi_io_vec[bio->bi_vcnt - 1].bv_page);
 }
 
 static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
@@ -974,38 +1004,43 @@ int bch2_readpages(struct file *file, struct address_space *mapping,
        struct bch_io_opts opts = io_opts(c, inode);
        struct btree_iter iter;
        struct page *page;
-       struct readpages_iter readpages_iter = {
-               .mapping = mapping, .nr_pages = nr_pages
-       };
+       struct readpages_iter readpages_iter;
+       int ret;
+
+       ret = readpages_iter_init(&readpages_iter, mapping, pages, nr_pages);
+       BUG_ON(ret);
 
        bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
                             BTREE_ITER_SLOTS);
 
-       INIT_LIST_HEAD(&readpages_iter.pages);
-       list_add(&readpages_iter.pages, pages);
-       list_del_init(pages);
-
        if (current->pagecache_lock != &mapping->add_lock)
                pagecache_add_get(&mapping->add_lock);
 
        while ((page = readpage_iter_next(&readpages_iter))) {
-               unsigned n = max_t(unsigned,
-                                  min_t(unsigned, readpages_iter.nr_pages + 1,
-                                        BIO_MAX_PAGES),
-                                  c->sb.encoded_extent_max >> PAGE_SECTOR_SHIFT);
-
+               pgoff_t index = readpages_iter.offset + readpages_iter.idx;
+               unsigned n = min_t(unsigned,
+                                  readpages_iter.nr_pages -
+                                  readpages_iter.idx,
+                                  BIO_MAX_PAGES);
                struct bch_read_bio *rbio =
                        rbio_init(bio_alloc_bioset(GFP_NOFS, n, &c->bio_read),
                                  opts);
 
+               readpages_iter.idx++;
+
+               bio_set_op_attrs(&rbio->bio, REQ_OP_READ, 0);
+               rbio->bio.bi_iter.bi_sector = (sector_t) index << PAGE_SECTOR_SHIFT;
                rbio->bio.bi_end_io = bch2_readpages_end_io;
-               bio_add_page_contig(&rbio->bio, page);
+               __bio_add_page(&rbio->bio, page, PAGE_SIZE, 0);
+
                bchfs_read(c, &iter, rbio, inode->v.i_ino, &readpages_iter);
        }
 
        if (current->pagecache_lock != &mapping->add_lock)
                pagecache_add_put(&mapping->add_lock);
 
+       kfree(readpages_iter.pages);
+
        return 0;
 }
 
index 6d5fc177747a62f36e281e5d8e00669cceda4c5b..e82e272359ee8b5f5c72a002167534f5d2701299 100644 (file)
@@ -374,17 +374,15 @@ fsck_err:
        kfree(d);
        return ret;
 err_redo:
-       bch_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)",
-               buf, strlen(buf));
-
        hash = bch2_dirent_hash_desc.hash_bkey(&h->info, *k);
 
-       if (fsck_err(c, "hash table key at wrong offset: btree %u, offset %llu, "
-                       "hashed to %llu chain starts at %llu\n%s",
-                       BTREE_ID_DIRENTS,
-                       k->k->p.offset, hash, h->chain->pos.offset,
-                       (bch2_bkey_val_to_text(c, bkey_type(0, BTREE_ID_DIRENTS),
-                                              buf, sizeof(buf), *k), buf))) {
+       if (fsck_err(c, "cannot fix dirent by removing trailing garbage %s (%zu)\n"
+                    "hash table key at wrong offset: btree %u, offset %llu, "
+                    "hashed to %llu chain starts at %llu\n%s",
+                    buf, strlen(buf), BTREE_ID_DIRENTS,
+                    k->k->p.offset, hash, h->chain->pos.offset,
+                    (bch2_bkey_val_to_text(c, bkey_type(0, BTREE_ID_DIRENTS),
+                                           buf, sizeof(buf), *k), buf))) {
                ret = hash_redo_key(bch2_dirent_hash_desc,
                                    h, c, iter, *k, hash);
                if (ret)
index 5ca2a2dd83cadcb70c92829c3bff14b256b5fc99..021a80df098fc29cc44f24c218f77469b8c888d4 100644 (file)
@@ -6,7 +6,7 @@
  */
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_foreground.h"
 #include "bset.h"
 #include "btree_update.h"
 #include "buckets.h"
@@ -352,7 +352,7 @@ static void __bch2_write_index(struct bch_write_op *op)
                }
        }
 out:
-       bch2_open_bucket_put_refs(c, &op->open_buckets_nr, op->open_buckets);
+       bch2_open_buckets_put(c, &op->open_buckets);
        return;
 err:
        keys->top = keys->keys;
@@ -798,8 +798,8 @@ static void __bch2_write(struct closure *cl)
 again:
        do {
                /* +1 for possible cache device: */
-               if (op->open_buckets_nr + op->nr_replicas + 1 >
-                   ARRAY_SIZE(op->open_buckets))
+               if (op->open_buckets.nr + op->nr_replicas + 1 >
+                   ARRAY_SIZE(op->open_buckets.v))
                        goto flush_io;
 
                if (bch2_keylist_realloc(&op->insert_keys,
@@ -830,11 +830,7 @@ again:
 
                ret = bch2_write_extent(op, wp);
 
-               BUG_ON(op->open_buckets_nr + wp->nr_ptrs - wp->first_ptr >
-                      ARRAY_SIZE(op->open_buckets));
-               bch2_open_bucket_get(c, wp,
-                                    &op->open_buckets_nr,
-                                    op->open_buckets);
+               bch2_open_bucket_get(c, wp, &op->open_buckets);
                bch2_alloc_sectors_done(c, wp);
 
                if (ret < 0)
index 68539c78f292fa7f648fe1e258ba40fd13294d4c..1724232fd2747c17201e92e87d64520ab04910d2 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef _BCACHEFS_IO_H
 #define _BCACHEFS_IO_H
 
-#include "alloc.h"
 #include "checksum.h"
 #include "io_types.h"
 
@@ -71,7 +70,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
        op->nr_replicas         = 0;
        op->nr_replicas_required = c->opts.data_replicas_required;
        op->alloc_reserve       = RESERVE_NONE;
-       op->open_buckets_nr     = 0;
+       op->open_buckets.nr     = 0;
        op->devs_have.nr        = 0;
        op->target              = 0;
        op->opts                = opts;
index 28281ea6c43a023d2dee03d6dddbfbcd663453ee..fe5779b361c5efbb2b4e0b1aa2fa66eab1c7ba5e 100644 (file)
@@ -103,7 +103,6 @@ struct bch_write_op {
        unsigned                nr_replicas_required:4;
        unsigned                alloc_reserve:4;
 
-       u8                      open_buckets_nr;
        struct bch_devs_list    devs_have;
        u16                     target;
        u16                     nonce;
@@ -120,7 +119,7 @@ struct bch_write_op {
 
        struct disk_reservation res;
 
-       u8                      open_buckets[16];
+       struct open_buckets     open_buckets;
 
        /*
         * If caller wants to flush but hasn't passed us a journal_seq ptr, we
index 634123ebdf1377473304d384da2095aaefa2c66f..f712fd1e425a806193e7a66bbdd85185853b9bd6 100644 (file)
@@ -5,7 +5,7 @@
  */
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "btree_gc.h"
 #include "buckets.h"
@@ -716,13 +716,13 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                                goto err;
                        }
                } else {
-                       int ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, cl);
-                       if (ob_idx < 0) {
+                       ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC,
+                                              false, cl);
+                       if (IS_ERR(ob)) {
                                ret = cl ? -EAGAIN : -ENOSPC;
                                goto err;
                        }
 
-                       ob = c->open_buckets + ob_idx;
                        bucket = sector_to_bucket(ca, ob->ptr.offset);
                }
 
index 2a70edc28184e38608f67ed5ea3dbbee345069b7..2f88e2422c5e8e8fafd4e3573ee39679950989ed 100644 (file)
@@ -1,5 +1,6 @@
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "buckets.h"
index 4a5e435bfe4b1bffee773c1dc1005d449a6ac403..e75e6e71ecc7ae2f854c896ce5a7a9e731e71019 100644 (file)
@@ -1,5 +1,6 @@
 
 #include "bcachefs.h"
+#include "alloc_foreground.h"
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "buckets.h"
@@ -471,7 +472,7 @@ int bch2_move_data(struct bch_fs *c,
        struct bkey_s_c_extent e;
        struct data_opts data_opts;
        enum data_cmd data_cmd;
-       u64 cur_inum = U64_MAX;
+       u64 delay, cur_inum = U64_MAX;
        int ret = 0, ret2;
 
        closure_init_stack(&ctxt.cl);
@@ -485,12 +486,29 @@ int bch2_move_data(struct bch_fs *c,
        if (rate)
                bch2_ratelimit_reset(rate);
 
-       while (!kthread || !(ret = kthread_should_stop())) {
-               if (rate &&
-                   bch2_ratelimit_delay(rate) &&
-                   (bch2_btree_iter_unlock(&stats->iter),
-                    (ret = bch2_ratelimit_wait_freezable_stoppable(rate))))
-                       break;
+       while (1) {
+               do {
+                       delay = rate ? bch2_ratelimit_delay(rate) : 0;
+
+                       if (delay) {
+                               bch2_btree_iter_unlock(&stats->iter);
+                               set_current_state(TASK_INTERRUPTIBLE);
+                       }
+
+                       if (kthread && (ret = kthread_should_stop())) {
+                               __set_current_state(TASK_RUNNING);
+                               goto out;
+                       }
+
+                       if (delay)
+                               schedule_timeout(delay);
+
+                       if (unlikely(freezing(current))) {
+                               bch2_btree_iter_unlock(&stats->iter);
+                               move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
+                               try_to_freeze();
+                       }
+               } while (delay);
 peek:
                k = bch2_btree_iter_peek(&stats->iter);
                if (!k.k)
@@ -561,7 +579,7 @@ next_nondata:
                bch2_btree_iter_next(&stats->iter);
                bch2_btree_iter_cond_resched(&stats->iter);
        }
-
+out:
        bch2_btree_iter_unlock(&stats->iter);
 
        move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
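
The open-coded loop above replaces bch2_ratelimit_wait_freezable_stoppable() (deleted from util.c at the end of this diff) so that in-flight reads can be drained with move_ctxt_wait_event() before freezing. Stripped of the bcachefs specifics, it follows the usual kthread idiom: set TASK_INTERRUPTIBLE before checking the stop condition, so a wake-up from kthread_stop() cannot slip in between the check and schedule_timeout(). A hedged, standalone sketch of that pattern:

	while (!kthread_should_stop()) {
		u64 delay = bch2_ratelimit_delay(rate);

		if (delay) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop()) {
				__set_current_state(TASK_RUNNING);
				break;
			}
			schedule_timeout(delay);
			continue;	/* re-check the rate limit */
		}

		/* ... do one rate-limited unit of work ... */
	}
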
index d414ee94cc2c3677da2468c512f4871911c25a30..468865625f3122277cc2c5a422bb28b2e6e678c2 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include "bcachefs.h"
+#include "alloc_foreground.h"
 #include "btree_iter.h"
 #include "btree_update.h"
 #include "buckets.h"
index 4154b1e97acd205c74d7bc949f5a4cacaafab781..3fbe7b10be35a0f4b8ba7310b6b5b25b91323ba4 100644 (file)
@@ -1,6 +1,6 @@
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_foreground.h"
 #include "btree_iter.h"
 #include "buckets.h"
 #include "clock.h"
@@ -112,7 +112,7 @@ static void rebalance_work_accumulate(struct rebalance_work *w,
                work = U64_MAX;
        work = min(work, capacity);
 
-       percent_full = div_u64(work * 100, capacity);
+       percent_full = div64_u64(work * 100, capacity);
 
        if (percent_full >= w->dev_most_full_percent) {
                w->dev_most_full_idx            = idx;
index 3a20a77474f11106ad05a429de122ac1753b30a6..2b053e61bde7e2c0ea64589bc1d723f5020149f0 100644 (file)
@@ -1,6 +1,6 @@
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
index f4cf44a03394bcf83aded948e349e68de371980f..be28d40f9b4153e42ae68e8f1ae60e3a1a2d146a 100644 (file)
@@ -7,7 +7,8 @@
  */
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
+#include "alloc_foreground.h"
 #include "btree_cache.h"
 #include "btree_gc.h"
 #include "btree_update_interior.h"
index b353d7cdb6cdb884544bded4a427185406eeb298..3038b455209f973cf1958d4efa63e7ba84b96623 100644 (file)
@@ -8,7 +8,7 @@
 #ifndef NO_BCACHEFS_SYSFS
 
 #include "bcachefs.h"
-#include "alloc.h"
+#include "alloc_background.h"
 #include "compress.h"
 #include "sysfs.h"
 #include "btree_cache.h"
index 1272ea7a7a2864d7f55c989f07b4f85a2ece8144..5cfaed5b305c12514bd5f01cb6910756d85bd55c 100644 (file)
@@ -421,27 +421,6 @@ void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
                d->next = now - NSEC_PER_SEC * 2;
 }
 
-int bch2_ratelimit_wait_freezable_stoppable(struct bch_ratelimit *d)
-{
-       bool kthread = (current->flags & PF_KTHREAD) != 0;
-
-       while (1) {
-               u64 delay = bch2_ratelimit_delay(d);
-
-               if (delay)
-                       set_current_state(TASK_INTERRUPTIBLE);
-
-               if (kthread && kthread_should_stop())
-                       return 1;
-
-               if (!delay)
-                       return 0;
-
-               schedule_timeout(delay);
-               try_to_freeze();
-       }
-}
-
 /* pd controller: */
 
 /*
index 487591c4c19852777dd4a07cadc96aa756849aea..178bf98308e882c6362508241a564b1859b3b15c 100644 (file)
@@ -383,7 +383,6 @@ static inline void bch2_ratelimit_reset(struct bch_ratelimit *d)
 
 u64 bch2_ratelimit_delay(struct bch_ratelimit *);
 void bch2_ratelimit_increment(struct bch_ratelimit *, u64);
-int bch2_ratelimit_wait_freezable_stoppable(struct bch_ratelimit *);
 
 struct bch_pd_controller {
        struct bch_ratelimit    rate;