]> git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/btree_update.h
Update bcachefs sources to 2272c5f5b7 bcachefs: Mark stripe buckets with correct...
[bcachefs-tools-debian] / libbcachefs / btree_update.h
index b18c44c744449537c5cf0bda8a3637c41c72fd33..ee1d15931022f42a4331d48e70d0f1ffaffee501 100644 (file)
-#ifndef _BCACHE_BTREE_INSERT_H
-#define _BCACHE_BTREE_INSERT_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_UPDATE_H
+#define _BCACHEFS_BTREE_UPDATE_H
 
-#include "btree_cache.h"
 #include "btree_iter.h"
-#include "buckets.h"
 #include "journal.h"
-#include "vstructs.h"
 
 struct bch_fs;
-struct bkey_format_state;
-struct bkey_format;
 struct btree;
 
-static inline void btree_node_reset_sib_u64s(struct btree *b)
-{
-       b->sib_u64s[0] = b->nr.live_u64s;
-       b->sib_u64s[1] = b->nr.live_u64s;
-}
-
-struct btree_reserve {
-       struct disk_reservation disk_res;
-       unsigned                nr;
-       struct btree            *b[BTREE_RESERVE_MAX];
-};
-
-void __bch2_btree_calc_format(struct bkey_format_state *, struct btree *);
-bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *,
-                               struct bkey_format *);
-
-/* Btree node freeing/allocation: */
-
-/*
- * Tracks a btree node that has been (or is about to be) freed in memory, but
- * has _not_ yet been freed on disk (because the write that makes the new
- * node(s) visible and frees the old hasn't completed yet)
- */
-struct pending_btree_node_free {
-       bool                    index_update_done;
-
-       __le64                  seq;
-       enum btree_id           btree_id;
-       unsigned                level;
-       __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
-};
-
-/*
- * Tracks an in progress split/rewrite of a btree node and the update to the
- * parent node:
- *
- * When we split/rewrite a node, we do all the updates in memory without
- * waiting for any writes to complete - we allocate the new node(s) and update
- * the parent node, possibly recursively up to the root.
- *
- * The end result is that we have one or more new nodes being written -
- * possibly several, if there were multiple splits - and then a write (updating
- * an interior node) which will make all these new nodes visible.
- *
- * Additionally, as we split/rewrite nodes we free the old nodes - but the old
- * nodes can't be freed (their space on disk can't be reclaimed) until the
- * update to the interior node that makes the new node visible completes -
- * until then, the old nodes are still reachable on disk.
- *
- */
-struct btree_interior_update {
-       struct closure                  cl;
-       struct bch_fs           *c;
-
-       struct list_head                list;
-
-       /* What kind of update are we doing? */
-       enum {
-               BTREE_INTERIOR_NO_UPDATE,
-               BTREE_INTERIOR_UPDATING_NODE,
-               BTREE_INTERIOR_UPDATING_ROOT,
-               BTREE_INTERIOR_UPDATING_AS,
-       } mode;
-
-       /*
-        * BTREE_INTERIOR_UPDATING_NODE:
-        * The update that made the new nodes visible was a regular update to an
-        * existing interior node - @b. We can't write out the update to @b
-        * until the new nodes we created are finished writing, so we block @b
-        * from writing by putting this btree_interior update on the
-        * @b->write_blocked list with @write_blocked_list:
-        */
-       struct btree                    *b;
-       struct list_head                write_blocked_list;
-
-       /*
-        * BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
-        * we're now blocking another btree_interior_update
-        * @parent_as - btree_interior_update that's waiting on our nodes to finish
-        * writing, before it can make new nodes visible on disk
-        * @wait - list of child btree_interior_updates that are waiting on this
-        * btree_interior_update to make all the new nodes visible before they can free
-        * their old btree nodes
-        */
-       struct btree_interior_update    *parent_as;
-       struct closure_waitlist         wait;
-
-       /*
-        * We may be freeing nodes that were dirty, and thus had journal entries
-        * pinned: we need to transfer the oldest of those pins to the
-        * btree_interior_update operation, and release it when the new node(s)
-        * are all persistent and reachable:
-        */
-       struct journal_entry_pin        journal;
-
-       u64                             journal_seq;
-
-       /*
-        * Nodes being freed:
-        * Protected by c->btree_node_pending_free_lock
-        */
-       struct pending_btree_node_free  pending[BTREE_MAX_DEPTH + GC_MERGE_NODES];
-       unsigned                        nr_pending;
-
-       /* Only here to reduce stack usage on recursive splits: */
-       struct keylist                  parent_keys;
-       /*
-        * Enough room for btree_split's keys without realloc - btree node
-        * pointers never have crc/compression info, so we only need to acount
-        * for the pointers for three keys
-        */
-       u64                             inline_keys[BKEY_BTREE_PTR_U64s_MAX * 3];
+void bch2_btree_node_prep_for_write(struct btree_trans *,
+                                   struct btree_path *, struct btree *);
+bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
+                               struct btree *, struct btree_node_iter *,
+                               struct bkey_i *);
+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
+
+void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
+                               struct bkey_i *, u64);
+
+enum btree_insert_flags {
+       /* First two bits for journal watermark: */
+       __BTREE_INSERT_NOFAIL = 2,
+       __BTREE_INSERT_NOCHECK_RW,
+       __BTREE_INSERT_LAZY_RW,
+       __BTREE_INSERT_USE_RESERVE,
+       __BTREE_INSERT_JOURNAL_REPLAY,
+       __BTREE_INSERT_JOURNAL_RECLAIM,
+       __BTREE_INSERT_NOWAIT,
+       __BTREE_INSERT_GC_LOCK_HELD,
+       __BCH_HASH_SET_MUST_CREATE,
+       __BCH_HASH_SET_MUST_REPLACE,
 };
 
-#define for_each_pending_btree_node_free(c, as, p)                     \
-       list_for_each_entry(as, &c->btree_interior_update_list, list)   \
-               for (p = as->pending; p < as->pending + as->nr_pending; p++)
-
-void bch2_btree_node_free_inmem(struct btree_iter *, struct btree *);
-void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
-void bch2_btree_open_bucket_put(struct bch_fs *c, struct btree *);
-
-struct btree *__bch2_btree_node_alloc_replacement(struct bch_fs *,
-                                            struct btree *,
-                                            struct bkey_format,
-                                            struct btree_reserve *);
-
-struct btree_interior_update *
-bch2_btree_interior_update_alloc(struct bch_fs *);
-
-void bch2_btree_interior_update_will_free_node(struct bch_fs *,
-                                             struct btree_interior_update *,
-                                             struct btree *);
-
-void bch2_btree_set_root_initial(struct bch_fs *, struct btree *,
-                               struct btree_reserve *);
-
-void bch2_btree_reserve_put(struct bch_fs *, struct btree_reserve *);
-struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *,
-                                           struct btree *, unsigned,
-                                           unsigned, struct closure *);
-
-int bch2_btree_root_alloc(struct bch_fs *, enum btree_id, struct closure *);
-
-/* Inserting into a given leaf node (last stage of insert): */
-
-bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
-                              struct btree_node_iter *, struct bkey_i *);
-void bch2_btree_journal_key(struct btree_insert *trans, struct btree_iter *,
-                          struct bkey_i *);
-
-static inline void *btree_data_end(struct bch_fs *c, struct btree *b)
-{
-       return (void *) b->data + btree_bytes(c);
-}
-
-static inline struct bkey_packed *unwritten_whiteouts_start(struct bch_fs *c,
-                                                           struct btree *b)
-{
-       return (void *) ((u64 *) btree_data_end(c, b) - b->whiteout_u64s);
-}
-
-static inline struct bkey_packed *unwritten_whiteouts_end(struct bch_fs *c,
-                                                         struct btree *b)
-{
-       return btree_data_end(c, b);
-}
-
-static inline void *write_block(struct btree *b)
-{
-       return (void *) b->data + (b->written << 9);
-}
-
-static inline bool bset_written(struct btree *b, struct bset *i)
-{
-       return (void *) i < write_block(b);
-}
-
-static inline bool bset_unwritten(struct btree *b, struct bset *i)
-{
-       return (void *) i > write_block(b);
-}
-
-static inline unsigned bset_end_sector(struct bch_fs *c, struct btree *b,
-                                      struct bset *i)
-{
-       return round_up(bset_byte_offset(b, vstruct_end(i)),
-                       block_bytes(c)) >> 9;
-}
+/* Don't check for -ENOSPC: */
+#define BTREE_INSERT_NOFAIL            (1 << __BTREE_INSERT_NOFAIL)
 
-static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
-                                                  struct btree *b)
-{
-       struct bset *i = btree_bset_last(b);
-       unsigned used = bset_byte_offset(b, vstruct_end(i)) / sizeof(u64) +
-               b->whiteout_u64s +
-               b->uncompacted_whiteout_u64s;
-       unsigned total = c->sb.btree_node_size << 6;
+#define BTREE_INSERT_NOCHECK_RW                (1 << __BTREE_INSERT_NOCHECK_RW)
+#define BTREE_INSERT_LAZY_RW           (1 << __BTREE_INSERT_LAZY_RW)
 
-       EBUG_ON(used > total);
+/* for copygc, or when merging btree nodes */
+#define BTREE_INSERT_USE_RESERVE       (1 << __BTREE_INSERT_USE_RESERVE)
 
-       if (bset_written(b, i))
-               return 0;
+/* Insert is for journal replay - don't get journal reservations: */
+#define BTREE_INSERT_JOURNAL_REPLAY    (1 << __BTREE_INSERT_JOURNAL_REPLAY)
 
-       return total - used;
-}
+/* Insert is being called from journal reclaim path: */
+#define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM)
 
-static inline unsigned btree_write_set_buffer(struct btree *b)
-{
-       /*
-        * Could buffer up larger amounts of keys for btrees with larger keys,
-        * pending benchmarking:
-        */
-       return 4 << 10;
-}
-
-static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
-                                                    struct btree *b)
-{
-       struct bset *i = btree_bset_last(b);
-       unsigned offset = max_t(unsigned, b->written << 9,
-                               bset_byte_offset(b, vstruct_end(i)));
-       ssize_t n = (ssize_t) btree_bytes(c) - (ssize_t)
-               (offset + sizeof(struct btree_node_entry) +
-                b->whiteout_u64s * sizeof(u64) +
-                b->uncompacted_whiteout_u64s * sizeof(u64));
-
-       EBUG_ON(offset > btree_bytes(c));
-
-       if ((unlikely(bset_written(b, i)) && n > 0) ||
-           (unlikely(vstruct_bytes(i) > btree_write_set_buffer(b)) &&
-            n > btree_write_set_buffer(b)))
-               return (void *) b->data + offset;
-
-       return NULL;
-}
+/* Don't block on allocation failure (for new btree nodes): */
+#define BTREE_INSERT_NOWAIT            (1 << __BTREE_INSERT_NOWAIT)
+#define BTREE_INSERT_GC_LOCK_HELD      (1 << __BTREE_INSERT_GC_LOCK_HELD)
 
-/*
- * write lock must be held on @b (else the dirty bset that we were going to
- * insert into could be written out from under us)
- */
-static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
-                                             struct btree *b, unsigned u64s)
-{
-       if (btree_node_is_extents(b)) {
-               /* The insert key might split an existing key
-                * (bch2_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case:
-                */
-               u64s += BKEY_EXTENT_U64s_MAX;
-       }
+#define BCH_HASH_SET_MUST_CREATE       (1 << __BCH_HASH_SET_MUST_CREATE)
+#define BCH_HASH_SET_MUST_REPLACE      (1 << __BCH_HASH_SET_MUST_REPLACE)
 
-       return u64s <= bch_btree_keys_u64s_remaining(c, b);
-}
+int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
+                               unsigned, unsigned);
+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
 
-static inline void unreserve_whiteout(struct btree *b, struct bset_tree *t,
-                                     struct bkey_packed *k)
-{
-       if (bset_written(b, bset(b, t))) {
-               EBUG_ON(b->uncompacted_whiteout_u64s <
-                       bkeyp_key_u64s(&b->format, k));
-               b->uncompacted_whiteout_u64s -=
-                       bkeyp_key_u64s(&b->format, k);
-       }
-}
+int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
+                               struct bkey_i *, enum btree_update_flags);
 
-static inline void reserve_whiteout(struct btree *b, struct bset_tree *t,
-                                   struct bkey_packed *k)
-{
-       if (bset_written(b, bset(b, t))) {
-               BUG_ON(!k->needs_whiteout);
-               b->uncompacted_whiteout_u64s +=
-                       bkeyp_key_u64s(&b->format, k);
-       }
-}
+int __bch2_btree_insert(struct btree_trans *, enum btree_id, struct bkey_i *,
+                       enum btree_update_flags);
+int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
+                    struct disk_reservation *, u64 *, int flags);
 
-void bch2_btree_insert_node(struct btree *, struct btree_iter *,
-                          struct keylist *, struct btree_reserve *,
-                          struct btree_interior_update *as);
-
-/* Normal update interface: */
-
-struct btree_insert {
-       struct bch_fs   *c;
-       struct disk_reservation *disk_res;
-       struct journal_res      journal_res;
-       u64                     *journal_seq;
-       struct extent_insert_hook *hook;
-       unsigned                flags;
-       bool                    did_work;
-
-       unsigned short          nr;
-       struct btree_insert_entry {
-               struct btree_iter *iter;
-               struct bkey_i   *k;
-               unsigned        extra_res;
-               /*
-                * true if entire key was inserted - can only be false for
-                * extents
-                */
-               bool            done;
-       }                       *entries;
-};
+int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
+                                 struct bpos, struct bpos, unsigned, u64 *);
+int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
+                           struct bpos, struct bpos, unsigned, u64 *);
 
-int __bch2_btree_insert_at(struct btree_insert *);
+int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *,
+                           struct btree *, unsigned);
+void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *);
+int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
+                              struct btree *, struct bkey_i *, bool);
+int bch2_btree_node_update_key_get_iter(struct btree_trans *,
+                               struct btree *, struct bkey_i *, bool);
 
+int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *,
+                            struct bkey_i *, enum btree_update_flags);
 
-#define _TENTH_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, N, ...)   N
-#define COUNT_ARGS(...)  _TENTH_ARG(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
+int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
+                                  struct bkey_i *, enum btree_update_flags);
+int __must_check bch2_trans_update_buffered(struct btree_trans *,
+                                           enum btree_id, struct bkey_i *);
 
-#define BTREE_INSERT_ENTRY(_iter, _k)                                  \
-       ((struct btree_insert_entry) {                                  \
-               .iter           = (_iter),                              \
-               .k              = (_k),                                 \
-               .done           = false,                                \
-       })
+void bch2_trans_commit_hook(struct btree_trans *,
+                           struct btree_trans_commit_hook *);
+int __bch2_trans_commit(struct btree_trans *, unsigned);
 
-#define BTREE_INSERT_ENTRY_EXTRA_RES(_iter, _k, _extra)                        \
-       ((struct btree_insert_entry) {                                  \
-               .iter           = (_iter),                              \
-               .k              = (_k),                                 \
-               .extra_res = (_extra),                                  \
-               .done           = false,                                \
-       })
+int bch2_trans_log_msg(struct btree_trans *, const char *, ...);
+int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
 
 /**
- * bch_btree_insert_at - insert one or more keys at iterator positions
- * @iter:              btree iterator
- * @insert_key:                key to insert
- * @disk_res:          disk reservation
- * @hook:              extent insert callback
+ * bch2_trans_commit - insert keys at given iterator positions
+ *
+ * This is the main entry point for btree updates.
  *
  * Return values:
- * -EINTR: locking changed, this function should be called again. Only returned
- *  if passed BTREE_INSERT_ATOMIC.
  * -EROFS: filesystem read only
  * -EIO: journal or btree node IO error
  */
-#define bch2_btree_insert_at(_c, _disk_res, _hook,                     \
-                           _journal_seq, _flags, ...)                  \
-       __bch2_btree_insert_at(&(struct btree_insert) {                 \
-               .c              = (_c),                                 \
-               .disk_res       = (_disk_res),                          \
-               .journal_seq    = (_journal_seq),                       \
-               .hook           = (_hook),                              \
-               .flags          = (_flags),                             \
-               .nr             = COUNT_ARGS(__VA_ARGS__),              \
-               .entries        = (struct btree_insert_entry[]) {       \
-                       __VA_ARGS__                                     \
-               }})
-
-/*
- * Don't drop/retake locks: instead return -EINTR if need to upgrade to intent
- * locks, -EAGAIN if need to wait on btree reserve
- */
-#define BTREE_INSERT_ATOMIC            (1 << 0)
-
-/* Don't check for -ENOSPC: */
-#define BTREE_INSERT_NOFAIL            (1 << 1)
-
-/* for copygc, or when merging btree nodes */
-#define BTREE_INSERT_USE_RESERVE       (1 << 2)
-
-/*
- * Insert is for journal replay: don't get journal reservations, or mark extents
- * (bch_mark_key)
- */
-#define BTREE_INSERT_JOURNAL_REPLAY    (1 << 3)
+static inline int bch2_trans_commit(struct btree_trans *trans,
+                                   struct disk_reservation *disk_res,
+                                   u64 *journal_seq,
+                                   unsigned flags)
+{
+       trans->disk_res         = disk_res;
+       trans->journal_seq      = journal_seq;
 
-int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
-                            struct disk_reservation *,
-                            struct extent_insert_hook *, u64 *, unsigned);
+       return __bch2_trans_commit(trans, flags);
+}
 
-static inline bool journal_res_insert_fits(struct btree_insert *trans,
-                                          struct btree_insert_entry *insert)
+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)        \
+       lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+                                       (_journal_seq), (_flags)))
+
+#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
+       nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+                                       (_journal_seq), (_flags)))
+
+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do)                \
+({                                                                     \
+       struct btree_trans trans;                                       \
+       int _ret;                                                       \
+                                                                       \
+       bch2_trans_init(&trans, (_c), 0, 0);                            \
+       _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
+       bch2_trans_exit(&trans);                                        \
+                                                                       \
+       _ret;                                                           \
+})
+
+#define bch2_trans_run(_c, _do)                                                \
+({                                                                     \
+       struct btree_trans trans;                                       \
+       int _ret;                                                       \
+                                                                       \
+       bch2_trans_init(&trans, (_c), 0, 0);                            \
+       _ret = (_do);                                                   \
+       bch2_trans_exit(&trans);                                        \
+                                                                       \
+       _ret;                                                           \
+})
+
+#define trans_for_each_update(_trans, _i)                              \
+       for ((_i) = (_trans)->updates;                                  \
+            (_i) < (_trans)->updates + (_trans)->nr_updates;           \
+            (_i)++)
+
+#define trans_for_each_wb_update(_trans, _i)                           \
+       for ((_i) = (_trans)->wb_updates;                               \
+            (_i) < (_trans)->wb_updates + (_trans)->nr_wb_updates;     \
+            (_i)++)
+
+static inline void bch2_trans_reset_updates(struct btree_trans *trans)
 {
-       unsigned u64s = 0;
        struct btree_insert_entry *i;
 
-       /*
-        * If we didn't get a journal reservation, we're in journal replay and
-        * we're not journalling updates:
-        */
-       if (!trans->journal_res.ref)
-               return true;
-
-       for (i = insert; i < trans->entries + trans->nr; i++)
-               u64s += jset_u64s(i->k->k.u64s + i->extra_res);
-
-       return u64s <= trans->journal_res.u64s;
+       trans_for_each_update(trans, i)
+               bch2_path_put(trans, i->path, true);
+
+       trans->extra_journal_res        = 0;
+       trans->nr_updates               = 0;
+       trans->nr_wb_updates            = 0;
+       trans->wb_updates               = NULL;
+       trans->hooks                    = NULL;
+       trans->extra_journal_entries.nr = 0;
+
+       if (trans->fs_usage_deltas) {
+               trans->fs_usage_deltas->used = 0;
+               memset((void *) trans->fs_usage_deltas +
+                      offsetof(struct replicas_delta_list, memset_start), 0,
+                      (void *) &trans->fs_usage_deltas->memset_end -
+                      (void *) &trans->fs_usage_deltas->memset_start);
+       }
 }
 
-int bch2_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
-int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
-                    struct disk_reservation *,
-                    struct extent_insert_hook *, u64 *, int flags);
-int bch2_btree_update(struct bch_fs *, enum btree_id,
-                    struct bkey_i *, u64 *);
-
-int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
-                          struct bpos, struct bpos, struct bversion,
-                          struct disk_reservation *,
-                          struct extent_insert_hook *, u64 *);
-
-int bch2_btree_node_rewrite(struct btree_iter *, struct btree *, struct closure *);
-
-#endif /* _BCACHE_BTREE_INSERT_H */
-
+#endif /* _BCACHEFS_BTREE_UPDATE_H */