#include <linux/rhashtable.h>
#include <linux/six.h>
-#include "bkey_methods.h"
+//#include "bkey_methods.h"
#include "buckets_types.h"
+#include "darray.h"
#include "journal_types.h"
struct open_bucket;
struct six_lock lock;
u8 level;
u8 btree_id;
+ bool cached;
};
struct btree {
struct mutex lock;
struct list_head live;
struct list_head freeable;
- struct list_head freed;
+ struct list_head freed_pcpu;
+ struct list_head freed_nonpcpu;
/* Number of elements in live + freeable lists */
unsigned used;
unsigned reserve;
+ unsigned freed;
+ unsigned not_freed_lock_intent;
+ unsigned not_freed_lock_write;
+ unsigned not_freed_dirty;
+ unsigned not_freed_read_in_flight;
+ unsigned not_freed_write_in_flight;
+ unsigned not_freed_noevict;
+ unsigned not_freed_write_blocked;
+ unsigned not_freed_will_make_reachable;
+ unsigned not_freed_access_bit;
atomic_t dirty;
struct shrinker shrink;
* Iterate over all possible positions, synthesizing deleted keys for holes:
*/
#define BTREE_ITER_SLOTS (1 << 0)
+#define BTREE_ITER_ALL_LEVELS (1 << 1)
/*
* Indicates that intent locks should be taken on leaf nodes, because we expect
* to be doing updates:
*/
-#define BTREE_ITER_INTENT (1 << 1)
+#define BTREE_ITER_INTENT (1 << 2)
/*
* Causes the btree iterator code to prefetch additional btree nodes from disk:
*/
-#define BTREE_ITER_PREFETCH (1 << 2)
-/*
- * Indicates that this iterator should not be reused until transaction commit,
- * either because a pending update references it or because the update depends
- * on that particular key being locked (e.g. by the str_hash code, for hash
- * table consistency)
- */
-#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 3)
+#define BTREE_ITER_PREFETCH (1 << 3)
/*
* Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
* @pos or the first key strictly greater than @pos
#define BTREE_ITER_IS_EXTENTS (1 << 4)
#define BTREE_ITER_NOT_EXTENTS (1 << 5)
#define BTREE_ITER_CACHED (1 << 6)
-#define BTREE_ITER_CACHED_NOFILL (1 << 7)
-#define BTREE_ITER_CACHED_NOCREATE (1 << 8)
-#define BTREE_ITER_WITH_KEY_CACHE (1 << 9)
-#define BTREE_ITER_WITH_UPDATES (1 << 10)
-#define BTREE_ITER_WITH_JOURNAL (1 << 11)
-#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
-#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14)
-#define BTREE_ITER_NOPRESERVE (1 << 15)
+#define BTREE_ITER_WITH_KEY_CACHE (1 << 7)
+#define BTREE_ITER_WITH_UPDATES (1 << 8)
+#define BTREE_ITER_WITH_JOURNAL (1 << 9)
+#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10)
+#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
+#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12)
+#define BTREE_ITER_NOPRESERVE (1 << 13)
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
BTREE_ITER_NEED_TRAVERSE = 2,
};
-#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
-#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
-#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
-#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
-#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
-#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
-#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
-#define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8)
-
struct btree_path {
u8 idx;
u8 sorted_idx;
*/
bool should_be_locked:1;
unsigned level:3,
- locks_want:4,
- nodes_locked:4,
- nodes_intent_locked:4;
+ locks_want:4;
+ u8 nodes_locked;
struct btree_path_level {
struct btree *b;
struct btree_node_iter iter;
u32 lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ u64 lock_taken_time;
+#endif
} l[BTREE_MAX_DEPTH];
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned long ip_allocated;
struct btree_path *key_cache_path;
enum btree_id btree_id:4;
- unsigned min_depth:4;
+ unsigned min_depth:3;
+ unsigned advanced:1;
/* btree_iter_copy starts here: */
u16 flags;
* bch2_btree_iter_next_slot() can correctly advance pos.
*/
struct bkey k;
+
+ /* BTREE_ITER_WITH_JOURNAL: */
+ size_t journal_idx;
+ struct bpos journal_pos;
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned long ip_allocated;
#endif
};
+struct btree_key_cache_freelist { /* small fixed-capacity batch of bkey_cached pointers */
+ struct bkey_cached *objs[16]; /* slots for up to 16 entries */
+ unsigned nr; /* presumably the number of occupied slots in objs[] -- confirm against users */
+};
+
struct btree_key_cache {
struct mutex lock;
struct rhashtable table;
bool table_init_done;
- struct list_head freed;
+ struct list_head freed_pcpu; /* NOTE(review): presumably freed entries that went through the per-CPU path -- confirm */
+ struct list_head freed_nonpcpu; /* NOTE(review): presumably freed entries that did not -- confirm */
struct shrinker shrink;
unsigned shrink_iter;
+ struct btree_key_cache_freelist __percpu *pcpu_freed; /* per-CPU instance of struct btree_key_cache_freelist */
- size_t nr_freed;
+ atomic_long_t nr_freed; /* atomic counter, same type as nr_keys/nr_dirty below */
atomic_long_t nr_keys;
atomic_long_t nr_dirty;
};
struct btree_bkey_cached_common c;
unsigned long flags;
- u8 u64s;
+ u16 u64s;
bool valid;
u32 btree_trans_barrier_seq;
struct bkey_cached_key key;
struct bkey_i *k;
};
+static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
+{ /* Return the position stored in whichever object embeds @b as its member 'c'; b->cached picks the container type. */
+ return !b->cached
+ ? container_of(b, struct btree, c)->key.k.p /* not cached: @b lives inside a struct btree */
+ : container_of(b, struct bkey_cached, c)->key.pos; /* cached: @b lives inside a struct bkey_cached */
+}
+
struct btree_insert_entry {
unsigned flags;
u8 bkey_type;
enum btree_id btree_id:8;
- u8 level;
+ u8 level:4;
bool cached:1;
bool insert_trigger_run:1;
bool overwrite_trigger_run:1;
+ bool key_cache_already_flushed:1;
+ /*
+ * @old_k may be a key from the journal; @old_btree_u64s always refers
+ * to the size of the key being overwritten in the btree:
+ */
+ u8 old_btree_u64s;
struct bkey_i *k;
struct btree_path *path;
+ /* key being overwritten: */
+ struct bkey old_k;
+ const struct bch_val *old_v;
unsigned long ip_allocated;
};
struct btree_trans_commit_hook *next;
};
-#define BTREE_TRANS_MEM_MAX (1U << 14)
+#define BTREE_TRANS_MEM_MAX (1U << 16) /* 65536; NOTE(review): presumably a byte limit for btree_trans memory -- confirm */
+
+#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS 10000 /* presumably nanoseconds (per the _NS suffix), i.e. 10 us -- confirm */
struct btree_trans {
struct bch_fs *c;
const char *fn;
+ struct closure ref;
struct list_head list;
- struct btree *locking;
- unsigned locking_path_idx;
- struct bpos locking_pos;
- u8 locking_btree_id;
- u8 locking_level;
- u8 locking_lock_type;
- pid_t pid;
+ u64 last_begin_time;
+
+ u8 lock_may_not_fail;
+ u8 lock_must_abort;
+ struct btree_bkey_cached_common *locking;
+ struct six_lock_waiter locking_wait;
+
int srcu_idx;
+ u8 fn_idx;
u8 nr_sorted;
u8 nr_updates;
+ u8 traverse_all_idx;
bool used_mempool:1;
bool in_traverse_all:1;
- bool restarted:1;
- bool journal_transaction_names:1;
+ bool memory_allocation_failure:1;
+ bool is_initial_gc:1;
+ bool journal_replay_not_finished:1;
+ enum bch_errcode restarted:16;
+ u32 restart_count;
+ unsigned long last_restarted_ip;
+
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
*/
unsigned extra_journal_res;
+ unsigned nr_max_paths;
u64 paths_allocated;
unsigned mem_top;
+ unsigned mem_max;
unsigned mem_bytes;
void *mem;
/* update path: */
struct btree_trans_commit_hook *hooks;
- struct jset_entry *extra_journal_entries;
- unsigned extra_journal_entry_u64s;
+ DARRAY(u64) extra_journal_entries;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
struct replicas_delta_list *fs_usage_deltas;
};
-#define BTREE_FLAG(flag) \
+#define BTREE_FLAGS() /* x-macro list of btree node flag names; each expansion site supplies its own x() */ \
+ x(read_in_flight) \
+ x(read_error) \
+ x(dirty) \
+ x(need_write) \
+ x(write_blocked) \
+ x(will_make_reachable) \
+ x(noevict) \
+ x(write_idx) \
+ x(accessed) \
+ x(write_in_flight) \
+ x(write_in_flight_inner) \
+ x(just_written) \
+ x(dying) \
+ x(fake) \
+ x(need_rewrite) \
+ x(never_write)
+
+enum btree_flags { /* one BTREE_NODE_<flag> enumerator per BTREE_FLAGS() entry, in list order; used as bit numbers in b->flags */
+#define x(flag) BTREE_NODE_##flag,
+ BTREE_FLAGS()
+#undef x
+};
+
+#define x(flag) \
static inline bool btree_node_ ## flag(struct btree *b) \
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
\
static inline void clear_btree_node_ ## flag(struct btree *b) \
{ clear_bit(BTREE_NODE_ ## flag, &b->flags); }
-enum btree_flags {
- BTREE_NODE_read_in_flight,
- BTREE_NODE_read_error,
- BTREE_NODE_dirty,
- BTREE_NODE_need_write,
- BTREE_NODE_noevict,
- BTREE_NODE_write_idx,
- BTREE_NODE_accessed,
- BTREE_NODE_write_in_flight,
- BTREE_NODE_write_in_flight_inner,
- BTREE_NODE_just_written,
- BTREE_NODE_dying,
- BTREE_NODE_fake,
- BTREE_NODE_need_rewrite,
- BTREE_NODE_never_write,
-};
-
-BTREE_FLAG(read_in_flight);
-BTREE_FLAG(read_error);
-BTREE_FLAG(need_write);
-BTREE_FLAG(noevict);
-BTREE_FLAG(write_idx);
-BTREE_FLAG(accessed);
-BTREE_FLAG(write_in_flight);
-BTREE_FLAG(write_in_flight_inner);
-BTREE_FLAG(just_written);
-BTREE_FLAG(dying);
-BTREE_FLAG(fake);
-BTREE_FLAG(need_rewrite);
-BTREE_FLAG(never_write);
+BTREE_FLAGS()
+#undef x
static inline struct btree_write *btree_current_write(struct btree *b)
{
return __btree_node_type(b->c.level, b->c.btree_id);
}
-static inline bool btree_node_type_is_extents(enum btree_node_type type)
-{
- switch (type) {
- case BKEY_TYPE_extents:
- case BKEY_TYPE_reflink:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool btree_node_is_extents(struct btree *b)
-{
- return btree_node_type_is_extents(btree_node_type(b));
-}
-
#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \
((1U << BKEY_TYPE_extents)| \
+ (1U << BKEY_TYPE_alloc)| \
(1U << BKEY_TYPE_inodes)| \
(1U << BKEY_TYPE_stripes)| \
(1U << BKEY_TYPE_reflink)| \
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
+#define BTREE_ID_IS_EXTENTS \
+ ((1U << BTREE_ID_extents)| \
+ (1U << BTREE_ID_reflink)| \
+ (1U << BTREE_ID_freespace))
+
+static inline bool btree_node_type_is_extents(enum btree_node_type type)
+{ /* True when bit @type is set in BTREE_ID_IS_EXTENTS (the extents, reflink and freespace bits). */
+ return (1U << type) & BTREE_ID_IS_EXTENTS; /* NOTE(review): mask is built from BTREE_ID_* but indexed by a btree_node_type; assumes the two enums share values here -- confirm */
+}
+
#define BTREE_ID_HAS_SNAPSHOTS \
((1U << BTREE_ID_extents)| \
(1U << BTREE_ID_inodes)| \
return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
}
-enum btree_update_flags {
- __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
- __BTREE_UPDATE_KEY_CACHE_RECLAIM,
-
- __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
-
- __BTREE_TRIGGER_INSERT,
- __BTREE_TRIGGER_OVERWRITE,
-
- __BTREE_TRIGGER_GC,
- __BTREE_TRIGGER_BUCKET_INVALIDATE,
- __BTREE_TRIGGER_NOATOMIC,
-};
-
-#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
-#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
-
-#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
-
-#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
-#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
-
-#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
-#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
-
-#define BTREE_TRIGGER_WANTS_OLD_AND_NEW \
- ((1U << KEY_TYPE_alloc)| \
- (1U << KEY_TYPE_alloc_v2)| \
- (1U << KEY_TYPE_alloc_v3)| \
- (1U << KEY_TYPE_stripe)| \
- (1U << KEY_TYPE_inode)| \
- (1U << KEY_TYPE_inode_v2)| \
- (1U << KEY_TYPE_snapshot))
+static inline bool btree_type_has_ptrs(enum btree_id id)
+{ /* True when bit @id is set in the BTREE_ID_HAS_PTRS mask (its definition is not part of this excerpt). */
+ return (1 << id) & BTREE_ID_HAS_PTRS; /* signed 1 << id, same form as the BTREE_ID_HAS_SNAPSHOTS test above */
+}
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
{