From: Kent Overstreet Date: Wed, 28 Jul 2021 21:26:38 +0000 (-0400) Subject: Update bcachefs sources to b964c6cba8 bcachefs: Change lockrestart_do() to always... X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=f06b01e9eacca7cd23679ee92f3d082c9352263f;p=bcachefs-tools-debian Update bcachefs sources to b964c6cba8 bcachefs: Change lockrestart_do() to always call bch2_trans_begin() --- diff --git a/.bcachefs_revision b/.bcachefs_revision index 8d922cc..f6620a2 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -0a9be96b5087967ac57007a131614ab6b4cadce4 +b964c6cba873eb5d2ebd0174876b664730e69a73 diff --git a/Kbuild.include b/Kbuild.include deleted file mode 100644 index 509e085..0000000 --- a/Kbuild.include +++ /dev/null @@ -1,319 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -#### -# kbuild: Generic definitions - -# Convenient variables -comma := , -quote := " -squote := ' -empty := -space := $(empty) $(empty) -space_escape := _-_SPACE_-_ -pound := \# - -### -# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o -dot-target = $(dir $@).$(notdir $@) - -### -# The temporary file to save gcc -MMD generated dependencies must not -# contain a comma -depfile = $(subst $(comma),_,$(dot-target).d) - -### -# filename of target with directory and extension stripped -basetarget = $(basename $(notdir $@)) - -### -# real prerequisites without phony targets -real-prereqs = $(filter-out $(PHONY), $^) - -### -# Escape single quote for use in echo statements -escsq = $(subst $(squote),'\$(squote)',$1) - -### -# Quote a string to pass it to C files. foo => '"foo"' -stringify = $(squote)$(quote)$1$(quote)$(squote) - -### -# Easy method for doing a status message - kecho := : - quiet_kecho := echo -silent_kecho := : -kecho := $($(quiet)kecho) - -### -# filechk is used to check if the content of a generated file is updated. 
-# Sample usage: -# -# filechk_sample = echo $(KERNELRELEASE) -# version.h: FORCE -# $(call filechk,sample) -# -# The rule defined shall write to stdout the content of the new file. -# The existing file will be compared with the new one. -# - If no file exist it is created -# - If the content differ the new file is used -# - If they are equal no change, and no timestamp update -define filechk - $(Q)set -e; \ - mkdir -p $(dir $@); \ - trap "rm -f $(dot-target).tmp" EXIT; \ - { $(filechk_$(1)); } > $(dot-target).tmp; \ - if [ ! -r $@ ] || ! cmp -s $@ $(dot-target).tmp; then \ - $(kecho) ' UPD $@'; \ - mv -f $(dot-target).tmp $@; \ - fi -endef - -###### -# gcc support functions -# See documentation in Documentation/kbuild/makefiles.rst - -# cc-cross-prefix -# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-) -# Return first where a gcc is found in PATH. -# If no gcc found in PATH with listed prefixes return nothing -# -# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it -# would try to directly execute the shell builtin 'command'. This workaround -# should be kept for a long time since this issue was fixed only after the -# GNU Make 4.2.1 release. -cc-cross-prefix = $(firstword $(foreach c, $(1), \ - $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c)))) - -# output directory for tests below -TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$ - -# try-run -# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) -# Exit code chooses option. "$$TMP" serves as a temporary file and is -# automatically cleaned up. 
-try-run = $(shell set -e; \ - TMP=$(TMPOUT)/tmp; \ - TMPO=$(TMPOUT)/tmp.o; \ - mkdir -p $(TMPOUT); \ - trap "rm -rf $(TMPOUT)" EXIT; \ - if ($(1)) >/dev/null 2>&1; \ - then echo "$(2)"; \ - else echo "$(3)"; \ - fi) - -# as-option -# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) - -as-option = $(call try-run,\ - $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2)) - -# as-instr -# Usage: cflags-y += $(call as-instr,instr,option1,option2) - -as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) - -# __cc-option -# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) -__cc-option = $(call try-run,\ - $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4)) - -# cc-option -# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) - -cc-option = $(call __cc-option, $(CC),\ - $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2)) - -# cc-option-yn -# Usage: flag := $(call cc-option-yn,-march=winchip-c6) -cc-option-yn = $(call try-run,\ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) - -# cc-disable-warning -# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) -cc-disable-warning = $(call try-run,\ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) - -# cc-ifversion -# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) -cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4)) - -# ld-option -# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) -ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) - -# ld-ifversion -# Usage: $(call ld-ifversion, -ge, 22252, y) -ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4)) - -###### - -### -# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.build obj= -# Usage: -# 
$(Q)$(MAKE) $(build)=dir -build := -f $(srctree)/scripts/Makefile.build obj - -### -# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.dtbinst obj= -# Usage: -# $(Q)$(MAKE) $(dtbinst)=dir -dtbinst := -f $(srctree)/scripts/Makefile.dtbinst obj - -### -# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj= -# Usage: -# $(Q)$(MAKE) $(clean)=dir -clean := -f $(srctree)/scripts/Makefile.clean obj - -# echo command. -# Short version is used, if $(quiet) equals `quiet_', otherwise full one. -echo-cmd = $(if $($(quiet)cmd_$(1)),\ - echo ' $(call escsq,$($(quiet)cmd_$(1)))$(echo-why)';) - -# printing commands -cmd = @set -e; $(echo-cmd) $(cmd_$(1)) - -### -# if_changed - execute command if any prerequisite is newer than -# target, or command line has changed -# if_changed_dep - as if_changed, but uses fixdep to reveal dependencies -# including used config symbols -# if_changed_rule - as if_changed but execute rule instead -# See Documentation/kbuild/makefiles.rst for more info - -ifneq ($(KBUILD_NOCMDDEP),1) -# Check if both commands are the same including their order. Result is empty -# string if equal. User may override this check using make KBUILD_NOCMDDEP=1 -cmd-check = $(filter-out $(subst $(space),$(space_escape),$(strip $(cmd_$@))), \ - $(subst $(space),$(space_escape),$(strip $(cmd_$1)))) -else -cmd-check = $(if $(strip $(cmd_$@)),,1) -endif - -# Replace >$< with >$$< to preserve $ when reloading the .cmd file -# (needed for make) -# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file -# (needed for make) -# Replace >'< with >'\''< to be able to enclose the whole string in '...' -# (needed for the shell) -make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1))))) - -# Find any prerequisites that are newer than target or that do not exist. -# (This is not true for now; $? should contain any non-existent prerequisites, -# but it does not work as expected when .SECONDARY is present. This seems a bug -# of GNU Make.) 
-# PHONY targets skipped in both cases. -newer-prereqs = $(filter-out $(PHONY),$?) - -# Execute command if command has changed or prerequisite(s) are updated. -if_changed = $(if $(newer-prereqs)$(cmd-check), \ - $(cmd); \ - printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) - -# Execute the command and also postprocess generated .d dependencies file. -if_changed_dep = $(if $(newer-prereqs)$(cmd-check),$(cmd_and_fixdep),@:) - -cmd_and_fixdep = \ - $(cmd); \ - scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).cmd;\ - rm -f $(depfile) - -# Usage: $(call if_changed_rule,foo) -# Will check if $(cmd_foo) or any of the prerequisites changed, -# and if so will execute $(rule_foo). -if_changed_rule = $(if $(newer-prereqs)$(cmd-check),$(rule_$(1)),@:) - -### -# why - tell why a target got built -# enabled by make V=2 -# Output (listed in the order they are checked): -# (1) - due to target is PHONY -# (2) - due to target missing -# (3) - due to: file1.h file2.h -# (4) - due to command line change -# (5) - due to missing .cmd file -# (6) - due to target not in $(targets) -# (1) PHONY targets are always build -# (2) No target, so we better build it -# (3) Prerequisite is newer than target -# (4) The command line stored in the file named dir/.target.cmd -# differed from actual command line. 
This happens when compiler -# options changes -# (5) No dir/.target.cmd file (used to store command line) -# (6) No dir/.target.cmd file and target not listed in $(targets) -# This is a good hint that there is a bug in the kbuild file -ifeq ($(KBUILD_VERBOSE),2) -why = \ - $(if $(filter $@, $(PHONY)),- due to target is PHONY, \ - $(if $(wildcard $@), \ - $(if $(newer-prereqs),- due to: $(newer-prereqs), \ - $(if $(cmd-check), \ - $(if $(cmd_$@),- due to command line change, \ - $(if $(filter $@, $(targets)), \ - - due to missing .cmd file, \ - - due to $(notdir $@) not in $$(targets) \ - ) \ - ) \ - ) \ - ), \ - - due to target missing \ - ) \ - ) - -echo-why = $(call escsq, $(strip $(why))) -endif - -############################################################################### -# -# When a Kconfig string contains a filename, it is suitable for -# passing to shell commands. It is surrounded by double-quotes, and -# any double-quotes or backslashes within it are escaped by -# backslashes. -# -# This is no use for dependencies or $(wildcard). We need to strip the -# surrounding quotes and the escaping from quotes and backslashes, and -# we *do* need to escape any spaces in the string. So, for example: -# -# Usage: $(eval $(call config_filename,FOO)) -# -# Defines FOO_FILENAME based on the contents of the CONFIG_FOO option, -# transformed as described above to be suitable for use within the -# makefile. -# -# Also, if the filename is a relative filename and exists in the source -# tree but not the build tree, define FOO_SRCPREFIX as $(srctree)/ to -# be prefixed to *both* command invocation and dependencies. -# -# Note: We also print the filenames in the quiet_cmd_foo text, and -# perhaps ought to have a version specially escaped for that purpose. -# But it's only cosmetic, and $(patsubst "%",%,$(CONFIG_FOO)) is good -# enough. 
It'll strip the quotes in the common case where there's no -# space and it's a simple filename, and it'll retain the quotes when -# there's a space. There are some esoteric cases in which it'll print -# the wrong thing, but we don't really care. The actual dependencies -# and commands *do* get it right, with various combinations of single -# and double quotes, backslashes and spaces in the filenames. -# -############################################################################### -# -define config_filename -ifneq ($$(CONFIG_$(1)),"") -$(1)_FILENAME := $$(subst \\,\,$$(subst \$$(quote),$$(quote),$$(subst $$(space_escape),\$$(space),$$(patsubst "%",%,$$(subst $$(space),$$(space_escape),$$(CONFIG_$(1))))))) -ifneq ($$(patsubst /%,%,$$(firstword $$($(1)_FILENAME))),$$(firstword $$($(1)_FILENAME))) -else -ifeq ($$(wildcard $$($(1)_FILENAME)),) -ifneq ($$(wildcard $$(srctree)/$$($(1)_FILENAME)),) -$(1)_SRCPREFIX := $(srctree)/ -endif -endif -endif -endif -endef -# -############################################################################### - -# delete partially updated (i.e. 
corrupted) files on error -.DELETE_ON_ERROR: - -# do not delete intermediate files automatically -.SECONDARY: diff --git a/Makefile b/Makefile index 596357c..23e0508 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ LDFLAGS+=$(CFLAGS) $(EXTRA_LDFLAGS) VERSION?=$(shell git describe --dirty=+ 2>/dev/null || echo v0.1-nogit) -include Kbuild.include +include Makefile.compiler CFLAGS+=$(call cc-disable-warning, unused-but-set-variable) CFLAGS+=$(call cc-disable-warning, stringop-overflow) @@ -178,8 +178,8 @@ update-bcachefs-sources: git add include/linux/list_nulls.h cp $(LINUX_DIR)/include/linux/poison.h include/linux/ git add include/linux/poison.h - cp $(LINUX_DIR)/scripts/Kbuild.include ./ - git add Kbuild.include + cp $(LINUX_DIR)/scripts/Makefile.compiler ./ + git add Makefile.compiler $(RM) libbcachefs/*.mod.c git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision git add .bcachefs_revision diff --git a/Makefile.compiler b/Makefile.compiler new file mode 100644 index 0000000..86ecd2a --- /dev/null +++ b/Makefile.compiler @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# cc-cross-prefix +# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-) +# Return first where a gcc is found in PATH. +# If no gcc found in PATH with listed prefixes return nothing +# +# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it +# would try to directly execute the shell builtin 'command'. This workaround +# should be kept for a long time since this issue was fixed only after the +# GNU Make 4.2.1 release. +cc-cross-prefix = $(firstword $(foreach c, $(1), \ + $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c)))) + +# output directory for tests below +TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$ + +# try-run +# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) +# Exit code chooses option. "$$TMP" serves as a temporary file and is +# automatically cleaned up. 
+try-run = $(shell set -e; \ + TMP=$(TMPOUT)/tmp; \ + mkdir -p $(TMPOUT); \ + trap "rm -rf $(TMPOUT)" EXIT; \ + if ($(1)) >/dev/null 2>&1; \ + then echo "$(2)"; \ + else echo "$(3)"; \ + fi) + +# as-option +# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) + +as-option = $(call try-run,\ + $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2)) + +# as-instr +# Usage: cflags-y += $(call as-instr,instr,option1,option2) + +as-instr = $(call try-run,\ + printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) + +# __cc-option +# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) +__cc-option = $(call try-run,\ + $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4)) + +# cc-option +# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) + +cc-option = $(call __cc-option, $(CC),\ + $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2)) + +# cc-option-yn +# Usage: flag := $(call cc-option-yn,-march=winchip-c6) +cc-option-yn = $(call try-run,\ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) + +# cc-disable-warning +# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) +cc-disable-warning = $(call try-run,\ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + +# cc-ifversion +# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) +cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4)) + +# ld-option +# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) +ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) + +# ld-ifversion +# Usage: $(call ld-ifversion, -ge, 22252, y) +ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4)) diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 4c0d9b7..a11bb5f 100644 --- 
a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -775,14 +775,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse, TRACE_EVENT(iter_traverse, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + bool key_cache, enum btree_id btree_id, struct bpos *pos, int ret), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, ret), + TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, ret), TP_STRUCT__entry( __field(unsigned long, trans_ip ) __field(unsigned long, caller_ip ) + __field(u8, key_cache ) __field(u8, btree_id ) __field(u64, pos_inode ) __field(u64, pos_offset ) @@ -793,6 +795,7 @@ TRACE_EVENT(iter_traverse, TP_fast_assign( __entry->trans_ip = trans_ip; __entry->caller_ip = caller_ip; + __entry->key_cache = key_cache; __entry->btree_id = btree_id; __entry->pos_inode = pos->inode; __entry->pos_offset = pos->offset; @@ -800,9 +803,10 @@ TRACE_EVENT(iter_traverse, __entry->ret = ret; ), - TP_printk("%ps %pS pos %u %llu:%llu:%u ret %i", + TP_printk("%ps %pS key cache %u btree %u %llu:%llu:%u ret %i", (void *) __entry->trans_ip, (void *) __entry->caller_ip, + __entry->key_cache, __entry->btree_id, __entry->pos_inode, __entry->pos_offset, @@ -953,15 +957,17 @@ TRACE_EVENT(trans_restart_mem_realloced, DECLARE_EVENT_CLASS(node_lock_fail, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + bool key_cache, enum btree_id btree_id, struct bpos *pos, unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, + TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, level, iter_seq, node, node_seq), TP_STRUCT__entry( __field(unsigned long, trans_ip ) __field(unsigned long, caller_ip ) + __field(u8, key_cache ) __field(u8, btree_id ) __field(u64, pos_inode ) __field(u64, pos_offset ) @@ -975,6 +981,7 @@ DECLARE_EVENT_CLASS(node_lock_fail, TP_fast_assign( __entry->trans_ip = trans_ip; __entry->caller_ip = caller_ip; + __entry->key_cache = key_cache; __entry->btree_id = btree_id; 
__entry->pos_inode = pos->inode; __entry->pos_offset = pos->offset; @@ -985,9 +992,10 @@ DECLARE_EVENT_CLASS(node_lock_fail, __entry->node_seq = node_seq; ), - TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u", + TP_printk("%ps %pS key cache %u btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u", (void *) __entry->trans_ip, (void *) __entry->caller_ip, + __entry->key_cache, __entry->btree_id, __entry->pos_inode, __entry->pos_offset, @@ -999,20 +1007,22 @@ DECLARE_EVENT_CLASS(node_lock_fail, DEFINE_EVENT(node_lock_fail, node_upgrade_fail, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + bool key_cache, enum btree_id btree_id, struct bpos *pos, unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, + TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, level, iter_seq, node, node_seq) ); DEFINE_EVENT(node_lock_fail, node_relock_fail, TP_PROTO(unsigned long trans_ip, unsigned long caller_ip, + bool key_cache, enum btree_id btree_id, struct bpos *pos, unsigned level, u32 iter_seq, unsigned node, u32 node_seq), - TP_ARGS(trans_ip, caller_ip, btree_id, pos, + TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, level, iter_seq, node, node_seq) ); diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c index 5408a92..eb907e5 100644 --- a/libbcachefs/acl.c +++ b/libbcachefs/acl.c @@ -221,6 +221,8 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type) struct btree_iter *iter; struct bkey_s_c_xattr xattr; struct posix_acl *acl = NULL; + struct bkey_s_c k; + int ret; bch2_trans_init(&trans, c, 0, 0); retry: @@ -239,7 +241,14 @@ retry: goto out; } - xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) { + acl = ERR_PTR(ret); + goto out; + } + + xattr = bkey_s_c_to_xattr(k); acl = bch2_acl_from_disk(xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); @@ -325,8 +334,7 @@ retry: 
ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, - &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK); + &inode->ei_journal_seq, 0); btree_err: bch2_trans_iter_put(&trans, inode_iter); @@ -356,6 +364,7 @@ int bch2_acl_chmod(struct btree_trans *trans, struct bkey_s_c_xattr xattr; struct bkey_i_xattr *new; struct posix_acl *acl; + struct bkey_s_c k; int ret; iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, @@ -366,7 +375,12 @@ int bch2_acl_chmod(struct btree_trans *trans, if (ret) return ret == -ENOENT ? 0 : ret; - xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; + + xattr = bkey_s_c_to_xattr(k); acl = bch2_acl_from_disk(xattr_val(xattr.v), le16_to_cpu(xattr.v->x_val_len)); ret = PTR_ERR_OR_ZERO(acl); diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index fff85c1..886861a 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -374,7 +374,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags) percpu_ref_put(&ca->ref); goto err; } - bch2_btree_iter_next_slot(iter); + bch2_btree_iter_advance(iter); } } err: diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index ff9d770..f8adbf4 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -269,7 +269,7 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, { bch2_bkey_to_text(out, k.k); - if (k.k) { + if (bkey_val_bytes(k.k)) { pr_buf(out, ": "); bch2_val_to_text(out, c, k); } diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index ed44827..cd0c500 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -648,8 +648,10 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, * Parent node must be locked, else we could read in a btree node that's * been freed: */ - if (iter && !bch2_btree_node_relock(iter, level + 1)) + if (iter && 
!bch2_btree_node_relock(iter, level + 1)) { + btree_trans_restart(iter->trans); return ERR_PTR(-EINTR); + } b = bch2_btree_node_mem_alloc(c); if (IS_ERR(b)) @@ -686,18 +688,17 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, if (!sync) return NULL; - /* - * XXX: this will probably always fail because btree_iter_relock() - * currently fails for iterators that aren't pointed at a valid btree - * node - */ if (iter && (!bch2_trans_relock(iter->trans) || - !bch2_btree_iter_relock(iter, _THIS_IP_))) + !bch2_btree_iter_relock_intent(iter))) { + BUG_ON(!iter->trans->restarted); return ERR_PTR(-EINTR); + } - if (!six_relock_type(&b->c.lock, lock_type, seq)) + if (!six_relock_type(&b->c.lock, lock_type, seq)) { + btree_trans_restart(iter->trans); return ERR_PTR(-EINTR); + } return b; } @@ -718,6 +719,7 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) return; bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&b->key)); + bch2_bpos_to_text(&PBUF(buf2), b->data->min_key); bch2_bpos_to_text(&PBUF(buf3), b->data->max_key); bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n" @@ -752,20 +754,23 @@ static inline void btree_check_header(struct bch_fs *c, struct btree *b) * The btree node will have either a read or a write lock held, depending on * the @write parameter. 
*/ -struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter, +struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_iter *iter, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) { + struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; struct bset_tree *t; EBUG_ON(level >= BTREE_MAX_DEPTH); - b = btree_node_mem_ptr(k); - if (b) - goto lock_node; + if (c->opts.btree_node_mem_ptr_optimization) { + b = btree_node_mem_ptr(k); + if (b) + goto lock_node; + } retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { @@ -818,7 +823,7 @@ lock_node: if (!btree_node_lock(b, k->k.p, level, iter, lock_type, lock_node_check_fn, (void *) k, trace_ip)) { - if (b->hash_val != btree_ptr_hash_val(k)) + if (!trans->restarted) goto retry; return ERR_PTR(-EINTR); } @@ -830,10 +835,11 @@ lock_node: if (bch2_btree_node_relock(iter, level + 1)) goto retry; - trace_trans_restart_btree_node_reused(iter->trans->ip, + trace_trans_restart_btree_node_reused(trans->ip, trace_ip, iter->btree_id, &iter->real_pos); + btree_trans_restart(trans); return ERR_PTR(-EINTR); } } @@ -842,19 +848,20 @@ lock_node: u32 seq = b->c.lock.state.seq; six_unlock_type(&b->c.lock, lock_type); - bch2_trans_unlock(iter->trans); + bch2_trans_unlock(trans); bch2_btree_node_wait_on_read(b); /* - * XXX: check if this always fails - btree_iter_relock() - * currently fails for iterators that aren't pointed at a valid - * btree node + * should_be_locked is not set on this iterator yet, so we need + * to relock it specifically: */ if (iter && - (!bch2_trans_relock(iter->trans) || - !bch2_btree_iter_relock(iter, _THIS_IP_))) + (!bch2_trans_relock(trans) || + !bch2_btree_iter_relock_intent(iter))) { + BUG_ON(!trans->restarted); return ERR_PTR(-EINTR); + } if (!six_relock_type(&b->c.lock, lock_type, seq)) goto retry; @@ -899,9 +906,11 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c, EBUG_ON(level >= 
BTREE_MAX_DEPTH); - b = btree_node_mem_ptr(k); - if (b) - goto lock_node; + if (c->opts.btree_node_mem_ptr_optimization) { + b = btree_node_mem_ptr(k); + if (b) + goto lock_node; + } retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { @@ -966,9 +975,9 @@ out: return b; } -void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, - const struct bkey_i *k, - enum btree_id btree_id, unsigned level) +int bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, + const struct bkey_i *k, + enum btree_id btree_id, unsigned level) { struct btree_cache *bc = &c->btree_cache; struct btree *b; @@ -978,9 +987,10 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter, b = btree_cache_find(bc, k); if (b) - return; + return 0; - bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false); + b = bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false); + return PTR_ERR_OR_ZERO(b); } void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k) diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index fd5026c..5032293 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -22,15 +22,15 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct bch_fs *); -struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *, +struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_iter *, const struct bkey_i *, unsigned, enum six_lock_type, unsigned long); struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *, enum btree_id, unsigned, bool); -void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, - const struct bkey_i *, enum btree_id, unsigned); +int bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *, + const struct bkey_i *, enum btree_id, unsigned); void 
bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index c747398..3af00e2 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -189,6 +189,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade, (upgrade ? trace_node_upgrade_fail : trace_node_relock_fail)(iter->trans->ip, trace_ip, + btree_iter_type(iter) == BTREE_ITER_CACHED, iter->btree_id, &iter->real_pos, l, iter->l[l].lock_seq, is_btree_node(iter, l) @@ -197,7 +198,6 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade, is_btree_node(iter, l) ? iter->l[l].b->c.lock.state.seq : 0); - fail_idx = l; btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); } @@ -309,7 +309,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, } if (unlikely(deadlock_iter)) { - trace_trans_restart_would_deadlock(iter->trans->ip, ip, + trace_trans_restart_would_deadlock(trans->ip, ip, trans->in_traverse_all, reason, deadlock_iter->btree_id, btree_iter_type(deadlock_iter), @@ -317,6 +317,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, iter->btree_id, btree_iter_type(iter), &pos); + btree_trans_restart(trans); return false; } @@ -375,10 +376,44 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans) static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} #endif +/* + * Only for btree_cache.c - only relocks intent locks + */ +bool bch2_btree_iter_relock_intent(struct btree_iter *iter) +{ + unsigned l; + + for (l = iter->level; + l < iter->locks_want && btree_iter_node(iter, l); + l++) { + if (!bch2_btree_node_relock(iter, l)) { + trace_node_relock_fail(iter->trans->ip, _RET_IP_, + btree_iter_type(iter) == BTREE_ITER_CACHED, + iter->btree_id, &iter->real_pos, + l, iter->l[l].lock_seq, + is_btree_node(iter, l) + ? 0 + : (unsigned long) iter->l[l].b, + is_btree_node(iter, l) + ? 
iter->l[l].b->c.lock.state.seq + : 0); + btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); + btree_trans_restart(iter->trans); + return false; + } + } + + return true; +} + __flatten bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip) { - return btree_iter_get_locks(iter, false, trace_ip); + bool ret = btree_iter_get_locks(iter, false, trace_ip); + + if (!ret) + btree_trans_restart(iter->trans); + return ret; } bool __bch2_btree_iter_upgrade(struct btree_iter *iter, @@ -421,6 +456,8 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, btree_iter_get_locks(linked, true, _THIS_IP_); } + if (iter->should_be_locked) + btree_trans_restart(iter->trans); return false; } @@ -459,8 +496,7 @@ void bch2_trans_downgrade(struct btree_trans *trans) /* Btree transaction locking: */ -static inline bool btree_iter_should_be_locked(struct btree_trans *trans, - struct btree_iter *iter) +static inline bool btree_iter_should_be_locked(struct btree_iter *iter) { return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || iter->should_be_locked; @@ -470,11 +506,15 @@ bool bch2_trans_relock(struct btree_trans *trans) { struct btree_iter *iter; + if (unlikely(trans->restarted)) + return false; + trans_for_each_iter(trans, iter) - if (!bch2_btree_iter_relock(iter, _RET_IP_) && - btree_iter_should_be_locked(trans, iter)) { + if (btree_iter_should_be_locked(iter) && + !bch2_btree_iter_relock(iter, _RET_IP_)) { trace_trans_restart_relock(trans->ip, _RET_IP_, iter->btree_id, &iter->real_pos); + BUG_ON(!trans->restarted); return false; } return true; @@ -1055,11 +1095,12 @@ static int lock_root_check_fn(struct six_lock *lock, void *p) return b == *rootp ? 
0 : -1; } -static inline int btree_iter_lock_root(struct btree_iter *iter, +static inline int btree_iter_lock_root(struct btree_trans *trans, + struct btree_iter *iter, unsigned depth_want, unsigned long trace_ip) { - struct bch_fs *c = iter->trans->c; + struct bch_fs *c = trans->c; struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; enum six_lock_type lock_type; unsigned i; @@ -1087,8 +1128,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter, if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level, iter, lock_type, lock_root_check_fn, rootp, - trace_ip))) - return -EINTR; + trace_ip))) { + if (trans->restarted) + return -EINTR; + continue; + } if (likely(b == READ_ONCE(*rootp) && b->c.level == iter->level && @@ -1109,7 +1153,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter, } noinline -static void btree_iter_prefetch(struct btree_iter *iter) +static int btree_iter_prefetch(struct btree_iter *iter) { struct bch_fs *c = iter->trans->c; struct btree_iter_level *l = &iter->l[iter->level]; @@ -1120,10 +1164,11 @@ static void btree_iter_prefetch(struct btree_iter *iter) ? (iter->level > 1 ? 0 : 2) : (iter->level > 1 ? 
1 : 16); bool was_locked = btree_node_locked(iter, iter->level); + int ret = 0; bch2_bkey_buf_init(&tmp); - while (nr) { + while (nr && !ret) { if (!bch2_btree_node_relock(iter, iter->level)) break; @@ -1133,14 +1178,15 @@ static void btree_iter_prefetch(struct btree_iter *iter) break; bch2_bkey_buf_unpack(&tmp, c, l->b, k); - bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, - iter->level - 1); + ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, + iter->level - 1); } if (!was_locked) btree_node_unlock(iter, iter->level); bch2_bkey_buf_exit(&tmp, c); + return ret; } static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, @@ -1164,10 +1210,11 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, btree_node_unlock(iter, plevel); } -static __always_inline int btree_iter_down(struct btree_iter *iter, +static __always_inline int btree_iter_down(struct btree_trans *trans, + struct btree_iter *iter, unsigned long trace_ip) { - struct bch_fs *c = iter->trans->c; + struct bch_fs *c = trans->c; struct btree_iter_level *l = &iter->l[iter->level]; struct btree *b; unsigned level = iter->level - 1; @@ -1181,7 +1228,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter, bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip); + b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip); ret = PTR_ERR_OR_ZERO(b); if (unlikely(ret)) goto err; @@ -1194,7 +1241,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter, btree_node_mem_ptr_set(iter, level + 1, b); if (iter->flags & BTREE_ITER_PREFETCH) - btree_iter_prefetch(iter); + ret = btree_iter_prefetch(iter); if (btree_node_read_locked(iter, level + 1)) btree_node_unlock(iter, level + 1); @@ -1215,25 +1262,19 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, struct btree_iter *iter; u8 sorted[BTREE_ITER_MAX]; int i, 
nr_sorted = 0; - bool relock_fail; if (trans->in_traverse_all) return -EINTR; trans->in_traverse_all = true; retry_all: + trans->restarted = false; + nr_sorted = 0; - relock_fail = false; trans_for_each_iter(trans, iter) { - if (!bch2_btree_iter_relock(iter, _THIS_IP_)) - relock_fail = true; sorted[nr_sorted++] = iter->idx; - } - - if (!relock_fail) { - trans->in_traverse_all = false; - return 0; + iter->should_be_locked = false; } #define btree_iter_cmp_by_idx(_l, _r) \ @@ -1289,15 +1330,6 @@ retry_all: if (ret) goto retry_all; } - - if (hweight64(trans->iters_live) > 1) - ret = -EINTR; - else - trans_for_each_iter(trans, iter) - if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) { - ret = -EINTR; - break; - } out: bch2_btree_cache_cannibalize_unlock(c); @@ -1307,7 +1339,7 @@ out: return ret; } -int bch2_btree_iter_traverse_all(struct btree_trans *trans) +static int bch2_btree_iter_traverse_all(struct btree_trans *trans) { return __btree_iter_traverse_all(trans, 0, _RET_IP_); } @@ -1353,9 +1385,19 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, static int btree_iter_traverse_one(struct btree_iter *iter, unsigned long trace_ip) { + struct btree_trans *trans = iter->trans; unsigned l, depth_want = iter->level; int ret = 0; + /* + * Ensure we obey iter->should_be_locked: if it's set, we can't unlock + * and re-traverse the iterator without a transaction restart: + */ + if (iter->should_be_locked) { + ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR; + goto out; + } + if (btree_iter_type(iter) == BTREE_ITER_CACHED) { ret = bch2_btree_iter_traverse_cached(iter); goto out; @@ -1385,8 +1427,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter, */ while (iter->level > depth_want) { ret = btree_iter_node(iter, iter->level) - ? btree_iter_down(iter, trace_ip) - : btree_iter_lock_root(iter, depth_want, trace_ip); + ? 
btree_iter_down(trans, iter, trace_ip) + : btree_iter_lock_root(trans, iter, depth_want, trace_ip); if (unlikely(ret)) { if (ret == 1) { /* @@ -1414,7 +1456,9 @@ static int btree_iter_traverse_one(struct btree_iter *iter, iter->uptodate = BTREE_ITER_NEED_PEEK; out: - trace_iter_traverse(iter->trans->ip, trace_ip, + BUG_ON((ret == -EINTR) != !!trans->restarted); + trace_iter_traverse(trans->ip, trace_ip, + btree_iter_type(iter) == BTREE_ITER_CACHED, iter->btree_id, &iter->real_pos, ret); bch2_btree_iter_verify(iter); return ret; @@ -1427,8 +1471,10 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) ret = bch2_trans_cond_resched(trans) ?: btree_iter_traverse_one(iter, _RET_IP_); - if (unlikely(ret)) + if (unlikely(ret) && hweight64(trans->iters_linked) == 1) { ret = __btree_iter_traverse_all(trans, ret, _RET_IP_); + BUG_ON(ret == -EINTR); + } return ret; } @@ -1559,6 +1605,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p int cmp = bpos_cmp(new_pos, iter->real_pos); unsigned l = iter->level; + EBUG_ON(iter->trans->restarted); + if (!cmp) goto out; @@ -2118,6 +2166,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, struct btree_iter *iter, *best = NULL; struct bpos real_pos, pos_min = POS_MIN; + EBUG_ON(trans->restarted); + if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && btree_node_type_is_extents(btree_id) && !(flags & BTREE_ITER_NOT_EXTENTS) && @@ -2282,6 +2332,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (old_bytes) { trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes); + btree_trans_restart(trans); return ERR_PTR(-EINTR); } } @@ -2307,33 +2358,26 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans) } /** - * bch2_trans_reset() - reset a transaction after a interrupted attempt + * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset - * @flags: transaction reset flags. 
* * While iterating over nodes or updating nodes a attempt to lock a btree * node may return EINTR when the trylock fails. When this occurs - * bch2_trans_reset() or bch2_trans_begin() should be called and the - * transaction retried. - * - * Transaction reset flags include: - * - * - TRANS_RESET_NOUNLOCK - Do not attempt to unlock and reschedule the - * transaction. - * - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters. + * bch2_trans_begin() should be called and the transaction retried. */ -void bch2_trans_reset(struct btree_trans *trans, unsigned flags) +void bch2_trans_begin(struct btree_trans *trans) { struct btree_iter *iter; - trans_for_each_iter(trans, iter) { + trans_for_each_iter(trans, iter) iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| BTREE_ITER_SET_POS_AFTER_COMMIT); - iter->should_be_locked = false; - } + /* + * XXX: we shouldn't be doing this if the transaction was restarted, but + * currently we still overflow transaction iterators if we do that + * */ bch2_trans_unlink_iters(trans); - trans->iters_touched &= trans->iters_live; trans->extra_journal_res = 0; @@ -2351,12 +2395,12 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags) (void *) &trans->fs_usage_deltas->memset_start); } - if (!(flags & TRANS_RESET_NOUNLOCK)) - bch2_trans_cond_resched(trans); + bch2_trans_cond_resched(trans); - if (!(flags & TRANS_RESET_NOTRAVERSE) && - trans->iters_linked) + if (trans->restarted) bch2_btree_iter_traverse_all(trans); + + trans->restarted = false; } static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c) diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 7385cca..aeabc07 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -111,11 +111,20 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *, struct btree_node_iter *, struct bkey_packed *, unsigned, unsigned); +bool bch2_btree_iter_relock_intent(struct btree_iter *); bool bch2_btree_iter_relock(struct 
btree_iter *, unsigned long); bool bch2_trans_relock(struct btree_trans *); void bch2_trans_unlock(struct btree_trans *); +__always_inline +static inline int btree_trans_restart(struct btree_trans *trans) +{ + trans->restarted = true; + bch2_trans_unlock(trans); + return -EINTR; +} + bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned); static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter, @@ -147,8 +156,6 @@ void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *); int __must_check bch2_btree_iter_traverse(struct btree_iter *); -int bch2_btree_iter_traverse_all(struct btree_trans *); - struct btree *bch2_btree_iter_peek_node(struct btree_iter *); struct btree *bch2_btree_iter_next_node(struct btree_iter *); @@ -316,22 +323,7 @@ static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btr trans->iters_touched &= ~(1ULL << iter->idx); } -#define TRANS_RESET_NOTRAVERSE (1 << 0) -#define TRANS_RESET_NOUNLOCK (1 << 1) - -void bch2_trans_reset(struct btree_trans *, unsigned); - -/** - * bch2_trans_begin() - ensure lock consistency of transaction on retry - * @trans: transaction to prepare - * - * Ensure lock ordering is correct before potentially retrying a transaction - * after a failed trylock. 
- */ -static inline void bch2_trans_begin(struct btree_trans *trans) -{ - return bch2_trans_reset(trans, 0); -} +void bch2_trans_begin(struct btree_trans *); void *bch2_trans_kmalloc(struct btree_trans *, size_t); void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index 7f47ef3..e327ef3 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -214,7 +214,7 @@ static int btree_key_cache_fill(struct btree_trans *trans, if (!bch2_btree_node_relock(ck_iter, 0)) { trace_transaction_restart_ip(trans->ip, _THIS_IP_); - ret = -EINTR; + ret = btree_trans_restart(trans); goto err; } @@ -233,6 +233,10 @@ static int btree_key_cache_fill(struct btree_trans *trans, } } + /* + * XXX: not allowed to be holding read locks when we take a write lock, + * currently + */ bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter); if (new_k) { kfree(ck->k); @@ -299,10 +303,8 @@ retry: if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want, bkey_cached_check_fn, iter, _THIS_IP_)) { - if (ck->key.btree_id != iter->btree_id || - bpos_cmp(ck->key.pos, iter->pos)) { + if (!trans->restarted) goto retry; - } trace_transaction_restart_ip(trans->ip, _THIS_IP_); ret = -EINTR; @@ -322,10 +324,10 @@ retry: iter->l[0].b = (void *) ck; fill: if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) { - if (!btree_node_intent_locked(iter, 0)) - bch2_btree_iter_upgrade(iter, 1); - if (!btree_node_intent_locked(iter, 0)) { + if (!iter->locks_want && + !!__bch2_btree_iter_upgrade(iter, 1)) { trace_transaction_restart_ip(trans->ip, _THIS_IP_); + BUG_ON(!trans->restarted); ret = -EINTR; goto err; } @@ -340,13 +342,14 @@ fill: iter->uptodate = BTREE_ITER_NEED_PEEK; - if (!(iter->flags & BTREE_ITER_INTENT)) - bch2_btree_iter_downgrade(iter); - else if (!iter->locks_want) { - if (!__bch2_btree_iter_upgrade(iter, 1)) - ret = -EINTR; + if ((iter->flags & BTREE_ITER_INTENT) && + 
!bch2_btree_iter_upgrade(iter, 1)) { + BUG_ON(!trans->restarted); + ret = -EINTR; } + BUG_ON(!ret && !btree_node_locked(iter, 0)); + return ret; err: if (ret != -EINTR) { @@ -377,10 +380,9 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans, BTREE_ITER_CACHED_NOFILL| BTREE_ITER_CACHED_NOCREATE| BTREE_ITER_INTENT); -retry: ret = bch2_btree_iter_traverse(c_iter); if (ret) - goto err; + goto out; ck = (void *) c_iter->l[0].b; if (!ck || @@ -399,9 +401,10 @@ retry: * to be using alloc reserves: * */ ret = bch2_btree_iter_traverse(b_iter) ?: - bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?: + bch2_trans_update(trans, b_iter, ck->k, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| + BTREE_TRIGGER_NORUN) ?: bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| @@ -409,15 +412,10 @@ retry: ? BTREE_INSERT_JOURNAL_RESERVED : 0)| commit_flags); -err: - if (ret == -EINTR) - goto retry; - - if (ret == -EAGAIN) - goto out; - if (ret) { - bch2_fs_fatal_err_on(!bch2_journal_error(j), c, + bch2_fs_fatal_err_on(ret != -EINTR && + ret != -EAGAIN && + !bch2_journal_error(j), c, "error flushing key cache: %i", ret); goto out; } @@ -465,7 +463,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; - struct btree_trans trans; int ret = 0; int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); @@ -480,10 +477,9 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, } six_unlock_read(&ck->c.lock); - bch2_trans_init(&trans, c, 0, 0); - ret = btree_key_cache_flush_pos(&trans, key, seq, - BTREE_INSERT_JOURNAL_RECLAIM, false); - bch2_trans_exit(&trans); + ret = bch2_trans_do(c, NULL, NULL, 0, + btree_key_cache_flush_pos(&trans, key, seq, + BTREE_INSERT_JOURNAL_RECLAIM, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); diff --git a/libbcachefs/btree_types.h 
b/libbcachefs/btree_types.h index 07c9ba4..6882873 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -380,9 +380,10 @@ struct btree_trans { int srcu_idx; u8 nr_updates; - unsigned used_mempool:1; - unsigned error:1; - unsigned in_traverse_all:1; + bool used_mempool:1; + bool error:1; + bool in_traverse_all:1; + bool restarted:1; /* * For when bch2_trans_update notices we'll be splitting a compressed * extent: diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index bab135f..217b52e 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -15,7 +15,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); enum btree_insert_flags { - __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOCHECK_RW, __BTREE_INSERT_LAZY_RW, @@ -29,11 +28,6 @@ enum btree_insert_flags { __BCH_HASH_SET_MUST_REPLACE, }; -/* - * Don't drop locks _after_ successfully updating btree: - */ -#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK) - /* Don't check for -ENOSPC: */ #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) @@ -110,12 +104,10 @@ static inline int bch2_trans_commit(struct btree_trans *trans, ({ \ int _ret; \ \ - while (1) { \ + do { \ + bch2_trans_begin(_trans); \ _ret = (_do); \ - if (_ret != -EINTR) \ - break; \ - bch2_trans_reset(_trans, 0); \ - } \ + } while (_ret == -EINTR); \ \ _ret; \ }) diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index a254240..c8c3382 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -960,9 +960,6 @@ retry: if (flags & BTREE_INSERT_GC_LOCK_HELD) lockdep_assert_held(&c->gc_lock); else if (!down_read_trylock(&c->gc_lock)) { - if (flags & BTREE_INSERT_NOUNLOCK) - return ERR_PTR(-EINTR); - bch2_trans_unlock(trans); down_read(&c->gc_lock); if (!bch2_trans_relock(trans)) { @@ -1005,20 +1002,11 @@ retry: 
BTREE_UPDATE_JOURNAL_RES, journal_flags|JOURNAL_RES_GET_NONBLOCK); if (ret == -EAGAIN) { - /* - * this would be cleaner if bch2_journal_preres_get() took a - * closure argument - */ - if (flags & BTREE_INSERT_NOUNLOCK) { - trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_); - ret = -EINTR; - goto err; - } - bch2_trans_unlock(trans); if (flags & BTREE_INSERT_JOURNAL_RECLAIM) { bch2_btree_update_free(as); + btree_trans_restart(trans); return ERR_PTR(ret); } @@ -1043,8 +1031,7 @@ retry: if (ret) goto err; - ret = bch2_btree_reserve_get(as, nr_nodes, flags, - !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); + ret = bch2_btree_reserve_get(as, nr_nodes, flags, &cl); if (ret) goto err; @@ -1057,8 +1044,6 @@ err: bch2_btree_update_free(as); if (ret == -EAGAIN) { - BUG_ON(flags & BTREE_INSERT_NOUNLOCK); - bch2_trans_unlock(trans); closure_sync(&cl); ret = -EINTR; @@ -1593,12 +1578,12 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, size_t sib_u64s; int ret = 0, ret2 = 0; - BUG_ON(!btree_node_locked(iter, level)); retry: ret = bch2_btree_iter_traverse(iter); if (ret) - goto err; + return ret; + BUG_ON(!iter->should_be_locked); BUG_ON(!btree_node_locked(iter, level)); b = iter->l[level].b; @@ -1751,13 +1736,6 @@ err: if (ret == -EINTR && bch2_trans_relock(trans)) goto retry; - if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) { - ret2 = ret; - ret = bch2_btree_iter_traverse_all(trans); - if (!ret) - goto retry; - } - goto out; } @@ -1949,8 +1927,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, BTREE_INSERT_NOFAIL| BTREE_INSERT_NOCHECK_RW| BTREE_INSERT_JOURNAL_RECLAIM| - BTREE_INSERT_JOURNAL_RESERVED| - BTREE_INSERT_NOUNLOCK); + BTREE_INSERT_JOURNAL_RESERVED); if (ret) goto err; diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index d319e27..e9e5422 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -348,11 +348,6 @@ static inline void do_btree_insert_one(struct 
btree_trans *trans, } } -static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter) -{ - __bch2_btree_iter_unlock(iter); -} - static noinline void bch2_trans_mark_gc(struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -384,6 +379,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, if (race_fault()) { trace_trans_restart_fault_inject(trans->ip, trace_ip); + trans->restarted = true; return -EINTR; } @@ -520,10 +516,11 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0; } - return u64s_delta <= 0 - ? (bch2_foreground_maybe_merge(trans, iter, iter->level, - trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR) - : 0; + if (u64s_delta > 0) + return 0; + + return bch2_foreground_maybe_merge(trans, iter, + iter->level, trans->flags); } /* @@ -580,20 +577,15 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, * or anything else that might call bch2_trans_relock(), since that * would just retake the read locks: */ - trans_for_each_iter(trans, iter) { - if (iter->nodes_locked != iter->nodes_intent_locked) { - if (btree_iter_keep(trans, iter)) { - if (!bch2_btree_iter_upgrade(iter, 1)) { - trace_trans_restart_upgrade(trans->ip, trace_ip, - iter->btree_id, - &iter->real_pos); - return -EINTR; - } - } else { - bch2_btree_iter_unlock_noinline(iter); - } + trans_for_each_iter(trans, iter) + if (iter->nodes_locked != iter->nodes_intent_locked && + !bch2_btree_iter_upgrade(iter, 1)) { + trace_trans_restart_upgrade(trans->ip, trace_ip, + iter->btree_id, + &iter->real_pos); + trans->restarted = true; + return -EINTR; } - } trans_for_each_update(trans, i) { const char *invalid = bch2_bkey_invalid(c, @@ -655,56 +647,24 @@ int bch2_trans_commit_error(struct btree_trans *trans, int ret, unsigned long trace_ip) { struct bch_fs *c = trans->c; - unsigned flags = trans->flags; - - /* - * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree - * 
update; if we haven't done anything yet it doesn't apply - */ - flags &= ~BTREE_INSERT_NOUNLOCK; switch (ret) { case BTREE_INSERT_BTREE_NODE_FULL: - ret = bch2_btree_split_leaf(trans, i->iter, flags); - - /* - * if the split succeeded without dropping locks the insert will - * still be atomic (what the caller peeked() and is overwriting - * won't have changed) - */ -#if 0 - /* - * XXX: - * split -> btree node merging (of parent node) might still drop - * locks when we're not passing it BTREE_INSERT_NOUNLOCK - * - * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that - * will inhibit merging - but we don't have a reliable way yet - * (do we?) of checking if we dropped locks in this path - */ + ret = bch2_btree_split_leaf(trans, i->iter, trans->flags); if (!ret) - goto retry; -#endif + return 0; - /* - * don't care if we got ENOSPC because we told split it - * couldn't block: - */ - if (!ret || - ret == -EINTR || - (flags & BTREE_INSERT_NOUNLOCK)) { + if (ret == -EINTR) trace_trans_restart_btree_node_split(trans->ip, trace_ip, i->iter->btree_id, &i->iter->real_pos); - ret = -EINTR; - } break; case BTREE_INSERT_NEED_MARK_REPLICAS: bch2_trans_unlock(trans); ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas); if (ret) - return ret; + break; if (bch2_trans_relock(trans)) return 0; @@ -716,12 +676,15 @@ int bch2_trans_commit_error(struct btree_trans *trans, bch2_trans_unlock(trans); if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && - !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) - return -EAGAIN; + !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) { + trans->restarted = true; + ret = -EAGAIN; + break; + } ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK); if (ret) - return ret; + break; if (bch2_trans_relock(trans)) return 0; @@ -737,7 +700,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); if (ret < 0) - return ret; + break; if 
(bch2_trans_relock(trans)) return 0; @@ -750,7 +713,8 @@ int bch2_trans_commit_error(struct btree_trans *trans, break; } - BUG_ON(ret == -ENOSPC && (flags & BTREE_INSERT_NOFAIL)); + BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted); + BUG_ON(ret == -ENOSPC && (trans->flags & BTREE_INSERT_NOFAIL)); return ret; } @@ -839,8 +803,10 @@ static int extent_handle_overwrites(struct btree_trans *trans, BTREE_ITER_NOT_EXTENTS| BTREE_ITER_INTENT); ret = bch2_btree_iter_traverse(update_iter); - if (ret) + if (ret) { + bch2_trans_iter_put(trans, update_iter); goto out; + } bch2_trans_update(trans, update_iter, update, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| @@ -887,7 +853,7 @@ int __bch2_trans_commit(struct btree_trans *trans) struct btree_insert_entry *i = NULL; struct btree_iter *iter; bool trans_trigger_run; - unsigned u64s, reset_flags = 0; + unsigned u64s; int ret = 0; if (!trans->nr_updates && @@ -944,18 +910,13 @@ int __bch2_trans_commit(struct btree_trans *trans) } while (trans_trigger_run); trans_for_each_update(trans, i) { - ret = bch2_btree_iter_traverse(i->iter); - if (unlikely(ret)) { - trace_trans_restart_traverse(trans->ip, _RET_IP_, - i->iter->btree_id, - &i->iter->pos); - goto out; - } + BUG_ON(!i->iter->should_be_locked); if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) { trace_trans_restart_upgrade(trans->ip, _RET_IP_, i->iter->btree_id, &i->iter->pos); + trans->restarted = true; ret = -EINTR; goto out; } @@ -978,6 +939,7 @@ int __bch2_trans_commit(struct btree_trans *trans) goto err; } retry: + BUG_ON(trans->restarted); memset(&trans->journal_res, 0, sizeof(trans->journal_res)); ret = do_bch2_trans_commit(trans, &i, _RET_IP_); @@ -998,11 +960,18 @@ out: if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW))) percpu_ref_put(&trans->c->writes); out_reset: - if (!ret) - reset_flags |= TRANS_RESET_NOTRAVERSE; - if (!ret && (trans->flags & BTREE_INSERT_NOUNLOCK)) - reset_flags |= TRANS_RESET_NOUNLOCK; - bch2_trans_reset(trans, reset_flags); 
+ trans->extra_journal_res = 0; + trans->nr_updates = 0; + trans->hooks = NULL; + trans->extra_journal_entries = NULL; + trans->extra_journal_entry_u64s = 0; + + if (trans->fs_usage_deltas) { + trans->fs_usage_deltas->used = 0; + memset(&trans->fs_usage_deltas->memset_start, 0, + (void *) &trans->fs_usage_deltas->memset_end - + (void *) &trans->fs_usage_deltas->memset_start); + } return ret; err: @@ -1050,7 +1019,11 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p, BTREE_ITER_INTENT| BTREE_ITER_NOT_EXTENTS); + ret = bch2_btree_iter_traverse(n.iter); bch2_trans_iter_put(trans, n.iter); + + if (ret) + return ret; } BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS); diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index a95165b..02b2968 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -210,6 +210,9 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; old_dst = bch2_btree_iter_peek_slot(dst_iter); + ret = bkey_err(old_dst); + if (ret) + goto out; if (mode != BCH_RENAME) *dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum); @@ -225,6 +228,10 @@ int bch2_dirent_rename(struct btree_trans *trans, goto out; old_src = bch2_btree_iter_peek_slot(src_iter); + ret = bkey_err(old_src); + if (ret) + goto out; + *src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum); /* Create new dst key: */ @@ -329,20 +336,25 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum, struct btree_iter *iter; struct bkey_s_c k; u64 inum = 0; + int ret = 0; bch2_trans_init(&trans, c, 0, 0); iter = __bch2_dirent_lookup_trans(&trans, dir_inum, hash_info, name, 0); - if (IS_ERR(iter)) { - BUG_ON(PTR_ERR(iter) == -EINTR); + ret = PTR_ERR_OR_ZERO(iter); + if (ret) goto out; - } k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto out; + inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); bch2_trans_iter_put(&trans, iter); out: + BUG_ON(ret == -EINTR); 
bch2_trans_exit(&trans); return inum; } diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c index 60c5443..2189a11 100644 --- a/libbcachefs/fs-common.c +++ b/libbcachefs/fs-common.c @@ -168,6 +168,10 @@ int bch2_unlink_trans(struct btree_trans *trans, goto err; k = bch2_btree_iter_peek_slot(dirent_iter); + ret = bkey_err(k); + if (ret) + goto err; + inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum); inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT); diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index ae55453..1ac99f3 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -801,6 +801,8 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_init(&sk); retry: + bch2_trans_begin(trans); + while (1) { struct bkey_s_c k; unsigned bytes, sectors, offset_into_extent; @@ -2301,8 +2303,6 @@ int bch2_truncate(struct user_namespace *mnt_userns, struct bch_fs *c = inode->v.i_sb->s_fs_info; struct address_space *mapping = inode->v.i_mapping; struct bch_inode_unpacked inode_u; - struct btree_trans trans; - struct btree_iter *iter; u64 new_i_size = iattr->ia_size; s64 i_sectors_delta = 0; int ret = 0; @@ -2323,16 +2323,7 @@ int bch2_truncate(struct user_namespace *mnt_userns, inode_dio_wait(&inode->v); bch2_pagecache_block_get(&inode->ei_pagecache_lock); - /* - * fetch current on disk i_size: inode is locked, i_size can only - * increase underneath us: - */ - bch2_trans_init(&trans, c, 0, 0); - iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0); - ret = PTR_ERR_OR_ZERO(iter); - bch2_trans_iter_put(&trans, iter); - bch2_trans_exit(&trans); - + ret = bch2_inode_find_by_inum(c, inode->v.i_ino, &inode_u); if (ret) goto err; @@ -2557,6 +2548,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, struct bpos atomic_end; unsigned trigger_flags = 0; + bch2_trans_begin(&trans); + k = insert ? 
bch2_btree_iter_peek_prev(src) : bch2_btree_iter_peek(src); @@ -2684,13 +2677,13 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, /* already reserved */ if (k.k->type == KEY_TYPE_reservation && bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { - bch2_btree_iter_next_slot(iter); + bch2_btree_iter_advance(iter); continue; } if (bkey_extent_is_data(k.k) && !(mode & FALLOC_FL_ZERO_RANGE)) { - bch2_btree_iter_next_slot(iter); + bch2_btree_iter_advance(iter); continue; } diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index bc7cea0..631fb87 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -156,7 +156,6 @@ retry: bch2_inode_write(&trans, iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); /* @@ -294,8 +293,7 @@ retry: if (unlikely(ret)) goto err_before_quota; - ret = bch2_trans_commit(&trans, NULL, &journal_seq, - BTREE_INSERT_NOUNLOCK); + ret = bch2_trans_commit(&trans, NULL, &journal_seq, 0); if (unlikely(ret)) { bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); @@ -416,8 +414,7 @@ static int __bch2_link(struct bch_fs *c, mutex_lock(&inode->ei_update_lock); bch2_trans_init(&trans, c, 4, 1024); - ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK, + ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0, bch2_link_trans(&trans, dir->v.i_ino, inode->v.i_ino, &dir_u, &inode_u, @@ -469,7 +466,6 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) bch2_trans_init(&trans, c, 4, 1024); ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL, bch2_unlink_trans(&trans, dir->v.i_ino, &dir_u, @@ -590,8 +586,7 @@ static int bch2_rename2(struct user_namespace *mnt_userns, goto err; } - ret = __bch2_trans_do(&trans, NULL, &journal_seq, - BTREE_INSERT_NOUNLOCK, + ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0, bch2_rename_trans(&trans, 
src_dir->v.i_ino, &src_dir_u, dst_dir->v.i_ino, &dst_dir_u, @@ -734,7 +729,6 @@ retry: ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?: bch2_trans_commit(&trans, NULL, &inode->ei_journal_seq, - BTREE_INSERT_NOUNLOCK| BTREE_INSERT_NOFAIL); btree_err: bch2_trans_iter_put(&trans, inode_iter); @@ -909,6 +903,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(ei->v.i_ino, start >> 9), 0); retry: + bch2_trans_begin(&trans); + while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k)) && bkey_cmp(iter->pos, end) < 0) { diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index bedfd34..36eba46 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -727,7 +727,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, k = bch2_btree_iter_peek(iter); if (!k.k) - return 1; + return 0; ret = bkey_err(k); if (ret) @@ -803,8 +803,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = __write_inode(trans, &target, target_snapshot) ?: bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL| - BTREE_INSERT_LAZY_RW| - BTREE_INSERT_NOUNLOCK); + BTREE_INSERT_LAZY_RW); if (ret) return ret; return -EINTR; @@ -904,19 +903,12 @@ static int check_dirents(struct bch_fs *c) BTREE_ITER_INTENT| BTREE_ITER_PREFETCH); - while (1) { + do { ret = lockrestart_do(&trans, check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs)); - if (ret == 1) { - /* at end */ - ret = 0; - break; - } if (ret) break; - - bch2_btree_iter_advance(iter); - } + } while (bch2_btree_iter_advance(iter)); bch2_trans_iter_put(&trans, iter); return bch2_trans_exit(&trans) ?: ret; diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 67983ff..25607b5 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -519,7 +519,7 @@ again: if (k.k->p.snapshot == snapshot && k.k->type != KEY_TYPE_inode && !bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, 
snapshot))) { - bch2_btree_iter_next(iter); + bch2_btree_iter_advance(iter); continue; } diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 092ece2..a59b291 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -235,8 +235,12 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans, * writing to, because i_size could be up to one block * less: */ - if (!bkey_cmp(old.k->p, new->k.p)) + if (!bkey_cmp(old.k->p, new->k.p)) { old = bch2_btree_iter_next(iter); + ret = bkey_err(old); + if (ret) + break; + } if (old.k && !bkey_err(old) && old.k->p.inode == extent_iter->pos.inode && @@ -362,14 +366,13 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k; int ret = 0, ret2 = 0; - while ((k = bch2_btree_iter_peek(iter)).k && + while ((bch2_trans_begin(trans), + (k = bch2_btree_iter_peek(iter)).k) && bkey_cmp(iter->pos, end) < 0) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; - bch2_trans_begin(trans); - ret = bkey_err(k); if (ret) goto btree_err; @@ -2270,12 +2273,13 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); -retry: - bch2_trans_begin(&trans); iter = bch2_trans_get_iter(&trans, BTREE_ID_extents, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS); +retry: + bch2_trans_begin(&trans); + while (1) { unsigned bytes, sectors, offset_into_extent; enum btree_id data_btree = BTREE_ID_extents; @@ -2331,19 +2335,20 @@ retry: swap(bvec_iter.bi_size, bytes); bio_advance_iter(&rbio->bio, &bvec_iter, bytes); } - bch2_trans_iter_put(&trans, iter); if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID) goto retry; + bch2_trans_iter_put(&trans, iter); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&sk, c); + if (ret) { bch_err_inum_ratelimited(c, inode, "read error %i from btree lookup", ret); rbio->bio.bi_status = BLK_STS_IOERR; bch2_rbio_done(rbio); } - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&sk, 
c); } void bch2_fs_io_exit(struct bch_fs *c) diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 80a54e1..ee0f155 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -84,7 +84,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bool extending = false, should_check_enospc; s64 i_sectors_delta = 0, disk_sectors_delta = 0; - bch2_trans_reset(&trans, 0); + bch2_trans_begin(&trans); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); @@ -191,7 +191,7 @@ nomatch: } atomic_long_inc(&c->extent_migrate_raced); trace_move_race(&new->k); - bch2_btree_iter_next_slot(iter); + bch2_btree_iter_advance(iter); goto next; } out: @@ -597,6 +597,8 @@ static int __bch2_move_data(struct bch_fs *c, } } while (delay); + bch2_trans_begin(&trans); + k = bch2_btree_iter_peek(iter); stats->pos = iter->pos; @@ -652,8 +654,7 @@ static int __bch2_move_data(struct bch_fs *c, data_cmd, data_opts); if (ret2) { if (ret2 == -EINTR) { - bch2_trans_reset(&trans, 0); - bch2_trans_cond_resched(&trans); + bch2_trans_begin(&trans); continue; } diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index ed50585..003c00f 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -178,6 +178,11 @@ enum opt_type { OPT_BOOL(), \ BCH_SB_INODES_USE_KEY_CACHE, true, \ NULL, "Use the btree key cache for the inodes btree") \ + x(btree_node_mem_ptr_optimization, u8, \ + OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + NO_SB_OPT, true, \ + NULL, "Stash pointer to in memory btree node in btree ptr")\ x(gc_reserve_percent, u8, \ OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(5, 21), \ diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c index 35b409e..7861781 100644 --- a/libbcachefs/quota.c +++ b/libbcachefs/quota.c @@ -760,7 +760,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, bkey_quota_init(&new_quota.k_i); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK, + ret = bch2_trans_do(c, NULL, NULL, 
0, bch2_set_quota_trans(&trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i)); diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c index ebf3912..3d9c5c5 100644 --- a/libbcachefs/reflink.c +++ b/libbcachefs/reflink.c @@ -192,8 +192,9 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end) return k; } - bch2_btree_iter_set_pos(iter, end); - return bkey_s_c_null; + if (bkey_cmp(iter->pos, end) >= 0) + bch2_btree_iter_set_pos(iter, end); + return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } s64 bch2_remap_range(struct bch_fs *c, @@ -304,12 +305,12 @@ s64 bch2_remap_range(struct bch_fs *c, dst_done = dst_iter->pos.offset - dst_start.offset; new_i_size = min(dst_iter->pos.offset << 9, new_i_size); - bch2_trans_begin(&trans); - do { struct bch_inode_unpacked inode_u; struct btree_iter *inode_iter; + bch2_trans_begin(&trans); + inode_iter = bch2_inode_peek(&trans, &inode_u, dst_start.inode, BTREE_ITER_INTENT); ret2 = PTR_ERR_OR_ZERO(inode_iter); diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 2ff8e5b..2360234 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -209,7 +209,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans, iter = bch2_trans_copy_iter(trans, start); - bch2_btree_iter_next_slot(iter); + bch2_btree_iter_advance(iter); for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { if (k.k->type != desc.key_type && diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index 8bd7553..e4d400b 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -118,18 +118,16 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c, le16_to_cpu(xattr.v->x_val_len)); } -int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, - const char *name, void *buffer, size_t size, int type) +static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode, + const char *name, void *buffer, size_t size, int type) { - struct bch_hash_info 
hash = bch2_hash_info_init(c, &inode->ei_inode); - struct btree_trans trans; + struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode); struct btree_iter *iter; struct bkey_s_c_xattr xattr; + struct bkey_s_c k; int ret; - bch2_trans_init(&trans, c, 0, 0); - - iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &hash, + iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash, inode->v.i_ino, &X_SEARCH(type, name, strlen(name)), 0); @@ -137,7 +135,12 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, if (ret) goto err; - xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter)); + k = bch2_btree_iter_peek_slot(iter); + ret = bkey_err(k); + if (ret) + goto err; + + xattr = bkey_s_c_to_xattr(k); ret = le16_to_cpu(xattr.v->x_val_len); if (buffer) { if (ret > size) @@ -145,14 +148,18 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, else memcpy(buffer, xattr_val(xattr.v), ret); } - bch2_trans_iter_put(&trans, iter); + bch2_trans_iter_put(trans, iter); err: - bch2_trans_exit(&trans); - - BUG_ON(ret == -EINTR); return ret == -ENOENT ? -ENODATA : ret; } +int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode, + const char *name, void *buffer, size_t size, int type) +{ + return bch2_trans_do(c, NULL, NULL, 0, + bch2_xattr_get_trans(&trans, inode, name, buffer, size, type)); +} + int bch2_xattr_set(struct btree_trans *trans, u64 inum, const struct bch_hash_info *hash_info, const char *name, const void *value, size_t size,