Update bcachefs sources to e48731a188 bcachefs: Fix BTREE_TRIGGER_WANTS_OLD_AND_NEW
author    Kent Overstreet <kent.overstreet@gmail.com>
          Sun, 13 Mar 2022 23:14:01 +0000 (19:14 -0400)
committer Kent Overstreet <kent.overstreet@gmail.com>
          Sun, 13 Mar 2022 23:20:59 +0000 (19:20 -0400)
36 files changed:
.bcachefs_revision
include/linux/zstd.h
include/linux/zstd_errors.h [new file with mode: 0644]
libbcachefs/alloc_background.h
libbcachefs/bkey_methods.h
libbcachefs/btree_io.c
libbcachefs/btree_iter.c
libbcachefs/btree_iter.h
libbcachefs/btree_key_cache.c
libbcachefs/btree_locking.h
libbcachefs/btree_types.h
libbcachefs/btree_update_leaf.c
libbcachefs/buckets.c
libbcachefs/buckets.h
libbcachefs/compress.c
libbcachefs/dirent.c
libbcachefs/ec.h
libbcachefs/error.h
libbcachefs/extents.h
libbcachefs/fs-io.c
libbcachefs/fs.c
libbcachefs/inode.c
libbcachefs/inode.h
libbcachefs/journal.c
libbcachefs/journal.h
libbcachefs/journal_reclaim.c
libbcachefs/journal_types.h
libbcachefs/recovery.c
libbcachefs/reflink.h
libbcachefs/str_hash.h
libbcachefs/sysfs.c
libbcachefs/tests.c
libbcachefs/util.h
libbcachefs/xattr.c
linux/zstd_compress_module.c [new file with mode: 0644]
linux/zstd_decompress_module.c [new file with mode: 0644]

diff --git a/.bcachefs_revision b/.bcachefs_revision
index 8e1032accb68c1c3ecc486daf3f79cd4720660f8..74f5970f7a516fb9f45efdc43ec5497455261b08 100644 (file)
@@ -1 +1 @@
-e318fabeb424d4b8fdd46329125c30aaa4f9006a
+e48731a188639563444d475622782b7963df4b47
diff --git a/include/linux/zstd.h b/include/linux/zstd.h
index 0dd1b0230293847cec379b60e8afe0e6a656d037..b0fa1eda5acb7c66fe2e83407b0fa0bf6c8865fd 100644 (file)
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
+ */
+
+#ifndef LINUX_ZSTD_H
+#define LINUX_ZSTD_H
+
+/**
+ * This is a kernel-style API that wraps the upstream zstd API, which cannot be
+ * used directly because the symbols aren't exported. It exposes the minimal
+ * functionality which is currently required by users of zstd in the kernel.
+ * Expose extra functions from lib/zstd/zstd.h as needed.
+ */
+
+/* ======   Dependency   ====== */
+#include <linux/types.h>
 #include <zstd.h>
+#include <linux/zstd_errors.h>
+
+/* ======   Helper Functions   ====== */
+/**
+ * zstd_compress_bound() - maximum compressed size in worst case scenario
+ * @src_size: The size of the data to compress.
+ *
+ * Return:    The maximum compressed size in the worst case scenario.
+ */
+size_t zstd_compress_bound(size_t src_size);
+
+/**
+ * zstd_is_error() - tells if a size_t function result is an error code
+ * @code:  The function result to check for error.
+ *
+ * Return: Non-zero iff the code is an error.
+ */
+unsigned int zstd_is_error(size_t code);
+
+/**
+ * enum zstd_error_code - zstd error codes
+ */
+typedef ZSTD_ErrorCode zstd_error_code;
+
+/**
+ * zstd_get_error_code() - translates an error function result to an error code
+ * @code:  The function result for which zstd_is_error(code) is true.
+ *
+ * Return: A unique error code for this error.
+ */
+zstd_error_code zstd_get_error_code(size_t code);
+
+/**
+ * zstd_get_error_name() - translates an error function result to a string
+ * @code:  The function result for which zstd_is_error(code) is true.
+ *
+ * Return: An error string corresponding to the error code.
+ */
+const char *zstd_get_error_name(size_t code);
+
+/**
+ * zstd_min_clevel() - minimum allowed compression level
+ *
+ * Return: The minimum allowed compression level.
+ */
+int zstd_min_clevel(void);
+
+/**
+ * zstd_max_clevel() - maximum allowed compression level
+ *
+ * Return: The maximum allowed compression level.
+ */
+int zstd_max_clevel(void);
+
+/* ======   Parameter Selection   ====== */
+
+/**
+ * enum zstd_strategy - zstd compression search strategy
+ *
+ * From faster to stronger. See zstd_lib.h.
+ */
+typedef ZSTD_strategy zstd_strategy;
+
+/**
+ * struct zstd_compression_parameters - zstd compression parameters
+ * @windowLog:    Log of the largest match distance. Larger means more
+ *                compression, and more memory needed during decompression.
+ * @chainLog:     Fully searched segment. Larger means more compression,
+ *                slower, and more memory (useless for fast).
+ * @hashLog:      Dispatch table. Larger means more compression,
+ *                slower, and more memory.
+ * @searchLog:    Number of searches. Larger means more compression and slower.
+ * @searchLength: Match length searched. Larger means faster decompression,
+ *                sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ *                more compression, and slower.
+ * @strategy:     The zstd compression strategy.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_compressionParameters zstd_compression_parameters;
+
+/**
+ * struct zstd_frame_parameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the
+ *                   frame header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
+ *                   end of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame
+ *                   header when using dictionary compression.
+ *
+ * The default value is all fields set to 0. See zstd_lib.h.
+ */
+typedef ZSTD_frameParameters zstd_frame_parameters;
+
+/**
+ * struct zstd_parameters - zstd parameters
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
+ */
+typedef ZSTD_parameters zstd_parameters;
+
+/**
+ * zstd_get_params() - returns zstd_parameters for selected level
+ * @level:              The compression level
+ * @estimated_src_size: The estimated source size to compress or 0
+ *                      if unknown.
+ *
+ * Return:              The selected zstd_parameters.
+ */
+zstd_parameters zstd_get_params(int level,
+       unsigned long long estimated_src_size);
+
+/* ======   Single-pass Compression   ====== */
+
+typedef ZSTD_CCtx zstd_cctx;
+
+/**
+ * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
+ * @parameters: The compression parameters to be used.
+ *
+ * If multiple compression parameters might be used, the caller must call
+ * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
+ * size.
+ *
+ * Return:      A lower bound on the size of the workspace that is passed to
+ *              zstd_init_cctx().
+ */
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
+
+/**
+ * zstd_init_cctx() - initialize a zstd compression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
+ *                  determine how large the workspace must be.
+ *
+ * Return:          A zstd compression context or NULL on error.
+ */
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
+
+/**
+ * zstd_compress_cctx() - compress src into dst with the initialized parameters
+ * @cctx:         The context. Must have been initialized with zstd_init_cctx().
+ * @dst:          The buffer to compress src into.
+ * @dst_capacity: The size of the destination buffer. May be any size, but
+ *                ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:          The data to compress.
+ * @src_size:     The size of the data to compress.
+ * @parameters:   The compression parameters to be used.
+ *
+ * Return:        The compressed size or an error, which can be checked using
+ *                zstd_is_error().
+ */
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size, const zstd_parameters *parameters);
+
+/* ======   Single-pass Decompression   ====== */
+
+typedef ZSTD_DCtx zstd_dctx;
+
+/**
+ * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         zstd_init_dctx().
+ */
+size_t zstd_dctx_workspace_bound(void);
+
+/**
+ * zstd_init_dctx() - initialize a zstd decompression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
+ *                  determine how large the workspace must be.
+ *
+ * Return:          A zstd decompression context or NULL on error.
+ */
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
+
+/**
+ * zstd_decompress_dctx() - decompress zstd compressed src into dst
+ * @dctx:         The decompression context.
+ * @dst:          The buffer to decompress src into.
+ * @dst_capacity: The size of the destination buffer. Must be at least as large
+ *                as the decompressed size. If the caller cannot upper bound the
+ *                decompressed size, then it's better to use the streaming API.
+ * @src:          The zstd compressed data to decompress. Multiple concatenated
+ *                frames and skippable frames are allowed.
+ * @src_size:     The exact size of the data to decompress.
+ *
+ * Return:        The decompressed size or an error, which can be checked using
+ *                zstd_is_error().
+ */
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size);
+
+/* ======   Streaming Buffers   ====== */
+
+/**
+ * struct zstd_in_buffer - input buffer for streaming
+ * @src:  Start of the input buffer.
+ * @size: Size of the input buffer.
+ * @pos:  Position where reading stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_inBuffer zstd_in_buffer;
+
+/**
+ * struct zstd_out_buffer - output buffer for streaming
+ * @dst:  Start of the output buffer.
+ * @size: Size of the output buffer.
+ * @pos:  Position where writing stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_outBuffer zstd_out_buffer;
+
+/* ======   Streaming Compression   ====== */
+
+typedef ZSTD_CStream zstd_cstream;
+
+/**
+ * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
+ * @cparams: The compression parameters to be used for compression.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           zstd_init_cstream().
+ */
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
+
+/**
+ * zstd_init_cstream() - initialize a zstd streaming compression context
+ * @parameters:       The zstd parameters to use for compression.
+ * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
+ *                    must pass the source size (zero means empty source).
+ *                    Otherwise, the caller may optionally pass the source
+ *                    size, or zero if unknown.
+ * @workspace:        The workspace to emplace the context into. It must outlive
+ *                    the returned context.
+ * @workspace_size:   The size of workspace.
+ *                    Use zstd_cstream_workspace_bound(params->cparams) to
+ *                    determine how large the workspace must be.
+ *
+ * Return:            The zstd streaming compression context or NULL on error.
+ */
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+       unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
+
+/**
+ * zstd_reset_cstream() - reset the context using parameters from creation
+ * @cstream:          The zstd streaming compression context to reset.
+ * @pledged_src_size: Optionally the source size, or zero if unknown.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
+ * content size is always written into the frame header.
+ *
+ * Return:            Zero or an error, which can be checked using
+ *                    zstd_is_error().
+ */
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+       unsigned long long pledged_src_size);
+
+/**
+ * zstd_compress_stream() - streaming compress some of input into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
+ * @input:   Source buffer. `input->pos` is updated to indicate how much data
+ *           was read. Note that it may not consume the entire input, in which
+ *           case `input->pos < input->size`, and it's up to the caller to
+ *           present remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ *
+ * Return:   A hint for the number of bytes to use as the input for the next
+ *           function call or an error, which can be checked using
+ *           zstd_is_error().
+ */
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+       zstd_in_buffer *input);
+
+/**
+ * zstd_flush_stream() - flush internal buffers into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
+ *
+ * zstd_flush_stream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since zstd_flush_stream() causes a block to be ended,
+ * calling it too often will degrade the compression ratio.
+ *
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
+ */
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
+/**
+ * zstd_end_stream() - flush internal buffers into output and end the frame
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
+ *
+ * zstd_end_stream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
+ *
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
+ */
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
+
+/* ======   Streaming Decompression   ====== */
+
+typedef ZSTD_DStream zstd_dstream;
+
+/**
+ * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ *
+ * Return:           A lower bound on the size of the workspace that is passed
+ *                   to zstd_init_dstream().
+ */
+size_t zstd_dstream_workspace_bound(size_t max_window_size);
+
+/**
+ * zstd_init_dstream() - initialize a zstd streaming decompression context
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ * @workspace:       The workspace to emplace the context into. It must outlive
+ *                   the returned context.
+ * @workspace_size:  The size of workspace.
+ *                   Use zstd_dstream_workspace_bound(max_window_size) to
+ *                   determine how large the workspace must be.
+ *
+ * Return:           The zstd streaming decompression context.
+ */
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+       size_t workspace_size);
+
+/**
+ * zstd_reset_dstream() - reset the context using parameters from creation
+ * @dstream: The zstd streaming decompression context to reset.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused.
+ *
+ * Return:   Zero or an error, which can be checked using zstd_is_error().
+ */
+size_t zstd_reset_dstream(zstd_dstream *dstream);
+
+/**
+ * zstd_decompress_stream() - streaming decompress some of input into output
+ * @dstream: The zstd streaming decompression context.
+ * @output:  Destination buffer. `output.pos` is updated to indicate how much
+ *           decompressed data was written.
+ * @input:   Source buffer. `input.pos` is updated to indicate how much data was
+ *           read. Note that it may not consume the entire input, in which case
+ *           `input.pos < input.size`, and it's up to the caller to present
+ *           remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ * zstd_decompress_stream() will not consume the last byte of the frame until
+ * the entire frame is flushed.
+ *
+ * Return:   Returns 0 iff a frame is completely decoded and fully flushed.
+ *           Otherwise returns a hint for the number of bytes to use as the
+ *           input for the next function call or an error, which can be checked
+ *           using zstd_is_error(). The size hint will never load more than the
+ *           frame.
+ */
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+       zstd_in_buffer *input);
+
+/* ======   Frame Inspection Functions ====== */
+
+/**
+ * zstd_find_frame_compressed_size() - returns the size of a compressed frame
+ * @src:      Source buffer. It should point to the start of a zstd encoded
+ *            frame or a skippable frame.
+ * @src_size: The size of the source buffer. It must be at least as large as the
+ *            size of the frame.
+ *
+ * Return:    The compressed size of the frame pointed to by `src` or an error,
+ *            which can be checked with zstd_is_error().
+ *            Suitable to pass to ZSTD_decompress() or similar functions.
+ */
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
 
-#define ZSTD_initDCtx(w, s)    ZSTD_initStaticDCtx(w, s)
-#define ZSTD_initCCtx(w, s)    ZSTD_initStaticCCtx(w, s)
+/**
+ * struct zstd_frame_params - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ *                    present.
+ * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax:     The maximum block size.
+ * @frameType:        The frame type (zstd or skippable)
+ * @headerSize:       The size of the frame header.
+ * @dictID:           The dictionary id, or 0 if not present.
+ * @checksumFlag:     Whether a checksum was used.
+ *
+ * See zstd_lib.h.
+ */
+typedef ZSTD_frameHeader zstd_frame_header;
 
-#define ZSTD_compressCCtx(w, dst, d_len, src, src_len, params) \
-       ZSTD_compressCCtx(w, dst, d_len, src, src_len, 0)
+/**
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
+ * @params:   On success the frame parameters are written here.
+ * @src:      The source buffer. It must point to a zstd or skippable frame.
+ * @src_size: The size of the source buffer.
+ *
+ * Return:    0 on success. If more data is required it returns how many bytes
+ *            must be provided to make forward progress. Otherwise it returns
+ *            an error, which can be checked using zstd_is_error().
+ */
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
+       size_t src_size);
 
-#define ZSTD_CCtxWorkspaceBound(p)     ZSTD_estimateCCtxSize(0)
-#define ZSTD_DCtxWorkspaceBound()      ZSTD_estimateDCtxSize()
+#endif  /* LINUX_ZSTD_H */
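
The header above is declaration-only; as orientation, here is a minimal sketch
of single-pass compression against this wrapper API. example_compress() is a
hypothetical name, and kvmalloc()/kvfree()/GFP_KERNEL are assumed to be
available in the target environment; decompression is symmetric via
zstd_dctx_workspace_bound(), zstd_init_dctx() and zstd_decompress_dctx().

#include <linux/mm.h>
#include <linux/zstd.h>

/* Hypothetical helper: compress src into dst using a caller-owned workspace. */
static ssize_t example_compress(void *dst, size_t dst_capacity,
				const void *src, size_t src_size, int level)
{
	zstd_parameters params = zstd_get_params(level, src_size);
	size_t ws_size = zstd_cctx_workspace_bound(&params.cParams);
	void *workspace = kvmalloc(ws_size, GFP_KERNEL);
	zstd_cctx *cctx;
	size_t ret;

	if (!workspace)
		return -ENOMEM;

	cctx = zstd_init_cctx(workspace, ws_size);
	if (!cctx) {
		kvfree(workspace);
		return -EINVAL;
	}

	/* A dst_capacity of zstd_compress_bound(src_size) always suffices. */
	ret = zstd_compress_cctx(cctx, dst, dst_capacity, src, src_size, &params);
	kvfree(workspace);

	return zstd_is_error(ret) ? -EIO : (ssize_t) ret;
}
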
diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
new file mode 100644 (file)
index 0000000..58b6dd4
--- /dev/null
+++ b/include/linux/zstd_errors.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+
+/*===== dependency =====*/
+#include <linux/types.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#define ZSTDERRORLIB_VISIBILITY 
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum rather than on its value whenever
+ *           possible. This is the only supported way to use the error list
+ *           < v1.3.1.
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+  ZSTD_error_no_error = 0,
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall= 66,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_dstBuffer_null   = 74,
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_srcBuffer_wrong     = 105,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+
+#endif /* ZSTD_ERRORS_H_398273423 */
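
Because only codes below 100 are stable, callers reacting to specific failures
should compare against the enum rather than raw values. A minimal sketch
(check_zstd_result() is a hypothetical name; the errno mapping is illustrative):

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/zstd.h>

/* Map a zstd size_t result onto kernel-style errnos (illustrative). */
static int check_zstd_result(size_t ret)
{
	if (!zstd_is_error(ret))
		return 0;

	switch (zstd_get_error_code(ret)) {
	case ZSTD_error_dstSize_tooSmall:
		return -ENOSPC;	/* caller may retry with a larger buffer */
	case ZSTD_error_memory_allocation:
		return -ENOMEM;
	default:
		pr_err("zstd: %s\n", zstd_get_error_name(ret));
		return -EIO;
	}
}
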
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 98c7866e20b57ded9f8d629d8427d5966f97bfb5..3eaa6d2042861f6ba46020ee0fa2f7d79140ea7e 100644 (file)
@@ -65,16 +65,19 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 #define bch2_bkey_ops_alloc (struct bkey_ops) {                \
        .key_invalid    = bch2_alloc_v1_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
+       .atomic_trigger = bch2_mark_alloc,              \
 }
 
 #define bch2_bkey_ops_alloc_v2 (struct bkey_ops) {     \
        .key_invalid    = bch2_alloc_v2_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
+       .atomic_trigger = bch2_mark_alloc,              \
 }
 
 #define bch2_bkey_ops_alloc_v3 (struct bkey_ops) {     \
        .key_invalid    = bch2_alloc_v3_invalid,        \
        .val_to_text    = bch2_alloc_to_text,           \
+       .atomic_trigger = bch2_mark_alloc,              \
 }
 
 static inline bool bkey_is_alloc(const struct bkey *k)
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index 4fdac545cf88af8f2425f30477efd54bbf568d27..2289a09d98fc1cfd171b5ac38482d400f33e9c48 100644 (file)
@@ -6,6 +6,7 @@
 
 struct bch_fs;
 struct btree;
+struct btree_trans;
 struct bkey;
 enum btree_node_type;
 
@@ -20,6 +21,10 @@ struct bkey_ops {
        void            (*swab)(struct bkey_s);
        bool            (*key_normalize)(struct bch_fs *, struct bkey_s);
        bool            (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
+       int             (*trans_trigger)(struct btree_trans *, struct bkey_s_c,
+                                        struct bkey_i *, unsigned);
+       int             (*atomic_trigger)(struct btree_trans *, struct bkey_s_c,
+                                         struct bkey_s_c, unsigned);
        void            (*compat)(enum btree_id id, unsigned version,
                                  unsigned big_endian, int write,
                                  struct bkey_s);
@@ -57,6 +62,28 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
 
 bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 
+static inline int bch2_mark_key(struct btree_trans *trans,
+                 struct bkey_s_c old,
+                 struct bkey_s_c new,
+                 unsigned flags)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new.k->type];
+
+       return ops->atomic_trigger
+               ? ops->atomic_trigger(trans, old, new, flags)
+               : 0;
+}
+
+static inline int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
+                       struct bkey_i *new, unsigned flags)
+{
+       const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new->k.type];
+
+       return ops->trans_trigger
+               ? ops->trans_trigger(trans, old, new, flags)
+               : 0;
+}
+
 void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
 
 void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned,
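
The two new callbacks plus the inline dispatchers above replace trigger
selection by switch-on-key-type (the old switch is deleted from buckets.c
further down). Note the `old.k->type ?: new.k->type` index: a freshly created
key, whose old type is 0, still resolves to the right ops entry. A hedged
illustration of how a key type wires its triggers, modelled on the
bch2_bkey_ops_alloc* definitions earlier in this diff (the *_example names are
hypothetical):

#define bch2_bkey_ops_example (struct bkey_ops) {	\
	.key_invalid	= bch2_example_invalid,		\
	.val_to_text	= bch2_example_to_text,		\
	.atomic_trigger	= bch2_mark_example,		\
	.trans_trigger	= bch2_trans_mark_example,	\
}
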
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 4f0ad06a615ab554f5806362f4a752b32dcd9902..e6cea4c687e125fa73afd55b2e3d172ab4f551c7 100644 (file)
@@ -1626,6 +1626,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
 
        if (new & (1U << BTREE_NODE_write_in_flight))
                __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED);
+       else
+               wake_up_bit(&b->flags, BTREE_NODE_write_in_flight);
 }
 
 static void btree_node_write_done(struct bch_fs *c, struct btree *b)
@@ -2094,7 +2096,6 @@ restart:
                        rcu_read_unlock();
                        wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
                        goto restart;
-
                }
        rcu_read_unlock();
 }
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 8186ee7e23ff7d0b263479630e961200af692a59..7fd0379689d935d245c9384a24d16000c87d0e4d 100644 (file)
@@ -189,7 +189,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
        if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
            (btree_node_lock_seq_matches(path, b, level) &&
             btree_node_lock_increment(trans, b, level, want))) {
-               mark_btree_node_locked(path, level, want);
+               mark_btree_node_locked(trans, path, level, want);
                return true;
        }
 fail:
@@ -240,7 +240,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
 
        return false;
 success:
-       mark_btree_node_intent_locked(path, level);
+       mark_btree_node_intent_locked(trans, path, level);
        return true;
 }
 
@@ -486,14 +486,15 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
         * before interior nodes - now that's handled by
         * bch2_btree_path_traverse_all().
         */
-       trans_for_each_path(trans, linked)
-               if (linked != path &&
-                   linked->cached == path->cached &&
-                   linked->btree_id == path->btree_id &&
-                   linked->locks_want < new_locks_want) {
-                       linked->locks_want = new_locks_want;
-                       btree_path_get_locks(trans, linked, true);
-               }
+       if (!path->cached && !trans->in_traverse_all)
+               trans_for_each_path(trans, linked)
+                       if (linked != path &&
+                           linked->cached == path->cached &&
+                           linked->btree_id == path->btree_id &&
+                           linked->locks_want < new_locks_want) {
+                               linked->locks_want = new_locks_want;
+                               btree_path_get_locks(trans, linked, true);
+                       }
 
        return false;
 }
@@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
                            t != BTREE_NODE_UNLOCKED) {
                                btree_node_unlock(path, b->c.level);
                                six_lock_increment(&b->c.lock, t);
-                               mark_btree_node_locked(path, b->c.level, t);
+                               mark_btree_node_locked(trans, path, b->c.level, t);
                        }
 
                        btree_path_level_init(trans, path, b);
@@ -1244,7 +1245,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
                        for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
                                path->l[i].b = NULL;
 
-                       mark_btree_node_locked(path, path->level, lock_type);
+                       mark_btree_node_locked(trans, path, path->level, lock_type);
                        btree_path_level_init(trans, path, b);
                        return 0;
                }
@@ -1409,7 +1410,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
        if (unlikely(ret))
                goto err;
 
-       mark_btree_node_locked(path, level, lock_type);
+       mark_btree_node_locked(trans, path, level, lock_type);
        btree_path_level_init(trans, path, b);
 
        if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
@@ -1442,6 +1443,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
        trans->in_traverse_all = true;
 retry_all:
        trans->restarted = false;
+       trans->traverse_all_idx = U8_MAX;
 
        trans_for_each_path(trans, path)
                path->should_be_locked = false;
@@ -1474,9 +1476,9 @@ retry_all:
        }
 
        /* Now, redo traversals in correct order: */
-       i = 0;
-       while (i < trans->nr_sorted) {
-               path = trans->paths + trans->sorted[i];
+       trans->traverse_all_idx = 0;
+       while (trans->traverse_all_idx < trans->nr_sorted) {
+               path = trans->paths + trans->sorted[trans->traverse_all_idx];
 
                /*
                 * Traversing a path can cause another path to be added at about
@@ -1484,10 +1486,13 @@ retry_all:
                 */
                if (path->uptodate) {
                        ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
-                       if (ret)
+                       if (ret == -EINTR || ret == -ENOMEM)
                                goto retry_all;
+                       if (ret)
+                               goto err;
+                       BUG_ON(path->uptodate);
                } else {
-                       i++;
+                       trans->traverse_all_idx++;
                }
        }
 
@@ -1498,7 +1503,7 @@ retry_all:
         */
        trans_for_each_path(trans, path)
                BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE);
-
+err:
        bch2_btree_cache_cannibalize_unlock(c);
 
        trans->in_traverse_all = false;
@@ -1807,18 +1812,48 @@ free:
        __bch2_path_free(trans, path);
 }
 
+void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       pr_buf(buf, "transaction updates for %s journal seq %llu\n",
+              trans->fn, trans->journal_res.seq);
+
+       trans_for_each_update(trans, i) {
+               struct bkey_s_c old = { &i->old_k, i->old_v };
+
+               pr_buf(buf, "update: btree %s %pS\n  old ",
+                      bch2_btree_ids[i->btree_id],
+                      (void *) i->ip_allocated);
+
+               bch2_bkey_val_to_text(buf, trans->c, old);
+               pr_buf(buf, "\n  new ");
+               bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k));
+               pr_buf(buf, "\n");
+       }
+}
+
+noinline __cold
+void bch2_dump_trans_updates(struct btree_trans *trans)
+{
+       struct printbuf buf = PRINTBUF;
+
+       bch2_trans_updates_to_text(&buf, trans);
+       bch_err(trans->c, "%s", buf.buf);
+       printbuf_exit(&buf);
+}
+
 noinline __cold
 void bch2_dump_trans_paths_updates(struct btree_trans *trans)
 {
        struct btree_path *path;
-       struct btree_insert_entry *i;
-       struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+       struct printbuf buf = PRINTBUF;
        unsigned idx;
 
        trans_for_each_path_inorder(trans, path, idx) {
-               printbuf_reset(&buf1);
+               printbuf_reset(&buf);
 
-               bch2_bpos_to_text(&buf1, path->pos);
+               bch2_bpos_to_text(&buf, path->pos);
 
                printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
                       path->idx, path->ref, path->intent_ref,
@@ -1826,7 +1861,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
                       path->preserve ? " P" : "",
                       bch2_btree_ids[path->btree_id],
                       path->level,
-                      buf1.buf,
+                      buf.buf,
                       path->nodes_locked,
 #ifdef CONFIG_BCACHEFS_DEBUG
                       (void *) path->ip_allocated
@@ -1836,23 +1871,9 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
                       );
        }
 
-       trans_for_each_update(trans, i) {
-               struct bkey u;
-               struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u);
-
-               printbuf_reset(&buf1);
-               printbuf_reset(&buf2);
-               bch2_bkey_val_to_text(&buf1, trans->c, old);
-               bch2_bkey_val_to_text(&buf2, trans->c, bkey_i_to_s_c(i->k));
+       printbuf_exit(&buf);
 
-               printk(KERN_ERR "update: btree %s %pS\n  old %s\n  new %s",
-                      bch2_btree_ids[i->btree_id],
-                      (void *) i->ip_allocated,
-                      buf1.buf, buf2.buf);
-       }
-
-       printbuf_exit(&buf2);
-       printbuf_exit(&buf1);
+       bch2_dump_trans_updates(trans);
 }
 
 static struct btree_path *btree_path_alloc(struct btree_trans *trans,
@@ -2337,11 +2358,12 @@ out:
  * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
  * current position
  */
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end)
 {
        struct btree_trans *trans = iter->trans;
        struct bpos search_key = btree_iter_search_key(iter);
        struct bkey_s_c k;
+       struct bpos iter_pos;
        int ret;
 
        if (iter->update_path) {
@@ -2357,6 +2379,24 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                if (!k.k || bkey_err(k))
                        goto out;
 
+               /*
+                * iter->pos should be monotonically increasing, and always be
+                * equal to the key we just returned - except extents can
+                * straddle iter->pos:
+                */
+               if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
+                       iter_pos = k.k->p;
+               else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+                       iter_pos = bkey_start_pos(k.k);
+               else
+                       iter_pos = iter->pos;
+
+               if (bkey_cmp(iter_pos, end) > 0) {
+                       bch2_btree_iter_set_pos(iter, end);
+                       k = bkey_s_c_null;
+                       goto out;
+               }
+
                if (iter->update_path &&
                    bkey_cmp(iter->update_path->pos, k.k->p)) {
                        bch2_path_put(trans, iter->update_path,
@@ -2387,10 +2427,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                        iter->update_path = bch2_btree_path_set_pos(trans,
                                                iter->update_path, pos,
                                                iter->flags & BTREE_ITER_INTENT,
-                                               btree_iter_ip_allocated(iter));
-
-                       BUG_ON(!(iter->update_path->nodes_locked & 1));
-                       iter->update_path->should_be_locked = true;
+                                               _THIS_IP_);
                }
 
                /*
@@ -2414,14 +2451,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
                break;
        }
 
-       /*
-        * iter->pos should be monotonically increasing, and always be equal to
-        * the key we just returned - except extents can straddle iter->pos:
-        */
-       if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
-               iter->pos = k.k->p;
-       else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
-               iter->pos = bkey_start_pos(k.k);
+       iter->pos = iter_pos;
 
        iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
                                iter->flags & BTREE_ITER_INTENT,
@@ -2429,8 +2459,13 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
        BUG_ON(!iter->path->nodes_locked);
 out:
        if (iter->update_path) {
-               BUG_ON(!(iter->update_path->nodes_locked & 1));
-               iter->update_path->should_be_locked = true;
+               if (iter->update_path->uptodate &&
+                   !bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) {
+                       k = bkey_s_c_err(-EINTR);
+               } else {
+                       BUG_ON(!(iter->update_path->nodes_locked & 1));
+                       iter->update_path->should_be_locked = true;
+               }
        }
        iter->path->should_be_locked = true;
 
@@ -2661,9 +2696,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
                if (iter->flags & BTREE_ITER_INTENT) {
                        struct btree_iter iter2;
+                       struct bpos end = iter->pos;
+
+                       if (iter->flags & BTREE_ITER_IS_EXTENTS)
+                               end.offset = U64_MAX;
 
                        bch2_trans_copy_iter(&iter2, iter);
-                       k = bch2_btree_iter_peek(&iter2);
+                       k = bch2_btree_iter_peek_upto(&iter2, end);
 
                        if (k.k && !bkey_err(k)) {
                                iter->k = iter2.k;
@@ -2831,6 +2870,11 @@ static inline void btree_path_list_add(struct btree_trans *trans,
 
        path->sorted_idx = pos ? pos->sorted_idx + 1 : 0;
 
+       if (trans->in_traverse_all &&
+           trans->traverse_all_idx != U8_MAX &&
+           trans->traverse_all_idx >= path->sorted_idx)
+               trans->traverse_all_idx++;
+
        array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx);
 
        for (i = path->sorted_idx; i < trans->nr_sorted; i++)
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index d612aec915879e99bc3d5a6c826d8b832f9b1f1d..f6700295e1a7af5690b4987983c345236b7112c7 100644 (file)
@@ -209,9 +209,14 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *);
 struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
 struct btree *bch2_btree_iter_next_node(struct btree_iter *);
 
-struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
+struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
 
+static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
+{
+       return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
+}
+
 struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
 
@@ -306,13 +311,26 @@ static inline int bkey_err(struct bkey_s_c k)
 }
 
 static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
-                                                    unsigned flags)
+                                                       unsigned flags)
 {
        return flags & BTREE_ITER_SLOTS
                ? bch2_btree_iter_peek_slot(iter)
                : bch2_btree_iter_peek(iter);
 }
 
+static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
+                                                            struct bpos end,
+                                                            unsigned flags)
+{
+       if (!(flags & BTREE_ITER_SLOTS))
+               return bch2_btree_iter_peek_upto(iter, end);
+
+       if (bkey_cmp(iter->pos, end) > 0)
+               return bkey_s_c_null;
+
+       return bch2_btree_iter_peek_slot(iter);
+}
+
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
        return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2
@@ -349,6 +367,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
             !((_ret) = bkey_err(_k)) && (_k).k;                        \
             bch2_btree_iter_advance(&(_iter)))
 
+#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id,    \
+                          _start, _end, _flags, _k, _ret)              \
+       for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),      \
+                                 (_start), (_flags));                  \
+            (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
+            !((_ret) = bkey_err(_k)) && (_k).k;                        \
+            bch2_btree_iter_advance(&(_iter)))
+
 #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret)   \
        for (;                                                          \
             (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
@@ -363,6 +389,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
 
 /* new multiple iterator interface: */
 
+void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
+void bch2_dump_trans_updates(struct btree_trans *);
 void bch2_dump_trans_paths_updates(struct btree_trans *);
 void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
                       unsigned, size_t, const char *);
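
A hedged usage sketch for the new upto interface (hypothetical caller, not part
of the commit): iterating one inode's extents so the iterator stops at the
inode boundary instead of walking into the next inode. As with the other
_norestart helpers, transaction restarts (-EINTR) are returned to the caller
rather than retried.

static int count_extents(struct btree_trans *trans, u64 inum, u64 *nr)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	*nr = 0;
	for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_extents,
					  POS(inum, 0), POS(inum, U64_MAX),
					  0, k, ret)
		(*nr)++;
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
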
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index ee89b650f6a40f594a32ddb561f73b61e4f59931..b1b7a30417bc4b2f56034a57dda2e449f13ccd85 100644 (file)
@@ -309,7 +309,7 @@ retry:
                if (!ck)
                        goto retry;
 
-               mark_btree_node_locked(path, 0, SIX_LOCK_intent);
+               mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
                path->locks_want = 1;
        } else {
                enum six_lock_type lock_want = __btree_lock_want(path, 0);
@@ -330,7 +330,7 @@ retry:
                        goto retry;
                }
 
-               mark_btree_node_locked(path, 0, lock_want);
+               mark_btree_node_locked(trans, path, 0, lock_want);
        }
 
        path->l[0].lock_seq     = ck->c.lock.state.seq;
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index b4434eca0746c7635c10b291534f526661dc4cf6..67c970d727ac09089c96920084a58e5c132a69bf 100644 (file)
@@ -58,7 +58,8 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
        path->nodes_intent_locked &= ~(1 << level);
 }
 
-static inline void mark_btree_node_locked(struct btree_path *path,
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+                                         struct btree_path *path,
                                          unsigned level,
                                          enum six_lock_type type)
 {
@@ -66,14 +67,17 @@ static inline void mark_btree_node_locked(struct btree_path *path,
        BUILD_BUG_ON(SIX_LOCK_read   != 0);
        BUILD_BUG_ON(SIX_LOCK_intent != 1);
 
+       BUG_ON(trans->in_traverse_all && path->sorted_idx > trans->traverse_all_idx);
+
        path->nodes_locked |= 1 << level;
        path->nodes_intent_locked |= type << level;
 }
 
-static inline void mark_btree_node_intent_locked(struct btree_path *path,
+static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
+                                                struct btree_path *path,
                                                 unsigned level)
 {
-       mark_btree_node_locked(path, level, SIX_LOCK_intent);
+       mark_btree_node_locked(trans, path, level, SIX_LOCK_intent);
 }
 
 static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index c18e388ba24006906d821a66db2cdb947af8ca46..575635b5fa102187e27f81f36ac3e040d0b7ef67 100644 (file)
@@ -361,7 +361,11 @@ struct btree_insert_entry {
        unsigned long           ip_allocated;
 };
 
+#ifndef CONFIG_LOCKDEP
 #define BTREE_ITER_MAX         64
+#else
+#define BTREE_ITER_MAX         32
+#endif
 
 struct btree_trans_commit_hook;
 typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -388,6 +392,7 @@ struct btree_trans {
 
        u8                      nr_sorted;
        u8                      nr_updates;
+       u8                      traverse_all_idx;
        bool                    used_mempool:1;
        bool                    in_traverse_all:1;
        bool                    restarted:1;
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index ee978f33390e5ae28ec5ad0035b6a6ab2eca89e0..47623f3aa610e7d9dee7d9ed653dfc0bf1c09a92 100644 (file)
@@ -213,7 +213,7 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c,
 /**
 * btree_insert_key - insert one key into a leaf node
  */
-static bool btree_insert_key_leaf(struct btree_trans *trans,
+static void btree_insert_key_leaf(struct btree_trans *trans,
                                  struct btree_insert_entry *insert)
 {
        struct bch_fs *c = trans->c;
@@ -226,7 +226,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
 
        if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
                                        &insert_l(insert)->iter, insert->k)))
-               return false;
+               return;
 
        i->journal_seq = cpu_to_le64(max(trans->journal_res.seq,
                                         le64_to_cpu(i->journal_seq)));
@@ -247,8 +247,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
        if (u64s_added > live_u64s_added &&
            bch2_maybe_compact_whiteouts(c, b))
                bch2_trans_node_reinit_iter(trans, b);
-
-       return true;
 }
 
 /* Cached btree updates: */
@@ -400,18 +398,16 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct journal *j = &c->journal;
-       bool did_work;
 
        EBUG_ON(trans->journal_res.ref !=
                !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
 
        i->k->k.needs_whiteout = false;
 
-       did_work = !i->cached
-               ? btree_insert_key_leaf(trans, i)
-               : bch2_btree_insert_key_cached(trans, i->path, i->k);
-       if (!did_work)
-               return;
+       if (!i->cached)
+               btree_insert_key_leaf(trans, i);
+       else
+               bch2_btree_insert_key_cached(trans, i->path, i->k);
 
        if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
                bch2_journal_add_keys(j, &trans->journal_res,
@@ -440,7 +436,8 @@ static int run_one_mem_trigger(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(i->btree_id))
                return 0;
 
-       if (old.k->type == new->k.type &&
+       if (bch2_bkey_ops[old.k->type].atomic_trigger ==
+           bch2_bkey_ops[i->k->k.type].atomic_trigger &&
            ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
                ret   = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
                                BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
@@ -485,7 +482,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
 
        if (overwrite) {
                ret = bch2_trans_mark_old(trans, old, i->flags);
-       } else if (old.k->type == i->k->k.type &&
+       } else if (bch2_bkey_ops[old.k->type].trans_trigger ==
+                  bch2_bkey_ops[i->k->k.type].trans_trigger &&
            ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
                i->overwrite_trigger_run = true;
                ret = bch2_trans_mark_key(trans, old, i->k,
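
This hunk, together with the matching change in run_one_mem_trigger() above, is
the fix named in the commit subject: whether old and new keys take the combined
old-and-new trigger path is now decided by comparing the resolved trigger
callbacks rather than the raw key types. That matters because distinct key
types can share a trigger - alloc v1/v2/v3 all point .atomic_trigger at
bch2_mark_alloc in the alloc_background.h hunk above. Restated schematically
(not literal commit code):

/* Old and new take the combined path when they resolve to the same trigger. */
static bool same_trans_trigger(struct bkey_s_c old, struct bkey_i *new)
{
	return bch2_bkey_ops[old.k->type].trans_trigger ==
	       bch2_bkey_ops[new->k.type].trans_trigger;
}
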
@@ -652,6 +650,32 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
 
                if (btree_node_type_needs_gc(i->bkey_type))
                        marking = true;
+
+               /*
+                * Revalidate before calling mem triggers - XXX, ugly:
+                *
+                * - successful btree node splits don't cause transaction
+                *   restarts and will have invalidated the pointer to the bkey
+                *   value
+                * - btree_node_lock_for_insert() -> btree_node_prep_for_write()
+                *   when it has to re-sort
+                * - btree_key_can_insert_cached() when it has to reallocate
+                *
+                *   Ugly because we currently have no way to tell if the
+                *   pointer's been invalidated, which means it's debatable
+                *   whether we should be stashing the old key at all.
+                */
+               i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v;
+
+               if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) {
+                       struct bkey_i *j_k =
+                               bch2_journal_keys_peek(c, i->btree_id, i->level, i->k->k.p);
+
+                       if (j_k && !bpos_cmp(j_k->k.p, i->k->k.p)) {
+                               i->old_k = j_k->k;
+                               i->old_v = &j_k->v;
+                       }
+               }
        }
 
        /*
@@ -1217,7 +1241,7 @@ int bch2_trans_update_extent(struct btree_trans *trans,
                             BTREE_ITER_INTENT|
                             BTREE_ITER_WITH_UPDATES|
                             BTREE_ITER_NOT_EXTENTS);
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
        if ((ret = bkey_err(k)))
                goto err;
        if (!k.k)
@@ -1369,7 +1393,8 @@ nomerge1:
                        goto out;
                }
 next:
-               k = bch2_btree_iter_next(&iter);
+               bch2_btree_iter_advance(&iter);
+               k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
                if ((ret = bkey_err(k)))
                        goto err;
                if (!k.k)
@@ -1629,14 +1654,14 @@ int bch2_btree_delete_at(struct btree_trans *trans,
 
 int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
                                  struct bpos start, struct bpos end,
-                                 unsigned iter_flags,
+                                 unsigned update_flags,
                                  u64 *journal_seq)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;
 
-       bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT|iter_flags);
+       bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
 retry:
        while ((bch2_trans_begin(trans),
               (k = bch2_btree_iter_peek(&iter)).k) &&
@@ -1679,7 +1704,8 @@ retry:
 
                ret   = bch2_trans_update(trans, &iter, &delete, 0) ?:
                        bch2_trans_commit(trans, &disk_res, journal_seq,
-                                       BTREE_INSERT_NOFAIL);
+                                         BTREE_INSERT_NOFAIL|
+                                         update_flags);
                bch2_disk_reservation_put(trans->c, &disk_res);
                if (ret)
                        break;
@@ -1701,10 +1727,10 @@ retry:
  */
 int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
                            struct bpos start, struct bpos end,
-                           unsigned iter_flags,
+                           unsigned update_flags,
                            u64 *journal_seq)
 {
        return bch2_trans_do(c, NULL, journal_seq, 0,
                             bch2_btree_delete_range_trans(&trans, id, start, end,
-                                                          iter_flags, journal_seq));
+                                                          update_flags, journal_seq));
 }
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 2c3b71b2f04ed2300bb65752b31a20f743ab2be9..d52263759ee586d0d8c6e6a65ce6c062474bc3bb 100644 (file)
@@ -497,20 +497,25 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
        BUG_ON(owned_by_allocator == old.owned_by_allocator);
 }
 
-static int bch2_mark_alloc(struct btree_trans *trans,
-                          struct bkey_s_c old, struct bkey_s_c new,
-                          unsigned flags)
+int bch2_mark_alloc(struct btree_trans *trans,
+                   struct bkey_s_c old, struct bkey_s_c new,
+                   unsigned flags)
 {
        bool gc = flags & BTREE_TRIGGER_GC;
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
        struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old);
        struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new);
-       struct bch_dev *ca;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev);
        struct bucket *g;
        struct bucket_mark old_m, m;
        int ret = 0;
 
+       if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket ||
+                                      new_u.bucket >= ca->mi.nbuckets, trans,
+                                      "alloc key outside range of device's buckets"))
+               return -EIO;
+
        /*
         * alloc btree is read in by bch2_alloc_read, not gc:
         */
@@ -550,11 +555,6 @@ static int bch2_mark_alloc(struct btree_trans *trans,
                }
        }
 
-       ca = bch_dev_bkey_exists(c, new_u.dev);
-
-       if (new_u.bucket >= ca->mi.nbuckets)
-               return 0;
-
        percpu_down_read(&c->mark_lock);
        if (!gc && new_u.gen != old_u.gen)
                *bucket_gen(ca, new_u.bucket) = new_u.gen;
@@ -929,9 +929,9 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans,
        return 0;
 }
 
-static int bch2_mark_extent(struct btree_trans *trans,
-                           struct bkey_s_c old, struct bkey_s_c new,
-                           unsigned flags)
+int bch2_mark_extent(struct btree_trans *trans,
+                    struct bkey_s_c old, struct bkey_s_c new,
+                    unsigned flags)
 {
        u64 journal_seq = trans->journal_res.seq;
        struct bch_fs *c = trans->c;
@@ -1011,9 +1011,9 @@ static int bch2_mark_extent(struct btree_trans *trans,
        return 0;
 }
 
-static int bch2_mark_stripe(struct btree_trans *trans,
-                           struct bkey_s_c old, struct bkey_s_c new,
-                           unsigned flags)
+int bch2_mark_stripe(struct btree_trans *trans,
+                    struct bkey_s_c old, struct bkey_s_c new,
+                    unsigned flags)
 {
        bool gc = flags & BTREE_TRIGGER_GC;
        u64 journal_seq = trans->journal_res.seq;
@@ -1118,9 +1118,9 @@ static int bch2_mark_stripe(struct btree_trans *trans,
        return 0;
 }
 
-static int bch2_mark_inode(struct btree_trans *trans,
-                          struct bkey_s_c old, struct bkey_s_c new,
-                          unsigned flags)
+int bch2_mark_inode(struct btree_trans *trans,
+                   struct bkey_s_c old, struct bkey_s_c new,
+                   unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct bch_fs_usage __percpu *fs_usage;
@@ -1149,9 +1149,9 @@ static int bch2_mark_inode(struct btree_trans *trans,
        return 0;
 }
 
-static int bch2_mark_reservation(struct btree_trans *trans,
-                                struct bkey_s_c old, struct bkey_s_c new,
-                                unsigned flags)
+int bch2_mark_reservation(struct btree_trans *trans,
+                         struct bkey_s_c old, struct bkey_s_c new,
+                         unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@@ -1228,9 +1228,9 @@ fsck_err:
        return ret;
 }
 
-static int bch2_mark_reflink_p(struct btree_trans *trans,
-                              struct bkey_s_c old, struct bkey_s_c new,
-                              unsigned flags)
+int bch2_mark_reflink_p(struct btree_trans *trans,
+                       struct bkey_s_c old, struct bkey_s_c new,
+                       unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@@ -1267,39 +1267,6 @@ static int bch2_mark_reflink_p(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_mark_key(struct btree_trans *trans,
-                 struct bkey_s_c old,
-                 struct bkey_s_c new,
-                 unsigned flags)
-{
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
-
-       switch (k.k->type) {
-       case KEY_TYPE_alloc:
-       case KEY_TYPE_alloc_v2:
-       case KEY_TYPE_alloc_v3:
-               return bch2_mark_alloc(trans, old, new, flags);
-       case KEY_TYPE_btree_ptr:
-       case KEY_TYPE_btree_ptr_v2:
-       case KEY_TYPE_extent:
-       case KEY_TYPE_reflink_v:
-               return bch2_mark_extent(trans, old, new, flags);
-       case KEY_TYPE_stripe:
-               return bch2_mark_stripe(trans, old, new, flags);
-       case KEY_TYPE_inode:
-       case KEY_TYPE_inode_v2:
-               return bch2_mark_inode(trans, old, new, flags);
-       case KEY_TYPE_reservation:
-               return bch2_mark_reservation(trans, old, new, flags);
-       case KEY_TYPE_reflink_p:
-               return bch2_mark_reflink_p(trans, old, new, flags);
-       case KEY_TYPE_snapshot:
-               return bch2_mark_snapshot(trans, old, new, flags);
-       default:
-               return 0;
-       }
-}
-
 static noinline __cold
 void fs_usage_apply_warn(struct btree_trans *trans,
                         unsigned disk_res_sectors,
@@ -1462,7 +1429,6 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        struct extent_ptr_decoded p,
                        s64 sectors, enum bch_data_type data_type)
 {
-       struct bch_fs *c = trans->c;
        struct btree_iter iter;
        struct bkey_s_c k;
        struct bkey_i_stripe *s;
@@ -1478,16 +1444,15 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                goto err;
 
        if (k.k->type != KEY_TYPE_stripe) {
-               bch2_fs_inconsistent(c,
+               bch2_trans_inconsistent(trans,
                        "pointer to nonexistent stripe %llu",
                        (u64) p.ec.idx);
-               bch2_inconsistent_error(c);
                ret = -EIO;
                goto err;
        }
 
        if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) {
-               bch2_fs_inconsistent(c,
+               bch2_trans_inconsistent(trans,
                        "stripe pointer doesn't match stripe %llu",
                        (u64) p.ec.idx);
                ret = -EIO;
@@ -1516,10 +1481,14 @@ err:
        return ret;
 }
 
-static int bch2_trans_mark_extent(struct btree_trans *trans,
-                       struct bkey_s_c k, unsigned flags)
+int bch2_trans_mark_extent(struct btree_trans *trans,
+                          struct bkey_s_c old, struct bkey_i *new,
+                          unsigned flags)
 {
        struct bch_fs *c = trans->c;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
+               ? old
+               : bkey_i_to_s_c(new);
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
@@ -1601,8 +1570,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
                goto err;
 
        if (!deleting) {
-               if (bch2_fs_inconsistent_on(u.stripe ||
-                                           u.stripe_redundancy, c,
+               if (bch2_trans_inconsistent_on(u.stripe ||
+                                           u.stripe_redundancy, trans,
                                "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
                                iter.pos.inode, iter.pos.offset, u.gen,
                                bch2_data_types[u.data_type],
@@ -1612,7 +1581,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
                        goto err;
                }
 
-               if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c,
+               if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans,
                                "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
                                iter.pos.inode, iter.pos.offset, u.gen,
                                bch2_data_types[u.data_type],
@@ -1625,8 +1594,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
                u.stripe                = s.k->p.offset;
                u.stripe_redundancy     = s.v->nr_redundant;
        } else {
-               if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset ||
-                                           u.stripe_redundancy != s.v->nr_redundant, c,
+               if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset ||
+                                           u.stripe_redundancy != s.v->nr_redundant, trans,
                                "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
                                iter.pos.inode, iter.pos.offset, u.gen,
                                s.k->p.offset, u.stripe)) {
@@ -1650,9 +1619,9 @@ err:
        return ret;
 }
 
-static int bch2_trans_mark_stripe(struct btree_trans *trans,
-                                 struct bkey_s_c old, struct bkey_i *new,
-                                 unsigned flags)
+int bch2_trans_mark_stripe(struct btree_trans *trans,
+                          struct bkey_s_c old, struct bkey_i *new,
+                          unsigned flags)
 {
        const struct bch_stripe *old_s = NULL;
        struct bch_stripe *new_s = NULL;
@@ -1720,10 +1689,10 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
        return ret;
 }
 
-static int bch2_trans_mark_inode(struct btree_trans *trans,
-                                struct bkey_s_c old,
-                                struct bkey_i *new,
-                                unsigned flags)
+int bch2_trans_mark_inode(struct btree_trans *trans,
+                         struct bkey_s_c old,
+                         struct bkey_i *new,
+                         unsigned flags)
 {
        int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
 
@@ -1736,9 +1705,14 @@ static int bch2_trans_mark_inode(struct btree_trans *trans,
        return 0;
 }
 
-static int bch2_trans_mark_reservation(struct btree_trans *trans,
-                                      struct bkey_s_c k, unsigned flags)
+int bch2_trans_mark_reservation(struct btree_trans *trans,
+                               struct bkey_s_c old,
+                               struct bkey_i *new,
+                               unsigned flags)
 {
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
+               ? old
+               : bkey_i_to_s_c(new);
        unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
        s64 sectors = (s64) k.k->size;
        struct replicas_delta_list *d;
@@ -1787,7 +1761,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
        refcount = bkey_refcount(n);
        if (!refcount) {
                bch2_bkey_val_to_text(&buf, c, p.s_c);
-               bch2_fs_inconsistent(c,
+               bch2_trans_inconsistent(trans,
                        "nonexistent indirect extent at %llu while marking\n  %s",
                        *idx, buf.buf);
                ret = -EIO;
@@ -1796,7 +1770,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
 
        if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
                bch2_bkey_val_to_text(&buf, c, p.s_c);
-               bch2_fs_inconsistent(c,
+               bch2_trans_inconsistent(trans,
                        "indirect extent refcount underflow at %llu while marking\n  %s",
                        *idx, buf.buf);
                ret = -EIO;
@@ -1837,9 +1811,14 @@ err:
        return ret;
 }
 
-static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
-                                    struct bkey_s_c k, unsigned flags)
+int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                             struct bkey_s_c old,
+                             struct bkey_i *new,
+                             unsigned flags)
 {
+       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
+               ? old
+               : bkey_i_to_s_c(new);
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
        u64 idx, end_idx;
        int ret = 0;
@@ -1860,33 +1839,6 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old,
-                       struct bkey_i *new, unsigned flags)
-{
-       struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE
-               ? old
-               : bkey_i_to_s_c(new);
-
-       switch (k.k->type) {
-       case KEY_TYPE_btree_ptr:
-       case KEY_TYPE_btree_ptr_v2:
-       case KEY_TYPE_extent:
-       case KEY_TYPE_reflink_v:
-               return bch2_trans_mark_extent(trans, k, flags);
-       case KEY_TYPE_stripe:
-               return bch2_trans_mark_stripe(trans, old, new, flags);
-       case KEY_TYPE_inode:
-       case KEY_TYPE_inode_v2:
-               return bch2_trans_mark_inode(trans, old, new, flags);
-       case KEY_TYPE_reservation:
-               return bch2_trans_mark_reservation(trans, k, flags);
-       case KEY_TYPE_reflink_p:
-               return bch2_trans_mark_reflink_p(trans, k, flags);
-       default:
-               return 0;
-       }
-}
-
 static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
                                    struct bch_dev *ca, size_t b,
                                    enum bch_data_type type,
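
With the switch-statement dispatchers bch2_mark_key() and bch2_trans_mark_key() deleted from buckets.c, marking is routed through the new .atomic_trigger and .trans_trigger members being added to the bch2_bkey_ops tables in the headers below. A minimal sketch of what that dispatch can look like, assuming only the ops table and the signatures declared in buckets.h (the helper name here is illustrative, not the commit's):

/* Sketch: dispatch a mark through struct bkey_ops instead of a switch. */
static int mark_key_via_ops(struct btree_trans *trans,
                            struct bkey_s_c old, struct bkey_s_c new,
                            unsigned flags)
{
        struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old : new;
        const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type];

        return ops->atomic_trigger
                ? ops->atomic_trigger(trans, old, new, flags)
                : 0;
}

Key types without a trigger simply leave the pointer NULL, which stands in for the switch's default: return 0; the trans_trigger side works the same way with the bkey_i-based signature.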
index daf79a4f91289c7934d1a034756606d2269519d3..392e03d4c319e8039bf30c4eea4896c1f047b0e6 100644 (file)
@@ -229,6 +229,19 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
                               size_t, enum bch_data_type, unsigned,
                               struct gc_pos, unsigned);
 
+int bch2_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
+
+int bch2_trans_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trans_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned);
+
 int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned);
 
 int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
index 8e4179d8dc2764d883916cbca64d4a11deb98d4a..7d9ebcc9a4457bcf28a567e4fd28552c7f3d6e9d 100644 (file)
@@ -197,9 +197,11 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
                        goto err;
 
                workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
-               ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
+               ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
 
-               ret = ZSTD_decompressDCtx(ctx,
+               src_len = le32_to_cpup(src_data.b);
+
+               ret = zstd_decompress_dctx(ctx,
                                dst_data,       dst_len,
                                src_data.b + 4, real_src_len);
 
@@ -333,8 +335,8 @@ static int attempt_compress(struct bch_fs *c,
                return strm.total_out;
        }
        case BCH_COMPRESSION_TYPE_zstd: {
-               ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
-                       ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+               ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
+                       zstd_cctx_workspace_bound(&c->zstd_params.cParams));
 
                /*
                 * ZSTD requires that when we decompress we pass in the exact
@@ -347,11 +349,11 @@ static int attempt_compress(struct bch_fs *c,
                 * factor (7 bytes) from the dst buffer size to account for
                 * that.
                 */
-               size_t len = ZSTD_compressCCtx(ctx,
+               size_t len = zstd_compress_cctx(ctx,
                                dst + 4,        dst_len - 4 - 7,
                                src,            src_len,
-                               c->zstd_params);
-               if (ZSTD_isError(len))
+                               &c->zstd_params);
+               if (zstd_is_error(len))
                        return 0;
 
                *((__le32 *) dst) = cpu_to_le32(len);
@@ -546,7 +548,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 {
        size_t decompress_workspace_size = 0;
        bool decompress_workspace_needed;
-       ZSTD_parameters params = ZSTD_getParams(0, c->opts.encoded_extent_max, 0);
+       ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
        struct {
                unsigned        feature;
                unsigned        type;
@@ -558,8 +560,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
                        zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
                        zlib_inflate_workspacesize(), },
                { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
-                       ZSTD_CCtxWorkspaceBound(params.cParams),
-                       ZSTD_DCtxWorkspaceBound() },
+                       zstd_cctx_workspace_bound(&params.cParams),
+                       zstd_dctx_workspace_bound() },
        }, *i;
        int ret = 0;
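
The conversion above moves compress.c from the old ZSTD_* wrappers to the kernel-style zstd API exported by the two new modules at the end of this commit: lowercase names, parameters passed by pointer. A self-contained sketch of the new one-shot convention, with an illustrative function name:

#include <linux/zstd.h>

/*
 * Sketch: one-shot compression with a caller-provided workspace, which
 * must be at least zstd_cctx_workspace_bound(&params.cParams) bytes.
 * Returns the compressed length, or 0 on error.
 */
static size_t zstd_compress_oneshot(void *workspace, size_t workspace_size,
                                    void *dst, size_t dst_capacity,
                                    const void *src, size_t src_size)
{
        zstd_parameters params = zstd_get_params(0, src_size);
        zstd_cctx *ctx = zstd_init_cctx(workspace, workspace_size);
        size_t len;

        if (!ctx)
                return 0;

        len = zstd_compress_cctx(ctx, dst, dst_capacity, src, src_size,
                                 &params);
        return zstd_is_error(len) ? 0 : len;
}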
 
index a43a24409d37d627c8f0214acb87157519717de9..760e4f74715feb62459c41c4a054a4f6dd87aa2c 100644 (file)
@@ -470,16 +470,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
        if (ret)
                return ret;
 
-       for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
-                          SPOS(dir.inum, 0, snapshot), 0, k, ret) {
-               if (k.k->p.inode > dir.inum)
-                       break;
-
+       for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
+                          SPOS(dir.inum, 0, snapshot),
+                          POS(dir.inum, U64_MAX), 0, k, ret)
                if (k.k->type == KEY_TYPE_dirent) {
                        ret = -ENOTEMPTY;
                        break;
                }
-       }
        bch2_trans_iter_exit(trans, &iter);
 
        return ret;
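
Both hunks in this file replace open-coded "k.k->p.inode > inum" bounds checks with for_each_btree_key_upto_norestart(), which takes an explicit end position and terminates there itself; str_hash.h and xattr.c below get the same treatment. A hedged sketch of the pattern in isolation (the counting helper is invented for illustration):

/* Sketch: walk one inode's dirents without manual bounds checks. */
static int count_dirents(struct btree_trans *trans, subvol_inum dir,
                         u32 snapshot, u64 *count)
{
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret = 0;

        *count = 0;
        for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents,
                           SPOS(dir.inum, 0, snapshot),
                           POS(dir.inum, U64_MAX), 0, k, ret)
                if (k.k->type == KEY_TYPE_dirent)
                        (*count)++;
        bch2_trans_iter_exit(trans, &iter);
        return ret;
}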
@@ -503,11 +500,9 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
-                          SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
-               if (k.k->p.inode > inum.inum)
-                       break;
-
+       for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents,
+                          SPOS(inum.inum, ctx->pos, snapshot),
+                          POS(inum.inum, U64_MAX), 0, k, ret) {
                if (k.k->type != KEY_TYPE_dirent)
                        continue;
 
index 78d468c7680a2f167070297392dbcabba1204f95..9d508a2f3bbcbd813160565211ad8dfe99ebfa2f 100644 (file)
@@ -14,6 +14,8 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
        .key_invalid    = bch2_stripe_invalid,          \
        .val_to_text    = bch2_stripe_to_text,          \
        .swab           = bch2_ptr_swab,                \
+       .trans_trigger  = bch2_trans_mark_stripe,       \
+       .atomic_trigger = bch2_mark_stripe,             \
 }
 
 static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
index 4ab3cfe1292c744ec77a11d48793336300197f46..6e63c38186f30167a88ec6590a3f0395e4023fdf 100644 (file)
@@ -66,6 +66,26 @@ do {                                                                 \
        _ret;                                                           \
 })
 
+/*
+ * When a transaction update discovers or is causing a fs inconsistency, it's
+ * helpful to also dump the pending updates:
+ */
+#define bch2_trans_inconsistent(trans, ...)                            \
+({                                                                     \
+       bch_err(trans->c, __VA_ARGS__);                                 \
+       bch2_inconsistent_error(trans->c);                              \
+       bch2_dump_trans_updates(trans);                                 \
+})
+
+#define bch2_trans_inconsistent_on(cond, trans, ...)                   \
+({                                                                     \
+       bool _ret = unlikely(!!(cond));                                 \
+                                                                       \
+       if (_ret)                                                       \
+               bch2_trans_inconsistent(trans, __VA_ARGS__);            \
+       _ret;                                                           \
+})
+
 /*
  * Fsck errors: inconsistency errors we detect at mount time, and should ideally
  * be able to repair:
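
Like bch2_fs_inconsistent_on() above it, the _on variant evaluates the condition once and hands the result back, so a caller can report and bail in a single expression; unlike the fs-level macro it also dumps the transaction's pending updates. A fragment of the shape used by bch2_mark_alloc() earlier in this commit, assuming that function's context:

if (bch2_trans_inconsistent_on(new_u.bucket >= ca->mi.nbuckets, trans,
                               "alloc key outside range of device's buckets"))
        return -EIO;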
index 9c2567274a2b8d286707d6b1b3594b3d04007ac3..ae650849d98a9c51f4cbf66c2d0b19aa7c1fad78 100644 (file)
@@ -381,6 +381,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
        .key_invalid    = bch2_btree_ptr_invalid,               \
        .val_to_text    = bch2_btree_ptr_to_text,               \
        .swab           = bch2_ptr_swab,                        \
+       .trans_trigger  = bch2_trans_mark_extent,               \
+       .atomic_trigger = bch2_mark_extent,                     \
 }
 
 #define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) {         \
@@ -388,6 +390,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
        .val_to_text    = bch2_btree_ptr_v2_to_text,            \
        .swab           = bch2_ptr_swab,                        \
        .compat         = bch2_btree_ptr_v2_compat,             \
+       .trans_trigger  = bch2_trans_mark_extent,               \
+       .atomic_trigger = bch2_mark_extent,                     \
 }
 
 /* KEY_TYPE_extent: */
@@ -402,6 +406,8 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
        .swab           = bch2_ptr_swab,                        \
        .key_normalize  = bch2_extent_normalize,                \
        .key_merge      = bch2_extent_merge,                    \
+       .trans_trigger  = bch2_trans_mark_extent,               \
+       .atomic_trigger = bch2_mark_extent,                     \
 }
 
 /* KEY_TYPE_reservation: */
@@ -414,6 +420,8 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
        .key_invalid    = bch2_reservation_invalid,             \
        .val_to_text    = bch2_reservation_to_text,             \
        .key_merge      = bch2_reservation_merge,               \
+       .trans_trigger  = bch2_trans_mark_reservation,          \
+       .atomic_trigger = bch2_mark_reservation,                \
 }
 
 /* Extent checksum entries: */
index 1d0871f63e4e71402874373a982262d74b00c24c..b05d6e896f034935c7dbf2f13c61b0154108d038 100644 (file)
 #include <trace/events/bcachefs.h>
 #include <trace/events/writeback.h>
 
+static inline bool bio_full(struct bio *bio, unsigned len)
+{
+       if (bio->bi_vcnt >= bio->bi_max_vecs)
+               return true;
+       if (bio->bi_iter.bi_size > UINT_MAX - len)
+               return true;
+       return false;
+}
+
 static inline struct address_space *faults_disabled_mapping(void)
 {
        return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
@@ -1808,11 +1817,11 @@ again:
                 * to check that the address is actually valid, when atomic
                 * usercopies are used, below.
                 */
-               if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
+               if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
                        bytes = min_t(unsigned long, iov_iter_count(iter),
                                      PAGE_SIZE - offset);
 
-                       if (unlikely(iov_iter_fault_in_readable(iter, bytes))) {
+                       if (unlikely(fault_in_iov_iter_readable(iter, bytes))) {
                                ret = -EFAULT;
                                break;
                        }
@@ -1870,7 +1879,7 @@ static void bch2_dio_read_complete(struct closure *cl)
 {
        struct dio_read *dio = container_of(cl, struct dio_read, cl);
 
-       dio->req->ki_complete(dio->req, dio->ret, 0);
+       dio->req->ki_complete(dio->req, dio->ret);
        bio_check_or_release(&dio->rbio.bio, dio->should_dirty);
 }
 
@@ -1919,7 +1928,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
        iter->count -= shorten;
 
        bio = bio_alloc_bioset(GFP_KERNEL,
-                              iov_iter_npages(iter, BIO_MAX_VECS),
+                              bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
                               &c->dio_read_bioset);
 
        bio->bi_end_io = bch2_direct_IO_read_endio;
@@ -1954,7 +1963,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
        goto start;
        while (iter->count) {
                bio = bio_alloc_bioset(GFP_KERNEL,
-                                      iov_iter_npages(iter, BIO_MAX_VECS),
+                                      bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
                                       &c->bio_read);
                bio->bi_end_io          = bch2_direct_IO_read_split_endio;
 start:
@@ -2101,7 +2110,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
        while (1) {
                iter_count = dio->iter.count;
 
-               if (kthread)
+               if (kthread && dio->mm)
                        kthread_use_mm(dio->mm);
                BUG_ON(current->faults_disabled_mapping);
                current->faults_disabled_mapping = mapping;
@@ -2111,7 +2120,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
                dropped_locks = fdm_dropped_locks();
 
                current->faults_disabled_mapping = NULL;
-               if (kthread)
+               if (kthread && dio->mm)
                        kthread_unuse_mm(dio->mm);
 
                /*
@@ -2244,7 +2253,7 @@ err:
        inode_dio_end(&inode->v);
 
        if (!sync) {
-               req->ki_complete(req, ret, 0);
+               req->ki_complete(req, ret);
                ret = -EIOCBQUEUED;
        }
        return ret;
@@ -2304,9 +2313,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        }
 
        bio = bio_alloc_bioset(GFP_KERNEL,
-                              iov_iter_is_bvec(iter)
-                              ? 0
-                              : iov_iter_npages(iter, BIO_MAX_VECS),
+                              bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
                               &c->dio_write_bioset);
        dio = container_of(bio, struct dio_write, op.wbio.bio);
        init_completion(&dio->done);
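
bio_full() is carried locally here, presumably because it is not available to this tree; it reports whether a bio can accept len more bytes. A hedged fragment of the pattern such a helper supports (bio, page, and the bioset are assumed from surrounding context):

/* Sketch: submit the current bio and start a new one once it fills up. */
if (bio_full(bio, PAGE_SIZE)) {
        submit_bio(bio);
        bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &c->bio_read);
}
bio_add_page(bio, page, PAGE_SIZE, 0);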
index 30720c14778dae245ae35d2d0676e26f6429974b..9fc6c39eacdb6fb244efba8317dd2ceaec61abae 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/pagemap.h>
 #include <linux/posix_acl.h>
 #include <linux/random.h>
+#include <linux/seq_file.h>
 #include <linux/statfs.h>
 #include <linux/string.h>
 #include <linux/xattr.h>
@@ -934,9 +935,8 @@ retry:
                             SPOS(ei->v.i_ino, start, snapshot), 0);
 
        while (!(ret = btree_trans_too_many_iters(&trans)) &&
-              (k = bch2_btree_iter_peek(&iter)).k &&
-              !(ret = bkey_err(k)) &&
-              bkey_cmp(iter.pos, end) < 0) {
+              (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
+              !(ret = bkey_err(k))) {
                enum btree_id data_btree = BTREE_ID_extents;
 
                if (!bkey_extent_is_data(k.k) &&
index 78e2db6c938b8791aa1c3b52144a156c8973f616..14b0b595202d789163abe746e2670a45c27831a0 100644 (file)
@@ -606,12 +606,12 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
 
                bch2_btree_iter_set_snapshot(&iter, snapshot);
 
-               k = bch2_btree_iter_peek(&iter);
+               k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX));
                ret = bkey_err(k);
                if (ret)
                        goto err;
 
-               if (!k.k || iter.pos.inode != inum.inum)
+               if (!k.k)
                        break;
 
                bkey_init(&delete.k);
index 77957cc7f9dda3eac49a9bd435969c72184c6545..2337ecfc600ea7ac0fa00d69c1545fa722f59a4f 100644 (file)
@@ -13,11 +13,15 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 #define bch2_bkey_ops_inode (struct bkey_ops) {                \
        .key_invalid    = bch2_inode_invalid,           \
        .val_to_text    = bch2_inode_to_text,           \
+       .trans_trigger  = bch2_trans_mark_inode,        \
+       .atomic_trigger = bch2_mark_inode,              \
 }
 
 #define bch2_bkey_ops_inode_v2 (struct bkey_ops) {     \
        .key_invalid    = bch2_inode_v2_invalid,        \
        .val_to_text    = bch2_inode_to_text,           \
+       .trans_trigger  = bch2_trans_mark_inode,        \
+       .atomic_trigger = bch2_mark_inode,              \
 }
 
 static inline bool bkey_is_inode(const struct bkey *k)
index ded4b6800d4c4c13f58d29556f41a3e7f68dfbbc..eb556ecc511f987f0e1511c53148aa0c31652b20 100644 (file)
@@ -241,6 +241,9 @@ static int journal_entry_open(struct journal *j)
        if (u64s <= 0)
                return cur_entry_journal_full;
 
+       if (fifo_empty(&j->pin) && j->reclaim_thread)
+               wake_up_process(j->reclaim_thread);
+
        /*
         * The fifo_push() needs to happen at the same time as j->seq is
         * incremented for journal_last_seq() to be calculated correctly
@@ -628,31 +631,6 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq)
        return ret ?: ret2 < 0 ? ret2 : 0;
 }
 
-int bch2_journal_meta(struct journal *j)
-{
-       struct journal_buf *buf;
-       struct journal_res res;
-       int ret;
-
-       memset(&res, 0, sizeof(res));
-
-       ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
-       if (ret)
-               return ret;
-
-       buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
-       buf->must_flush = true;
-
-       if (!buf->flush_time) {
-               buf->flush_time = local_clock() ?: 1;
-               buf->expires = jiffies;
-       }
-
-       bch2_journal_res_put(j, &res);
-
-       return bch2_journal_flush_seq(j, res.seq);
-}
-
 /*
  * bch2_journal_flush_async - if there is an open journal entry, or a journal
  * still being written, write it and wait for the write to complete
@@ -705,6 +683,64 @@ out:
        return ret;
 }
 
+int bch2_journal_meta(struct journal *j)
+{
+       struct journal_buf *buf;
+       struct journal_res res;
+       int ret;
+
+       memset(&res, 0, sizeof(res));
+
+       ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+       if (ret)
+               return ret;
+
+       buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
+       buf->must_flush = true;
+
+       if (!buf->flush_time) {
+               buf->flush_time = local_clock() ?: 1;
+               buf->expires = jiffies;
+       }
+
+       bch2_journal_res_put(j, &res);
+
+       return bch2_journal_flush_seq(j, res.seq);
+}
+
+int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
+{
+       struct jset_entry_log *entry;
+       struct journal_res res = { 0 };
+       unsigned msglen, u64s;
+       va_list args;
+       int ret;
+
+       va_start(args, fmt);
+       msglen = vsnprintf(NULL, 0, fmt, args) + 1;
+       va_end(args);
+
+       u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64)));
+
+       ret = bch2_journal_res_get(j, &res, u64s, 0);
+       if (ret)
+               return ret;
+
+       entry = container_of(journal_res_entry(j, &res),
+                            struct jset_entry_log, entry);
+       memset(entry, 0, u64s * sizeof(u64));
+       entry->entry.type = BCH_JSET_ENTRY_log;
+       entry->entry.u64s = u64s - 1;
+
+       va_start(args, fmt);
+       vsnprintf(entry->d, INT_MAX, fmt, args);
+       va_end(args);
+
+       bch2_journal_res_put(j, &res);
+
+       return bch2_journal_flush_seq(j, res.seq);
+}
+
 /* block/unlock the journal: */
 
 void bch2_journal_unblock(struct journal *j)
@@ -802,7 +838,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                 * superblock before inserting into the journal array
                 */
 
-               pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+               pos = ja->discard_idx ?: ja->nr;
                __array_insert_item(ja->buckets,                ja->nr, pos);
                __array_insert_item(ja->bucket_seq,             ja->nr, pos);
                __array_insert_item(journal_buckets->buckets,   ja->nr, pos);
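
bch2_journal_log_msg(), added above, formats a printf-style message into a BCH_JSET_ENTRY_log entry and flushes the journal, giving callers a way to leave human-readable markers; recovery.c below uses it once journal replay finishes. A fragment of a call site, assuming a struct bch_fs *c in scope (the message text is illustrative):

ret = bch2_journal_log_msg(&c->journal, "journal replay finished");
if (ret)
        bch_err(c, "error writing log message to journal: %i", ret);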
index 1bb0e00df44c00d81da7334bf4e5925e808a4afd..989c33157cd2e83c0c0ac2fd8b3bbdb193244058 100644 (file)
@@ -480,6 +480,7 @@ int bch2_journal_flush_seq(struct journal *, u64);
 int bch2_journal_flush(struct journal *);
 bool bch2_journal_noflush_seq(struct journal *, u64);
 int bch2_journal_meta(struct journal *);
+int bch2_journal_log_msg(struct journal *, const char *, ...);
 
 void bch2_journal_halt(struct journal *);
 
index ec565edbbfc566d4f411c37b263363d8b982a5f2..a920a111dad754602985616e38e45a41b7bafd06 100644 (file)
@@ -670,6 +670,7 @@ static int bch2_journal_reclaim_thread(void *arg)
        struct journal *j = arg;
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        unsigned long delay, now;
+       bool journal_empty;
        int ret = 0;
 
        set_freezable();
@@ -696,10 +697,17 @@ static int bch2_journal_reclaim_thread(void *arg)
                                break;
                        if (j->reclaim_kicked)
                                break;
-                       if (time_after_eq(jiffies, j->next_reclaim))
-                               break;
-                       freezable_schedule_timeout(j->next_reclaim - jiffies);
 
+                       spin_lock(&j->lock);
+                       journal_empty = fifo_empty(&j->pin);
+                       spin_unlock(&j->lock);
+
+                       if (journal_empty)
+                               freezable_schedule();
+                       else if (time_after(j->next_reclaim, jiffies))
+                               freezable_schedule_timeout(j->next_reclaim - jiffies);
+                       else
+                               break;
                }
                __set_current_state(TASK_RUNNING);
        }
@@ -809,10 +817,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
        seq = 0;
 
        spin_lock(&j->lock);
-       while (!ret && seq < j->pin.back) {
+       while (!ret) {
                struct bch_replicas_padded replicas;
 
                seq = max(seq, journal_last_seq(j));
+               if (seq >= j->pin.back)
+                       break;
                bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
                                         journal_seq_pin(j, seq)->devs);
                seq++;
index 6fd458191e41c5c1ef0115c5257888db3428c301..071fcb4a842286a35cbac254dc1668ab5ebfeb51 100644 (file)
@@ -240,6 +240,10 @@ struct journal {
        spinlock_t              err_lock;
 
        struct mutex            reclaim_lock;
+       /*
+        * Used for waiting until journal reclaim has freed up space in the
+        * journal:
+        */
        wait_queue_head_t       reclaim_wait;
        struct task_struct      *reclaim_thread;
        bool                    reclaim_kicked;
index 6c4ffc5abdc5a0a6a46526756604918e74e764b7..887971559214eb681e345328260ec12b67356823 100644 (file)
@@ -578,6 +578,9 @@ static int bch2_journal_replay(struct bch_fs *c)
        bch2_journal_set_replay_done(j);
        bch2_journal_flush_all_pins(j);
        ret = bch2_journal_error(j);
+
+       if (keys->nr && !ret)
+               bch2_journal_log_msg(&c->journal, "journal replay finished");
 err:
        kvfree(keys_sorted);
        return ret;
index 3745873fd88d90947f610de256931cecec4d9181..4da4330014a82167fd48fdd6fc2c19b116581e65 100644 (file)
@@ -10,7 +10,9 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
 #define bch2_bkey_ops_reflink_p (struct bkey_ops) {            \
        .key_invalid    = bch2_reflink_p_invalid,               \
        .val_to_text    = bch2_reflink_p_to_text,               \
-       .key_merge      = bch2_reflink_p_merge,         \
+       .key_merge      = bch2_reflink_p_merge,                 \
+       .trans_trigger  = bch2_trans_mark_reflink_p,            \
+       .atomic_trigger = bch2_mark_reflink_p,                  \
 }
 
 const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
@@ -21,6 +23,8 @@ void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
        .key_invalid    = bch2_reflink_v_invalid,               \
        .val_to_text    = bch2_reflink_v_to_text,               \
        .swab           = bch2_ptr_swab,                        \
+       .trans_trigger  = bch2_trans_mark_extent,               \
+       .atomic_trigger = bch2_mark_extent,                     \
 }
 
 const char *bch2_indirect_inline_data_invalid(const struct bch_fs *,
index 57d636740d2f81ffa8a19adf9dd3d3f036746a65..591bbb9f8beb544de1369204ffc666018ef40740 100644 (file)
@@ -163,12 +163,10 @@ bch2_hash_lookup(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       for_each_btree_key_norestart(trans, *iter, desc.btree_id,
+       for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
                           SPOS(inum.inum, desc.hash_key(info, key), snapshot),
+                          POS(inum.inum, U64_MAX),
                           BTREE_ITER_SLOTS|flags, k, ret) {
-               if (iter->pos.inode != inum.inum)
-                       break;
-
                if (is_visible_key(desc, inum, k)) {
                        if (!desc.cmp_key(k, key))
                                return 0;
@@ -199,15 +197,12 @@ bch2_hash_hole(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       for_each_btree_key_norestart(trans, *iter, desc.btree_id,
+       for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id,
                           SPOS(inum.inum, desc.hash_key(info, key), snapshot),
-                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               if (iter->pos.inode != inum.inum)
-                       break;
-
+                          POS(inum.inum, U64_MAX),
+                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret)
                if (!is_visible_key(desc, inum, k))
                        return 0;
-       }
        bch2_trans_iter_exit(trans, iter);
 
        return ret ?: -ENOSPC;
@@ -260,14 +255,12 @@ int bch2_hash_set(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       for_each_btree_key_norestart(trans, iter, desc.btree_id,
+       for_each_btree_key_upto_norestart(trans, iter, desc.btree_id,
                           SPOS(inum.inum,
                                desc.hash_bkey(info, bkey_i_to_s_c(insert)),
                                snapshot),
+                          POS(inum.inum, U64_MAX),
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               if (iter.pos.inode != inum.inum)
-                       break;
-
                if (is_visible_key(desc, inum, k)) {
                        if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert)))
                                goto found;
index 49e38859bff819b1d3c21e54504b46473b5fd4d3..3d6ece515a886f486ba533166269af28766e904d 100644 (file)
@@ -607,23 +607,32 @@ STORE(bch2_fs_opts_dir)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
        const struct bch_option *opt = container_of(attr, struct bch_option, attr);
-       int ret, id = opt - bch2_opt_table;
+       int ret = size, id = opt - bch2_opt_table;
        char *tmp;
        u64 v;
 
+       /*
+        * We don't need to take c->writes for correctness, but it eliminates an
+        * unsightly error message in the dmesg log when we're RO:
+        */
+       if (unlikely(!percpu_ref_tryget(&c->writes)))
+               return -EROFS;
+
        tmp = kstrdup(buf, GFP_KERNEL);
-       if (!tmp)
-               return -ENOMEM;
+       if (!tmp) {
+               ret = -ENOMEM;
+               goto err;
+       }
 
        ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v);
        kfree(tmp);
 
        if (ret < 0)
-               return ret;
+               goto err;
 
        ret = bch2_opt_check_may_set(c, id, v);
        if (ret < 0)
-               return ret;
+               goto err;
 
        bch2_opt_set_sb(c, opt, v);
        bch2_opt_set_by_id(&c->opts, id, v);
@@ -633,8 +642,9 @@ STORE(bch2_fs_opts_dir)
                bch2_rebalance_add_work(c, S64_MAX);
                rebalance_wakeup(c);
        }
-
-       return size;
+err:
+       percpu_ref_put(&c->writes);
+       return ret;
 }
 SYSFS_OPS(bch2_fs_opts_dir);
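
The tryget/put pair above is the standard guard shape: take a c->writes reference for the duration of the store, and fail with -EROFS when the filesystem is read-only. In outline, with a hypothetical do_store() standing in for the real work:

if (unlikely(!percpu_ref_tryget(&c->writes)))
        return -EROFS;

ret = do_store(c, buf);         /* do_store() is hypothetical */

percpu_ref_put(&c->writes);
return ret;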
 
index 3addf400e177422457c51bee9cad0b029ae2d3fb..4369bfc55a94db49202b1f1591545bf0fce39b72 100644 (file)
@@ -15,15 +15,14 @@ static void delete_test_keys(struct bch_fs *c)
        int ret;
 
        ret = bch2_btree_delete_range(c, BTREE_ID_extents,
-                                     POS_MIN, SPOS_MAX,
-                                     BTREE_ITER_ALL_SNAPSHOTS,
+                                     SPOS(0, 0, U32_MAX), SPOS_MAX,
+                                     0,
                                      NULL);
        BUG_ON(ret);
 
        ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
-                                     POS_MIN, SPOS_MAX,
-                                     BTREE_ITER_ALL_SNAPSHOTS,
-                                     NULL);
+                                     SPOS(0, 0, U32_MAX), SPOS_MAX,
+                                     0, NULL);
        BUG_ON(ret);
 }
 
@@ -814,9 +813,8 @@ static int seq_delete(struct bch_fs *c, u64 nr)
        int ret;
 
        ret = bch2_btree_delete_range(c, BTREE_ID_xattrs,
-                                     POS_MIN, SPOS_MAX,
-                                     BTREE_ITER_ALL_SNAPSHOTS,
-                                     NULL);
+                                     SPOS(0, 0, U32_MAX), SPOS_MAX,
+                                     0, NULL);
        if (ret)
                bch_err(c, "error in seq_delete: %i", ret);
        return ret;
index 2c9e91023bb96a6c80bc4d81e43cce075b046549..d6d7f1bc16b8a927c3e3e269f2ad792796589ad8 100644 (file)
@@ -372,7 +372,7 @@ static inline void pr_time(struct printbuf *out, u64 _time)
 #ifdef __KERNEL__
 static inline void uuid_unparse_lower(u8 *uuid, char *out)
 {
-       sprintf(out, "%plU", uuid);
+       sprintf(out, "%pUb", uuid);
 }
 #else
 #include <uuid/uuid.h>
index c2e9520a0457959f6b00ccfa7982dd7693e5172f..1c680b16b924a3bf71ff8a612c1e20fb1187b57a 100644 (file)
@@ -311,13 +311,9 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
-                          SPOS(inum, offset, snapshot), 0, k, ret) {
-               BUG_ON(k.k->p.inode < inum);
-
-               if (k.k->p.inode > inum)
-                       break;
-
+       for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs,
+                          SPOS(inum, offset, snapshot),
+                          POS(inum, U64_MAX), 0, k, ret) {
                if (k.k->type != KEY_TYPE_xattr)
                        continue;
 
diff --git a/linux/zstd_compress_module.c b/linux/zstd_compress_module.c
new file mode 100644 (file)
index 0000000..35cc5cb
--- /dev/null
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#define ZSTD_FORWARD_IF_ERR(ret)            \
+       do {                                \
+               size_t const __ret = (ret); \
+               if (ZSTD_isError(__ret))    \
+                       return __ret;       \
+       } while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+       unsigned long long pledged_src_size)
+{
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+               cctx, ZSTD_reset_session_and_parameters));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+               cctx, pledged_src_size));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+       return 0;
+}
+
+int zstd_min_clevel(void)
+{
+       return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+       return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+       return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+       unsigned long long estimated_src_size)
+{
+       return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+       return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+       if (workspace == NULL)
+               return NULL;
+       return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+       ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+       return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+       return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+       unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+       zstd_cstream *cstream;
+
+       if (workspace == NULL)
+               return NULL;
+
+       cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+       if (cstream == NULL)
+               return NULL;
+
+       /* a pledged size of 0 means unknown in the linux zstd API, but literally 0 in the new API, so remap it */
+       if (pledged_src_size == 0)
+               pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+       if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+               return NULL;
+
+       return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+       unsigned long long pledged_src_size)
+{
+       return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+       zstd_in_buffer *input)
+{
+       return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+       return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+       return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
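
Besides the one-shot path that compress.c uses, the module exports the streaming entry points as well. A hedged sketch of driving them for a single frame, assuming the workspace was sized with zstd_cstream_workspace_bound() and that dst is large enough for one end-stream flush to complete the frame:

/* Sketch: stream src into dst in one compress/end cycle. */
static size_t zstd_stream_oneshot(void *workspace, size_t workspace_size,
                                  const zstd_parameters *params,
                                  void *dst, size_t dst_capacity,
                                  const void *src, size_t src_size)
{
        zstd_cstream *cs = zstd_init_cstream(params, src_size,
                                             workspace, workspace_size);
        zstd_in_buffer in = { .src = src, .size = src_size, .pos = 0 };
        zstd_out_buffer out = { .dst = dst, .size = dst_capacity, .pos = 0 };
        size_t left;

        if (!cs)
                return 0;
        if (zstd_is_error(zstd_compress_stream(cs, &out, &in)))
                return 0;
        left = zstd_end_stream(cs, &out);
        if (zstd_is_error(left) || left != 0)
                return 0;
        return out.pos;
}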
diff --git a/linux/zstd_decompress_module.c b/linux/zstd_decompress_module.c
new file mode 100644 (file)
index 0000000..7e8cd44
--- /dev/null
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+       return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+       return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+       return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+       return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+       if (workspace == NULL)
+               return NULL;
+       return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size)
+{
+       return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+       return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+       size_t workspace_size)
+{
+       if (workspace == NULL)
+               return NULL;
+       (void)max_window_size;
+       return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+       return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+       zstd_in_buffer *input)
+{
+       return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+       return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+       size_t src_size)
+{
+       return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");
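
And the decompression counterpart, mirroring what __bio_uncompress() now does in compress.c: size a workspace with zstd_dctx_workspace_bound(), initialize a static DCtx in it, and run a one-shot decompress. A sketch, with an illustrative name:

/*
 * Sketch: one-shot decompression; returns the decompressed length, or 0
 * on error. workspace must be >= zstd_dctx_workspace_bound() bytes.
 */
static size_t zstd_decompress_oneshot(void *workspace, size_t workspace_size,
                                      void *dst, size_t dst_capacity,
                                      const void *src, size_t src_size)
{
        zstd_dctx *ctx = zstd_init_dctx(workspace, workspace_size);
        size_t ret;

        if (!ctx)
                return 0;

        ret = zstd_decompress_dctx(ctx, dst, dst_capacity, src, src_size);
        return zstd_is_error(ret) ? 0 : ret;
}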