From d34e731082d8fcd710c2af6377a3b7fa927c8451 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Mar 2022 19:14:01 -0400 Subject: [PATCH] Update bcachefs sources to e48731a188 bcachefs: Fix BTREE_TRIGGER_WANTS_OLD_AND_NEW --- .bcachefs_revision | 2 +- include/linux/zstd.h | 449 +++++++++++++++++++++++++++++++- include/linux/zstd_errors.h | 77 ++++++ libbcachefs/alloc_background.h | 3 + libbcachefs/bkey_methods.h | 27 ++ libbcachefs/btree_io.c | 3 +- libbcachefs/btree_iter.c | 156 +++++++---- libbcachefs/btree_iter.h | 32 ++- libbcachefs/btree_key_cache.c | 4 +- libbcachefs/btree_locking.h | 10 +- libbcachefs/btree_types.h | 5 + libbcachefs/btree_update_leaf.c | 64 +++-- libbcachefs/buckets.c | 168 +++++------- libbcachefs/buckets.h | 13 + libbcachefs/compress.c | 22 +- libbcachefs/dirent.c | 17 +- libbcachefs/ec.h | 2 + libbcachefs/error.h | 20 ++ libbcachefs/extents.h | 8 + libbcachefs/fs-io.c | 29 ++- libbcachefs/fs.c | 6 +- libbcachefs/inode.c | 4 +- libbcachefs/inode.h | 4 + libbcachefs/journal.c | 88 +++++-- libbcachefs/journal.h | 1 + libbcachefs/journal_reclaim.c | 18 +- libbcachefs/journal_types.h | 4 + libbcachefs/recovery.c | 3 + libbcachefs/reflink.h | 6 +- libbcachefs/str_hash.h | 21 +- libbcachefs/sysfs.c | 24 +- libbcachefs/tests.c | 14 +- libbcachefs/util.h | 2 +- libbcachefs/xattr.c | 10 +- linux/zstd_compress_module.c | 157 +++++++++++ linux/zstd_decompress_module.c | 103 ++++++++ 36 files changed, 1273 insertions(+), 303 deletions(-) create mode 100644 include/linux/zstd_errors.h create mode 100644 linux/zstd_compress_module.c create mode 100644 linux/zstd_decompress_module.c diff --git a/.bcachefs_revision b/.bcachefs_revision index 8e1032a..74f5970 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e318fabeb424d4b8fdd46329125c30aaa4f9006a +e48731a188639563444d475622782b7963df4b47 diff --git a/include/linux/zstd.h b/include/linux/zstd.h index 0dd1b02..b0fa1ed 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,10 +1,447 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd) and + * the GPLv2 (found in the COPYING file in the root directory of + * https://github.com/facebook/zstd). You may select, at your option, one of the + * above-listed licenses. + */ + +#ifndef LINUX_ZSTD_H +#define LINUX_ZSTD_H + +/** + * This is a kernel-style API that wraps the upstream zstd API, which cannot be + * used directly because the symbols aren't exported. It exposes the minimal + * functionality which is currently required by users of zstd in the kernel. + * Expose extra functions from lib/zstd/zstd.h as needed. + */ + +/* ====== Dependency ====== */ +#include #include +#include + +/* ====== Helper Functions ====== */ +/** + * zstd_compress_bound() - maximum compressed size in worst case scenario + * @src_size: The size of the data to compress. + * + * Return: The maximum compressed size in the worst case scenario. + */ +size_t zstd_compress_bound(size_t src_size); + +/** + * zstd_is_error() - tells if a size_t function result is an error code + * @code: The function result to check for error. + * + * Return: Non-zero iff the code is an error. + */ +unsigned int zstd_is_error(size_t code); + +/** + * enum zstd_error_code - zstd error codes + */ +typedef ZSTD_ErrorCode zstd_error_code; + +/** + * zstd_get_error_code() - translates an error function result to an error code + * @code: The function result for which zstd_is_error(code) is true. + * + * Return: A unique error code for this error. + */ +zstd_error_code zstd_get_error_code(size_t code); + +/** + * zstd_get_error_name() - translates an error function result to a string + * @code: The function result for which zstd_is_error(code) is true. + * + * Return: An error string corresponding to the error code. + */ +const char *zstd_get_error_name(size_t code); + +/** + * zstd_min_clevel() - minimum allowed compression level + * + * Return: The minimum allowed compression level. + */ +int zstd_min_clevel(void); + +/** + * zstd_max_clevel() - maximum allowed compression level + * + * Return: The maximum allowed compression level. + */ +int zstd_max_clevel(void); + +/* ====== Parameter Selection ====== */ + +/** + * enum zstd_strategy - zstd compression search strategy + * + * From faster to stronger. See zstd_lib.h. + */ +typedef ZSTD_strategy zstd_strategy; + +/** + * struct zstd_compression_parameters - zstd compression parameters + * @windowLog: Log of the largest match distance. Larger means more + * compression, and more memory needed during decompression. + * @chainLog: Fully searched segment. Larger means more compression, + * slower, and more memory (useless for fast). + * @hashLog: Dispatch table. Larger means more compression, + * slower, and more memory. + * @searchLog: Number of searches. Larger means more compression and slower. + * @searchLength: Match length searched. Larger means faster decompression, + * sometimes less compression. + * @targetLength: Acceptable match size for optimal parser (only). Larger means + * more compression, and slower. + * @strategy: The zstd compression strategy. + * + * See zstd_lib.h. + */ +typedef ZSTD_compressionParameters zstd_compression_parameters; + +/** + * struct zstd_frame_parameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the + * frame header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the + * end of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame + * header when using dictionary compression. + * + * The default value is all fields set to 0. See zstd_lib.h. + */ +typedef ZSTD_frameParameters zstd_frame_parameters; + +/** + * struct zstd_parameters - zstd parameters + * @cParams: The compression parameters. + * @fParams: The frame parameters. + */ +typedef ZSTD_parameters zstd_parameters; + +/** + * zstd_get_params() - returns zstd_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. + * + * Return: The selected zstd_parameters. + */ +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size); + +/* ====== Single-pass Compression ====== */ + +typedef ZSTD_CCtx zstd_cctx; + +/** + * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx + * @parameters: The compression parameters to be used. + * + * If multiple compression parameters might be used, the caller must call + * zstd_cctx_workspace_bound() for each set of parameters and use the maximum + * size. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). + */ +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); + +/** + * zstd_init_cctx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to + * determine how large the workspace must be. + * + * Return: A zstd compression context or NULL on error. + */ +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); + +/** + * zstd_compress_cctx() - compress src into dst with the initialized parameters + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @parameters: The compression parameters to be used. + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters); + +/* ====== Single-pass Decompression ====== */ + +typedef ZSTD_DCtx zstd_dctx; + +/** + * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_dctx(). + */ +size_t zstd_dctx_workspace_bound(void); + +/** + * zstd_init_dctx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to + * determine how large the workspace must be. + * + * Return: A zstd decompression context or NULL on error. + */ +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); + +/** + * zstd_decompress_dctx() - decompress zstd compressed src into dst + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. + * + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size); + +/* ====== Streaming Buffers ====== */ + +/** + * struct zstd_in_buffer - input buffer for streaming + * @src: Start of the input buffer. + * @size: Size of the input buffer. + * @pos: Position where reading stopped. Will be updated. + * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. + */ +typedef ZSTD_inBuffer zstd_in_buffer; + +/** + * struct zstd_out_buffer - output buffer for streaming + * @dst: Start of the output buffer. + * @size: Size of the output buffer. + * @pos: Position where writing stopped. Will be updated. + * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. + */ +typedef ZSTD_outBuffer zstd_out_buffer; + +/* ====== Streaming Compression ====== */ + +typedef ZSTD_CStream zstd_cstream; + +/** + * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream + * @cparams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cstream(). + */ +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); + +/** + * zstd_init_cstream() - initialize a zstd streaming compression context + * @parameters The zstd parameters to use for compression. + * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller + * must pass the source size (zero means empty source). + * Otherwise, the caller may optionally pass the source + * size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_cstream_workspace_bound(params->cparams) to + * determine how large the workspace must be. + * + * Return: The zstd streaming compression context or NULL on error. + */ +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size); + +/** + * zstd_reset_cstream() - reset the context using parameters from creation + * @cstream: The zstd streaming compression context to reset. + * @pledged_src_size: Optionally the source size, or zero if unknown. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. If `pledged_src_size` is non-zero the frame + * content size is always written into the frame header. + * + * Return: Zero or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size); + +/** + * zstd_compress_stream() - streaming compress some of input into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data + * was read. Note that it may not consume the entire input, in which + * case `input->pos < input->size`, and it's up to the caller to + * present remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input); + +/** + * zstd_flush_stream() - flush internal buffers into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * zstd_flush_stream() must be called until it returns 0, meaning all the data + * has been flushed. Since zstd_flush_stream() causes a block to be ended, + * calling it too often will degrade the compression ratio. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). + */ +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output); + +/** + * zstd_end_stream() - flush internal buffers into output and end the frame + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * zstd_end_stream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). + */ +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output); + +/* ====== Streaming Decompression ====== */ + +typedef ZSTD_DStream zstd_dstream; + +/** + * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream + * @max_window_size: The maximum window size allowed for compressed frames. + * + * Return: A lower bound on the size of the workspace that is passed + * to zstd_init_dstream(). + */ +size_t zstd_dstream_workspace_bound(size_t max_window_size); + +/** + * zstd_init_dstream() - initialize a zstd streaming decompression context + * @max_window_size: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use zstd_dstream_workspace_bound(max_window_size) to + * determine how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size); + +/** + * zstd_reset_dstream() - reset the context using parameters from creation + * @dstream: The zstd streaming decompression context to reset. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. + * + * Return: Zero or an error, which can be checked using zstd_is_error(). + */ +size_t zstd_reset_dstream(zstd_dstream *dstream); + +/** + * zstd_decompress_stream() - streaming decompress some of input into output + * @dstream: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * zstd_decompress_stream() will not consume the last byte of the frame until + * the entire frame is flushed. + * + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the + * input for the next function call or an error, which can be checked + * using zstd_is_error(). The size hint will never load more than the + * frame. + */ +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input); + +/* ====== Frame Inspection Functions ====== */ + +/** + * zstd_find_frame_compressed_size() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded + * frame or a skippable frame. + * @src_size: The size of the source buffer. It must be at least as large as the + * size of the frame. + * + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be check with zstd_is_error(). + * Suitable to pass to ZSTD_decompress() or similar functions. + */ +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); -#define ZSTD_initDCtx(w, s) ZSTD_initStaticDCtx(w, s) -#define ZSTD_initCCtx(w, s) ZSTD_initStaticCCtx(w, s) +/** + * struct zstd_frame_params - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not + * present. + * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @blockSizeMax: The maximum block size. + * @frameType: The frame type (zstd or skippable) + * @headerSize: The size of the frame header. + * @dictID: The dictionary id, or 0 if not present. + * @checksumFlag: Whether a checksum was used. + * + * See zstd_lib.h. + */ +typedef ZSTD_frameHeader zstd_frame_header; -#define ZSTD_compressCCtx(w, dst, d_len, src, src_len, params) \ - ZSTD_compressCCtx(w, dst, d_len, src, src_len, 0) +/** + * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame + * @params: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @src_size: The size of the source buffer. + * + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using zstd_is_error(). + */ +size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, + size_t src_size); -#define ZSTD_CCtxWorkspaceBound(p) ZSTD_estimateCCtxSize(0) -#define ZSTD_DCtxWorkspaceBound() ZSTD_estimateDCtxSize() +#endif /* LINUX_ZSTD_H */ diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h new file mode 100644 index 0000000..58b6dd4 --- /dev/null +++ b/include/linux/zstd_errors.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 98c7866..3eaa6d2 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -65,16 +65,19 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc (struct bkey_ops) { \ .key_invalid = bch2_alloc_v1_invalid, \ .val_to_text = bch2_alloc_to_text, \ + .atomic_trigger = bch2_mark_alloc, \ } #define bch2_bkey_ops_alloc_v2 (struct bkey_ops) { \ .key_invalid = bch2_alloc_v2_invalid, \ .val_to_text = bch2_alloc_to_text, \ + .atomic_trigger = bch2_mark_alloc, \ } #define bch2_bkey_ops_alloc_v3 (struct bkey_ops) { \ .key_invalid = bch2_alloc_v3_invalid, \ .val_to_text = bch2_alloc_to_text, \ + .atomic_trigger = bch2_mark_alloc, \ } static inline bool bkey_is_alloc(const struct bkey *k) diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h index 4fdac54..2289a09 100644 --- a/libbcachefs/bkey_methods.h +++ b/libbcachefs/bkey_methods.h @@ -6,6 +6,7 @@ struct bch_fs; struct btree; +struct btree_trans; struct bkey; enum btree_node_type; @@ -20,6 +21,10 @@ struct bkey_ops { void (*swab)(struct bkey_s); bool (*key_normalize)(struct bch_fs *, struct bkey_s); bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c); + int (*trans_trigger)(struct btree_trans *, struct bkey_s_c, + struct bkey_i *, unsigned); + int (*atomic_trigger)(struct btree_trans *, struct bkey_s_c, + struct bkey_s_c, unsigned); void (*compat)(enum btree_id id, unsigned version, unsigned big_endian, int write, struct bkey_s); @@ -57,6 +62,28 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); +static inline int bch2_mark_key(struct btree_trans *trans, + struct bkey_s_c old, + struct bkey_s_c new, + unsigned flags) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new.k->type]; + + return ops->atomic_trigger + ? ops->atomic_trigger(trans, old, new, flags) + : 0; +} + +static inline int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, + struct bkey_i *new, unsigned flags) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[old.k->type ?: new->k.type]; + + return ops->trans_trigger + ? ops->trans_trigger(trans, old, new, flags) + : 0; +} + void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); void __bch2_bkey_compat(unsigned, enum btree_id, unsigned, unsigned, diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 4f0ad06..e6cea4c 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -1626,6 +1626,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b) if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED); + else + wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } static void btree_node_write_done(struct bch_fs *c, struct btree *b) @@ -2094,7 +2096,6 @@ restart: rcu_read_unlock(); wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); goto restart; - } rcu_read_unlock(); } diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 8186ee7..7fd0379 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -189,7 +189,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans, if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) || (btree_node_lock_seq_matches(path, b, level) && btree_node_lock_increment(trans, b, level, want))) { - mark_btree_node_locked(path, level, want); + mark_btree_node_locked(trans, path, level, want); return true; } fail: @@ -240,7 +240,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans, return false; success: - mark_btree_node_intent_locked(path, level); + mark_btree_node_intent_locked(trans, path, level); return true; } @@ -486,14 +486,15 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans, * before interior nodes - now that's handled by * bch2_btree_path_traverse_all(). */ - trans_for_each_path(trans, linked) - if (linked != path && - linked->cached == path->cached && - linked->btree_id == path->btree_id && - linked->locks_want < new_locks_want) { - linked->locks_want = new_locks_want; - btree_path_get_locks(trans, linked, true); - } + if (!path->cached && !trans->in_traverse_all) + trans_for_each_path(trans, linked) + if (linked != path && + linked->cached == path->cached && + linked->btree_id == path->btree_id && + linked->locks_want < new_locks_want) { + linked->locks_want = new_locks_want; + btree_path_get_locks(trans, linked, true); + } return false; } @@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b) t != BTREE_NODE_UNLOCKED) { btree_node_unlock(path, b->c.level); six_lock_increment(&b->c.lock, t); - mark_btree_node_locked(path, b->c.level, t); + mark_btree_node_locked(trans, path, b->c.level, t); } btree_path_level_init(trans, path, b); @@ -1244,7 +1245,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans, for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++) path->l[i].b = NULL; - mark_btree_node_locked(path, path->level, lock_type); + mark_btree_node_locked(trans, path, path->level, lock_type); btree_path_level_init(trans, path, b); return 0; } @@ -1409,7 +1410,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans, if (unlikely(ret)) goto err; - mark_btree_node_locked(path, level, lock_type); + mark_btree_node_locked(trans, path, level, lock_type); btree_path_level_init(trans, path, b); if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) && @@ -1442,6 +1443,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans) trans->in_traverse_all = true; retry_all: trans->restarted = false; + trans->traverse_all_idx = U8_MAX; trans_for_each_path(trans, path) path->should_be_locked = false; @@ -1474,9 +1476,9 @@ retry_all: } /* Now, redo traversals in correct order: */ - i = 0; - while (i < trans->nr_sorted) { - path = trans->paths + trans->sorted[i]; + trans->traverse_all_idx = 0; + while (trans->traverse_all_idx < trans->nr_sorted) { + path = trans->paths + trans->sorted[trans->traverse_all_idx]; /* * Traversing a path can cause another path to be added at about @@ -1484,10 +1486,13 @@ retry_all: */ if (path->uptodate) { ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_); - if (ret) + if (ret == -EINTR || ret == -ENOMEM) goto retry_all; + if (ret) + goto err; + BUG_ON(path->uptodate); } else { - i++; + trans->traverse_all_idx++; } } @@ -1498,7 +1503,7 @@ retry_all: */ trans_for_each_path(trans, path) BUG_ON(path->uptodate >= BTREE_ITER_NEED_TRAVERSE); - +err: bch2_btree_cache_cannibalize_unlock(c); trans->in_traverse_all = false; @@ -1807,18 +1812,48 @@ free: __bch2_path_free(trans, path); } +void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) +{ + struct btree_insert_entry *i; + + pr_buf(buf, "transaction updates for %s journal seq %llu\n", + trans->fn, trans->journal_res.seq); + + trans_for_each_update(trans, i) { + struct bkey_s_c old = { &i->old_k, i->old_v }; + + pr_buf(buf, "update: btree %s %pS\n old ", + bch2_btree_ids[i->btree_id], + (void *) i->ip_allocated); + + bch2_bkey_val_to_text(buf, trans->c, old); + pr_buf(buf, "\n new "); + bch2_bkey_val_to_text(buf, trans->c, bkey_i_to_s_c(i->k)); + pr_buf(buf, "\n"); + } +} + +noinline __cold +void bch2_dump_trans_updates(struct btree_trans *trans) +{ + struct printbuf buf = PRINTBUF; + + bch2_trans_updates_to_text(&buf, trans); + bch_err(trans->c, "%s", buf.buf); + printbuf_exit(&buf); +} + noinline __cold void bch2_dump_trans_paths_updates(struct btree_trans *trans) { struct btree_path *path; - struct btree_insert_entry *i; - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; + struct printbuf buf = PRINTBUF; unsigned idx; trans_for_each_path_inorder(trans, path, idx) { - printbuf_reset(&buf1); + printbuf_reset(&buf); - bch2_bpos_to_text(&buf1, path->pos); + bch2_bpos_to_text(&buf, path->pos); printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n", path->idx, path->ref, path->intent_ref, @@ -1826,7 +1861,7 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) path->preserve ? " P" : "", bch2_btree_ids[path->btree_id], path->level, - buf1.buf, + buf.buf, path->nodes_locked, #ifdef CONFIG_BCACHEFS_DEBUG (void *) path->ip_allocated @@ -1836,23 +1871,9 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans) ); } - trans_for_each_update(trans, i) { - struct bkey u; - struct bkey_s_c old = bch2_btree_path_peek_slot(i->path, &u); - - printbuf_reset(&buf1); - printbuf_reset(&buf2); - bch2_bkey_val_to_text(&buf1, trans->c, old); - bch2_bkey_val_to_text(&buf2, trans->c, bkey_i_to_s_c(i->k)); + printbuf_exit(&buf); - printk(KERN_ERR "update: btree %s %pS\n old %s\n new %s", - bch2_btree_ids[i->btree_id], - (void *) i->ip_allocated, - buf1.buf, buf2.buf); - } - - printbuf_exit(&buf2); - printbuf_exit(&buf1); + bch2_dump_trans_updates(trans); } static struct btree_path *btree_path_alloc(struct btree_trans *trans, @@ -2337,11 +2358,12 @@ out: * bch2_btree_iter_peek: returns first key greater than or equal to iterator's * current position */ -struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) +struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos end) { struct btree_trans *trans = iter->trans; struct bpos search_key = btree_iter_search_key(iter); struct bkey_s_c k; + struct bpos iter_pos; int ret; if (iter->update_path) { @@ -2357,6 +2379,24 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) if (!k.k || bkey_err(k)) goto out; + /* + * iter->pos should be mononotically increasing, and always be + * equal to the key we just returned - except extents can + * straddle iter->pos: + */ + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) + iter_pos = k.k->p; + else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) + iter_pos = bkey_start_pos(k.k); + else + iter_pos = iter->pos; + + if (bkey_cmp(iter_pos, end) > 0) { + bch2_btree_iter_set_pos(iter, end); + k = bkey_s_c_null; + goto out; + } + if (iter->update_path && bkey_cmp(iter->update_path->pos, k.k->p)) { bch2_path_put(trans, iter->update_path, @@ -2387,10 +2427,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) iter->update_path = bch2_btree_path_set_pos(trans, iter->update_path, pos, iter->flags & BTREE_ITER_INTENT, - btree_iter_ip_allocated(iter)); - - BUG_ON(!(iter->update_path->nodes_locked & 1)); - iter->update_path->should_be_locked = true; + _THIS_IP_); } /* @@ -2414,14 +2451,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) break; } - /* - * iter->pos should be mononotically increasing, and always be equal to - * the key we just returned - except extents can straddle iter->pos: - */ - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) - iter->pos = k.k->p; - else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) - iter->pos = bkey_start_pos(k.k); + iter->pos = iter_pos; iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p, iter->flags & BTREE_ITER_INTENT, @@ -2429,8 +2459,13 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) BUG_ON(!iter->path->nodes_locked); out: if (iter->update_path) { - BUG_ON(!(iter->update_path->nodes_locked & 1)); - iter->update_path->should_be_locked = true; + if (iter->update_path->uptodate && + !bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) { + k = bkey_s_c_err(-EINTR); + } else { + BUG_ON(!(iter->update_path->nodes_locked & 1)); + iter->update_path->should_be_locked = true; + } } iter->path->should_be_locked = true; @@ -2661,9 +2696,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) if (iter->flags & BTREE_ITER_INTENT) { struct btree_iter iter2; + struct bpos end = iter->pos; + + if (iter->flags & BTREE_ITER_IS_EXTENTS) + end.offset = U64_MAX; bch2_trans_copy_iter(&iter2, iter); - k = bch2_btree_iter_peek(&iter2); + k = bch2_btree_iter_peek_upto(&iter2, end); if (k.k && !bkey_err(k)) { iter->k = iter2.k; @@ -2831,6 +2870,11 @@ static inline void btree_path_list_add(struct btree_trans *trans, path->sorted_idx = pos ? pos->sorted_idx + 1 : 0; + if (trans->in_traverse_all && + trans->traverse_all_idx != U8_MAX && + trans->traverse_all_idx >= path->sorted_idx) + trans->traverse_all_idx++; + array_insert_item(trans->sorted, trans->nr_sorted, path->sorted_idx, path->idx); for (i = path->sorted_idx; i < trans->nr_sorted; i++) diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index d612aec..f670029 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -209,9 +209,14 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *); struct btree *bch2_btree_iter_peek_node(struct btree_iter *); struct btree *bch2_btree_iter_next_node(struct btree_iter *); -struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); +static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) +{ + return bch2_btree_iter_peek_upto(iter, SPOS_MAX); +} + struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *); struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *); @@ -306,13 +311,26 @@ static inline int bkey_err(struct bkey_s_c k) } static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, - unsigned flags) + unsigned flags) { return flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : bch2_btree_iter_peek(iter); } +static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, + struct bpos end, + unsigned flags) +{ + if (!(flags & BTREE_ITER_SLOTS)) + return bch2_btree_iter_peek_upto(iter, end); + + if (bkey_cmp(iter->pos, end) > 0) + return bkey_s_c_null; + + return bch2_btree_iter_peek_slot(iter); +} + static inline int btree_trans_too_many_iters(struct btree_trans *trans) { return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2 @@ -349,6 +367,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, !((_ret) = bkey_err(_k)) && (_k).k; \ bch2_btree_iter_advance(&(_iter))) +#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, \ + _start, _end, _flags, _k, _ret) \ + for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \ + (_start), (_flags)); \ + (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\ + !((_ret) = bkey_err(_k)) && (_k).k; \ + bch2_btree_iter_advance(&(_iter))) + #define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret) \ for (; \ (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\ @@ -363,6 +389,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, /* new multiple iterator interface: */ +void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); +void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); void __bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t, const char *); diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index ee89b65..b1b7a30 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -309,7 +309,7 @@ retry: if (!ck) goto retry; - mark_btree_node_locked(path, 0, SIX_LOCK_intent); + mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent); path->locks_want = 1; } else { enum six_lock_type lock_want = __btree_lock_want(path, 0); @@ -330,7 +330,7 @@ retry: goto retry; } - mark_btree_node_locked(path, 0, lock_want); + mark_btree_node_locked(trans, path, 0, lock_want); } path->l[0].lock_seq = ck->c.lock.state.seq; diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index b4434ec..67c970d 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -58,7 +58,8 @@ static inline void mark_btree_node_unlocked(struct btree_path *path, path->nodes_intent_locked &= ~(1 << level); } -static inline void mark_btree_node_locked(struct btree_path *path, +static inline void mark_btree_node_locked(struct btree_trans *trans, + struct btree_path *path, unsigned level, enum six_lock_type type) { @@ -66,14 +67,17 @@ static inline void mark_btree_node_locked(struct btree_path *path, BUILD_BUG_ON(SIX_LOCK_read != 0); BUILD_BUG_ON(SIX_LOCK_intent != 1); + BUG_ON(trans->in_traverse_all && path->sorted_idx > trans->traverse_all_idx); + path->nodes_locked |= 1 << level; path->nodes_intent_locked |= type << level; } -static inline void mark_btree_node_intent_locked(struct btree_path *path, +static inline void mark_btree_node_intent_locked(struct btree_trans *trans, + struct btree_path *path, unsigned level) { - mark_btree_node_locked(path, level, SIX_LOCK_intent); + mark_btree_node_locked(trans, path, level, SIX_LOCK_intent); } static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level) diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index c18e388..575635b 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -361,7 +361,11 @@ struct btree_insert_entry { unsigned long ip_allocated; }; +#ifndef CONFIG_LOCKDEP #define BTREE_ITER_MAX 64 +#else +#define BTREE_ITER_MAX 32 +#endif struct btree_trans_commit_hook; typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *); @@ -388,6 +392,7 @@ struct btree_trans { u8 nr_sorted; u8 nr_updates; + u8 traverse_all_idx; bool used_mempool:1; bool in_traverse_all:1; bool restarted:1; diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index ee978f3..47623f3 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -213,7 +213,7 @@ inline void bch2_btree_add_journal_pin(struct bch_fs *c, /** * btree_insert_key - insert a key one key into a leaf node */ -static bool btree_insert_key_leaf(struct btree_trans *trans, +static void btree_insert_key_leaf(struct btree_trans *trans, struct btree_insert_entry *insert) { struct bch_fs *c = trans->c; @@ -226,7 +226,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b, &insert_l(insert)->iter, insert->k))) - return false; + return; i->journal_seq = cpu_to_le64(max(trans->journal_res.seq, le64_to_cpu(i->journal_seq))); @@ -247,8 +247,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, if (u64s_added > live_u64s_added && bch2_maybe_compact_whiteouts(c, b)) bch2_trans_node_reinit_iter(trans, b); - - return true; } /* Cached btree updates: */ @@ -400,18 +398,16 @@ static inline void do_btree_insert_one(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct journal *j = &c->journal; - bool did_work; EBUG_ON(trans->journal_res.ref != !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); i->k->k.needs_whiteout = false; - did_work = !i->cached - ? btree_insert_key_leaf(trans, i) - : bch2_btree_insert_key_cached(trans, i->path, i->k); - if (!did_work) - return; + if (!i->cached) + btree_insert_key_leaf(trans, i); + else + bch2_btree_insert_key_cached(trans, i->path, i->k); if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { bch2_journal_add_keys(j, &trans->journal_res, @@ -440,7 +436,8 @@ static int run_one_mem_trigger(struct btree_trans *trans, if (!btree_node_type_needs_gc(i->btree_id)) return 0; - if (old.k->type == new->k.type && + if (bch2_bkey_ops[old.k->type].atomic_trigger == + bch2_bkey_ops[i->k->k.type].atomic_trigger && ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new), BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags); @@ -485,7 +482,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ if (overwrite) { ret = bch2_trans_mark_old(trans, old, i->flags); - } else if (old.k->type == i->k->k.type && + } else if (bch2_bkey_ops[old.k->type].trans_trigger == + bch2_bkey_ops[i->k->k.type].trans_trigger && ((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) { i->overwrite_trigger_run = true; ret = bch2_trans_mark_key(trans, old, i->k, @@ -652,6 +650,32 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, if (btree_node_type_needs_gc(i->bkey_type)) marking = true; + + /* + * Revalidate before calling mem triggers - XXX, ugly: + * + * - successful btree node splits don't cause transaction + * restarts and will have invalidated the pointer to the bkey + * value + * - btree_node_lock_for_insert() -> btree_node_prep_for_write() + * when it has to resort + * - btree_key_can_insert_cached() when it has to reallocate + * + * Ugly because we currently have no way to tell if the + * pointer's been invalidated, which means it's debatabale + * whether we should be stashing the old key at all. + */ + i->old_v = bch2_btree_path_peek_slot(i->path, &i->old_k).v; + + if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))) { + struct bkey_i *j_k = + bch2_journal_keys_peek(c, i->btree_id, i->level, i->k->k.p); + + if (j_k && !bpos_cmp(j_k->k.p, i->k->k.p)) { + i->old_k = j_k->k; + i->old_v = &j_k->v; + } + } } /* @@ -1217,7 +1241,7 @@ int bch2_trans_update_extent(struct btree_trans *trans, BTREE_ITER_INTENT| BTREE_ITER_WITH_UPDATES| BTREE_ITER_NOT_EXTENTS); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -1369,7 +1393,8 @@ nomerge1: goto out; } next: - k = bch2_btree_iter_next(&iter); + bch2_btree_iter_advance(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX)); if ((ret = bkey_err(k))) goto err; if (!k.k) @@ -1629,14 +1654,14 @@ int bch2_btree_delete_at(struct btree_trans *trans, int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bpos start, struct bpos end, - unsigned iter_flags, + unsigned update_flags, u64 *journal_seq) { struct btree_iter iter; struct bkey_s_c k; int ret = 0; - bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT|iter_flags); + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT); retry: while ((bch2_trans_begin(trans), (k = bch2_btree_iter_peek(&iter)).k) && @@ -1679,7 +1704,8 @@ retry: ret = bch2_trans_update(trans, &iter, &delete, 0) ?: bch2_trans_commit(trans, &disk_res, journal_seq, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + update_flags); bch2_disk_reservation_put(trans->c, &disk_res); if (ret) break; @@ -1701,10 +1727,10 @@ retry: */ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, struct bpos start, struct bpos end, - unsigned iter_flags, + unsigned update_flags, u64 *journal_seq) { return bch2_trans_do(c, NULL, journal_seq, 0, bch2_btree_delete_range_trans(&trans, id, start, end, - iter_flags, journal_seq)); + update_flags, journal_seq)); } diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c index 2c3b71b..d522637 100644 --- a/libbcachefs/buckets.c +++ b/libbcachefs/buckets.c @@ -497,20 +497,25 @@ void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, BUG_ON(owned_by_allocator == old.owned_by_allocator); } -static int bch2_mark_alloc(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_mark_alloc(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; struct bkey_alloc_unpacked old_u = bch2_alloc_unpack(old); struct bkey_alloc_unpacked new_u = bch2_alloc_unpack(new); - struct bch_dev *ca; + struct bch_dev *ca = bch_dev_bkey_exists(c, new_u.dev); struct bucket *g; struct bucket_mark old_m, m; int ret = 0; + if (bch2_trans_inconsistent_on(new_u.bucket < ca->mi.first_bucket || + new_u.bucket >= ca->mi.nbuckets, trans, + "alloc key outside range of device's buckets")) + return -EIO; + /* * alloc btree is read in by bch2_alloc_read, not gc: */ @@ -550,11 +555,6 @@ static int bch2_mark_alloc(struct btree_trans *trans, } } - ca = bch_dev_bkey_exists(c, new_u.dev); - - if (new_u.bucket >= ca->mi.nbuckets) - return 0; - percpu_down_read(&c->mark_lock); if (!gc && new_u.gen != old_u.gen) *bucket_gen(ca, new_u.bucket) = new_u.gen; @@ -929,9 +929,9 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans, return 0; } -static int bch2_mark_extent(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_mark_extent(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { u64 journal_seq = trans->journal_res.seq; struct bch_fs *c = trans->c; @@ -1011,9 +1011,9 @@ static int bch2_mark_extent(struct btree_trans *trans, return 0; } -static int bch2_mark_stripe(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_mark_stripe(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { bool gc = flags & BTREE_TRIGGER_GC; u64 journal_seq = trans->journal_res.seq; @@ -1118,9 +1118,9 @@ static int bch2_mark_stripe(struct btree_trans *trans, return 0; } -static int bch2_mark_inode(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_mark_inode(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { struct bch_fs *c = trans->c; struct bch_fs_usage __percpu *fs_usage; @@ -1149,9 +1149,9 @@ static int bch2_mark_inode(struct btree_trans *trans, return 0; } -static int bch2_mark_reservation(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_mark_reservation(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { struct bch_fs *c = trans->c; struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; @@ -1228,9 +1228,9 @@ fsck_err: return ret; } -static int bch2_mark_reflink_p(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_s_c new, - unsigned flags) +int bch2_mark_reflink_p(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_s_c new, + unsigned flags) { struct bch_fs *c = trans->c; struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; @@ -1267,39 +1267,6 @@ static int bch2_mark_reflink_p(struct btree_trans *trans, return ret; } -int bch2_mark_key(struct btree_trans *trans, - struct bkey_s_c old, - struct bkey_s_c new, - unsigned flags) -{ - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new; - - switch (k.k->type) { - case KEY_TYPE_alloc: - case KEY_TYPE_alloc_v2: - case KEY_TYPE_alloc_v3: - return bch2_mark_alloc(trans, old, new, flags); - case KEY_TYPE_btree_ptr: - case KEY_TYPE_btree_ptr_v2: - case KEY_TYPE_extent: - case KEY_TYPE_reflink_v: - return bch2_mark_extent(trans, old, new, flags); - case KEY_TYPE_stripe: - return bch2_mark_stripe(trans, old, new, flags); - case KEY_TYPE_inode: - case KEY_TYPE_inode_v2: - return bch2_mark_inode(trans, old, new, flags); - case KEY_TYPE_reservation: - return bch2_mark_reservation(trans, old, new, flags); - case KEY_TYPE_reflink_p: - return bch2_mark_reflink_p(trans, old, new, flags); - case KEY_TYPE_snapshot: - return bch2_mark_snapshot(trans, old, new, flags); - default: - return 0; - } -} - static noinline __cold void fs_usage_apply_warn(struct btree_trans *trans, unsigned disk_res_sectors, @@ -1462,7 +1429,6 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type) { - struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; struct bkey_i_stripe *s; @@ -1478,16 +1444,15 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans, goto err; if (k.k->type != KEY_TYPE_stripe) { - bch2_fs_inconsistent(c, + bch2_trans_inconsistent(trans, "pointer to nonexistent stripe %llu", (u64) p.ec.idx); - bch2_inconsistent_error(c); ret = -EIO; goto err; } if (!bch2_ptr_matches_stripe(bkey_s_c_to_stripe(k).v, p)) { - bch2_fs_inconsistent(c, + bch2_trans_inconsistent(trans, "stripe pointer doesn't match stripe %llu", (u64) p.ec.idx); ret = -EIO; @@ -1516,10 +1481,14 @@ err: return ret; } -static int bch2_trans_mark_extent(struct btree_trans *trans, - struct bkey_s_c k, unsigned flags) +int bch2_trans_mark_extent(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_i *new, + unsigned flags) { struct bch_fs *c = trans->c; + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -1601,8 +1570,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, goto err; if (!deleting) { - if (bch2_fs_inconsistent_on(u.stripe || - u.stripe_redundancy, c, + if (bch2_trans_inconsistent_on(u.stripe || + u.stripe_redundancy, trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", iter.pos.inode, iter.pos.offset, u.gen, bch2_data_types[u.data_type], @@ -1612,7 +1581,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, goto err; } - if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c, + if (bch2_trans_inconsistent_on(data_type && u.dirty_sectors, trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", iter.pos.inode, iter.pos.offset, u.gen, bch2_data_types[u.data_type], @@ -1625,8 +1594,8 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, u.stripe = s.k->p.offset; u.stripe_redundancy = s.v->nr_redundant; } else { - if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset || - u.stripe_redundancy != s.v->nr_redundant, c, + if (bch2_trans_inconsistent_on(u.stripe != s.k->p.offset || + u.stripe_redundancy != s.v->nr_redundant, trans, "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", iter.pos.inode, iter.pos.offset, u.gen, s.k->p.offset, u.stripe)) { @@ -1650,9 +1619,9 @@ err: return ret; } -static int bch2_trans_mark_stripe(struct btree_trans *trans, - struct bkey_s_c old, struct bkey_i *new, - unsigned flags) +int bch2_trans_mark_stripe(struct btree_trans *trans, + struct bkey_s_c old, struct bkey_i *new, + unsigned flags) { const struct bch_stripe *old_s = NULL; struct bch_stripe *new_s = NULL; @@ -1720,10 +1689,10 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, return ret; } -static int bch2_trans_mark_inode(struct btree_trans *trans, - struct bkey_s_c old, - struct bkey_i *new, - unsigned flags) +int bch2_trans_mark_inode(struct btree_trans *trans, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) { int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k); @@ -1736,9 +1705,14 @@ static int bch2_trans_mark_inode(struct btree_trans *trans, return 0; } -static int bch2_trans_mark_reservation(struct btree_trans *trans, - struct bkey_s_c k, unsigned flags) +int bch2_trans_mark_reservation(struct btree_trans *trans, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) { + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; s64 sectors = (s64) k.k->size; struct replicas_delta_list *d; @@ -1787,7 +1761,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, refcount = bkey_refcount(n); if (!refcount) { bch2_bkey_val_to_text(&buf, c, p.s_c); - bch2_fs_inconsistent(c, + bch2_trans_inconsistent(trans, "nonexistent indirect extent at %llu while marking\n %s", *idx, buf.buf); ret = -EIO; @@ -1796,7 +1770,7 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) { bch2_bkey_val_to_text(&buf, c, p.s_c); - bch2_fs_inconsistent(c, + bch2_trans_inconsistent(trans, "indirect extent refcount underflow at %llu while marking\n %s", *idx, buf.buf); ret = -EIO; @@ -1837,9 +1811,14 @@ err: return ret; } -static int bch2_trans_mark_reflink_p(struct btree_trans *trans, - struct bkey_s_c k, unsigned flags) +int bch2_trans_mark_reflink_p(struct btree_trans *trans, + struct bkey_s_c old, + struct bkey_i *new, + unsigned flags) { + struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE + ? old + : bkey_i_to_s_c(new); struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); u64 idx, end_idx; int ret = 0; @@ -1860,33 +1839,6 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans, return ret; } -int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c old, - struct bkey_i *new, unsigned flags) -{ - struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE - ? old - : bkey_i_to_s_c(new); - - switch (k.k->type) { - case KEY_TYPE_btree_ptr: - case KEY_TYPE_btree_ptr_v2: - case KEY_TYPE_extent: - case KEY_TYPE_reflink_v: - return bch2_trans_mark_extent(trans, k, flags); - case KEY_TYPE_stripe: - return bch2_trans_mark_stripe(trans, old, new, flags); - case KEY_TYPE_inode: - case KEY_TYPE_inode_v2: - return bch2_trans_mark_inode(trans, old, new, flags); - case KEY_TYPE_reservation: - return bch2_trans_mark_reservation(trans, k, flags); - case KEY_TYPE_reflink_p: - return bch2_trans_mark_reflink_p(trans, k, flags); - default: - return 0; - } -} - static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, size_t b, enum bch_data_type type, diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h index daf79a4..392e03d 100644 --- a/libbcachefs/buckets.h +++ b/libbcachefs/buckets.h @@ -229,6 +229,19 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, size_t, enum bch_data_type, unsigned, struct gc_pos, unsigned); +int bch2_mark_alloc(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); +int bch2_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); + +int bch2_trans_mark_extent(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_stripe(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_inode(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_reservation(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); +int bch2_trans_mark_reflink_p(struct btree_trans *, struct bkey_s_c, struct bkey_i *, unsigned); + int bch2_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c, unsigned); int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 8e4179d..7d9ebcc 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -197,9 +197,11 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src, goto err; workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO); - ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound()); + ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound()); - ret = ZSTD_decompressDCtx(ctx, + src_len = le32_to_cpup(src_data.b); + + ret = zstd_decompress_dctx(ctx, dst_data, dst_len, src_data.b + 4, real_src_len); @@ -333,8 +335,8 @@ static int attempt_compress(struct bch_fs *c, return strm.total_out; } case BCH_COMPRESSION_TYPE_zstd: { - ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace, - ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams)); + ZSTD_CCtx *ctx = zstd_init_cctx(workspace, + zstd_cctx_workspace_bound(&c->zstd_params.cParams)); /* * ZSTD requires that when we decompress we pass in the exact @@ -347,11 +349,11 @@ static int attempt_compress(struct bch_fs *c, * factor (7 bytes) from the dst buffer size to account for * that. */ - size_t len = ZSTD_compressCCtx(ctx, + size_t len = zstd_compress_cctx(ctx, dst + 4, dst_len - 4 - 7, src, src_len, - c->zstd_params); - if (ZSTD_isError(len)) + &c->zstd_params); + if (zstd_is_error(len)) return 0; *((__le32 *) dst) = cpu_to_le32(len); @@ -546,7 +548,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) { size_t decompress_workspace_size = 0; bool decompress_workspace_needed; - ZSTD_parameters params = ZSTD_getParams(0, c->opts.encoded_extent_max, 0); + ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max); struct { unsigned feature; unsigned type; @@ -558,8 +560,8 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL), zlib_inflate_workspacesize(), }, { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd, - ZSTD_CCtxWorkspaceBound(params.cParams), - ZSTD_DCtxWorkspaceBound() }, + zstd_cctx_workspace_bound(¶ms.cParams), + zstd_dctx_workspace_bound() }, }, *i; int ret = 0; diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c index a43a244..760e4f7 100644 --- a/libbcachefs/dirent.c +++ b/libbcachefs/dirent.c @@ -470,16 +470,13 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir) if (ret) return ret; - for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents, - SPOS(dir.inum, 0, snapshot), 0, k, ret) { - if (k.k->p.inode > dir.inum) - break; - + for_each_btree_key_upto_norestart(trans, iter, BTREE_ID_dirents, + SPOS(dir.inum, 0, snapshot), + POS(dir.inum, U64_MAX), 0, k, ret) if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; } - } bch2_trans_iter_exit(trans, &iter); return ret; @@ -503,11 +500,9 @@ retry: if (ret) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents, - SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) { - if (k.k->p.inode > inum.inum) - break; - + for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_dirents, + SPOS(inum.inum, ctx->pos, snapshot), + POS(inum.inum, U64_MAX), 0, k, ret) { if (k.k->type != KEY_TYPE_dirent) continue; diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h index 78d468c..9d508a2 100644 --- a/libbcachefs/ec.h +++ b/libbcachefs/ec.h @@ -14,6 +14,8 @@ void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, .key_invalid = bch2_stripe_invalid, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ + .trans_trigger = bch2_trans_mark_stripe, \ + .atomic_trigger = bch2_mark_stripe, \ } static inline unsigned stripe_csums_per_device(const struct bch_stripe *s) diff --git a/libbcachefs/error.h b/libbcachefs/error.h index 4ab3cfe..6e63c38 100644 --- a/libbcachefs/error.h +++ b/libbcachefs/error.h @@ -66,6 +66,26 @@ do { \ _ret; \ }) +/* + * When a transaction update discovers or is causing a fs inconsistency, it's + * helpful to also dump the pending updates: + */ +#define bch2_trans_inconsistent(trans, ...) \ +({ \ + bch_err(trans->c, __VA_ARGS__); \ + bch2_inconsistent_error(trans->c); \ + bch2_dump_trans_updates(trans); \ +}) + +#define bch2_trans_inconsistent_on(cond, trans, ...) \ +({ \ + bool _ret = unlikely(!!(cond)); \ + \ + if (_ret) \ + bch2_trans_inconsistent(trans, __VA_ARGS__); \ + _ret; \ +}) + /* * Fsck errors: inconsistency errors we detect at mount time, and should ideally * be able to repair: diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h index 9c25672..ae65084 100644 --- a/libbcachefs/extents.h +++ b/libbcachefs/extents.h @@ -381,6 +381,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .key_invalid = bch2_btree_ptr_invalid, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ + .trans_trigger = bch2_trans_mark_extent, \ + .atomic_trigger = bch2_mark_extent, \ } #define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \ @@ -388,6 +390,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ + .trans_trigger = bch2_trans_mark_extent, \ + .atomic_trigger = bch2_mark_extent, \ } /* KEY_TYPE_extent: */ @@ -402,6 +406,8 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ .key_merge = bch2_extent_merge, \ + .trans_trigger = bch2_trans_mark_extent, \ + .atomic_trigger = bch2_mark_extent, \ } /* KEY_TYPE_reservation: */ @@ -414,6 +420,8 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); .key_invalid = bch2_reservation_invalid, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ + .trans_trigger = bch2_trans_mark_reservation, \ + .atomic_trigger = bch2_mark_reservation, \ } /* Extent checksum entries: */ diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c index 1d0871f..b05d6e8 100644 --- a/libbcachefs/fs-io.c +++ b/libbcachefs/fs-io.c @@ -35,6 +35,15 @@ #include #include +static inline bool bio_full(struct bio *bio, unsigned len) +{ + if (bio->bi_vcnt >= bio->bi_max_vecs) + return true; + if (bio->bi_iter.bi_size > UINT_MAX - len) + return true; + return false; +} + static inline struct address_space *faults_disabled_mapping(void) { return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL); @@ -1808,11 +1817,11 @@ again: * to check that the address is actually valid, when atomic * usercopies are used, below. */ - if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { + if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { bytes = min_t(unsigned long, iov_iter_count(iter), PAGE_SIZE - offset); - if (unlikely(iov_iter_fault_in_readable(iter, bytes))) { + if (unlikely(fault_in_iov_iter_readable(iter, bytes))) { ret = -EFAULT; break; } @@ -1870,7 +1879,7 @@ static void bch2_dio_read_complete(struct closure *cl) { struct dio_read *dio = container_of(cl, struct dio_read, cl); - dio->req->ki_complete(dio->req, dio->ret, 0); + dio->req->ki_complete(dio->req, dio->ret); bio_check_or_release(&dio->rbio.bio, dio->should_dirty); } @@ -1919,7 +1928,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) iter->count -= shorten; bio = bio_alloc_bioset(GFP_KERNEL, - iov_iter_npages(iter, BIO_MAX_VECS), + bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), &c->dio_read_bioset); bio->bi_end_io = bch2_direct_IO_read_endio; @@ -1954,7 +1963,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) goto start; while (iter->count) { bio = bio_alloc_bioset(GFP_KERNEL, - iov_iter_npages(iter, BIO_MAX_VECS), + bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), &c->bio_read); bio->bi_end_io = bch2_direct_IO_read_split_endio; start: @@ -2101,7 +2110,7 @@ static long bch2_dio_write_loop(struct dio_write *dio) while (1) { iter_count = dio->iter.count; - if (kthread) + if (kthread && dio->mm) kthread_use_mm(dio->mm); BUG_ON(current->faults_disabled_mapping); current->faults_disabled_mapping = mapping; @@ -2111,7 +2120,7 @@ static long bch2_dio_write_loop(struct dio_write *dio) dropped_locks = fdm_dropped_locks(); current->faults_disabled_mapping = NULL; - if (kthread) + if (kthread && dio->mm) kthread_unuse_mm(dio->mm); /* @@ -2244,7 +2253,7 @@ err: inode_dio_end(&inode->v); if (!sync) { - req->ki_complete(req, ret, 0); + req->ki_complete(req, ret); ret = -EIOCBQUEUED; } return ret; @@ -2304,9 +2313,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) } bio = bio_alloc_bioset(GFP_KERNEL, - iov_iter_is_bvec(iter) - ? 0 - : iov_iter_npages(iter, BIO_MAX_VECS), + bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS), &c->dio_write_bioset); dio = container_of(bio, struct dio_write, op.wbio.bio); init_completion(&dio->done); diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index 30720c1..9fc6c39 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -934,9 +935,8 @@ retry: SPOS(ei->v.i_ino, start, snapshot), 0); while (!(ret = btree_trans_too_many_iters(&trans)) && - (k = bch2_btree_iter_peek(&iter)).k && - !(ret = bkey_err(k)) && - bkey_cmp(iter.pos, end) < 0) { + (k = bch2_btree_iter_peek_upto(&iter, end)).k && + !(ret = bkey_err(k))) { enum btree_id data_btree = BTREE_ID_extents; if (!bkey_extent_is_data(k.k) && diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 78e2db6..14b0b59 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -606,12 +606,12 @@ static int bch2_inode_delete_keys(struct btree_trans *trans, bch2_btree_iter_set_snapshot(&iter, snapshot); - k = bch2_btree_iter_peek(&iter); + k = bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); ret = bkey_err(k); if (ret) goto err; - if (!k.k || iter.pos.inode != inum.inum) + if (!k.k) break; bkey_init(&delete.k); diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h index 77957cc..2337ecf 100644 --- a/libbcachefs/inode.h +++ b/libbcachefs/inode.h @@ -13,11 +13,15 @@ void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode (struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ + .trans_trigger = bch2_trans_mark_inode, \ + .atomic_trigger = bch2_mark_inode, \ } #define bch2_bkey_ops_inode_v2 (struct bkey_ops) { \ .key_invalid = bch2_inode_v2_invalid, \ .val_to_text = bch2_inode_to_text, \ + .trans_trigger = bch2_trans_mark_inode, \ + .atomic_trigger = bch2_mark_inode, \ } static inline bool bkey_is_inode(const struct bkey *k) diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index ded4b68..eb556ec 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -241,6 +241,9 @@ static int journal_entry_open(struct journal *j) if (u64s <= 0) return cur_entry_journal_full; + if (fifo_empty(&j->pin) && j->reclaim_thread) + wake_up_process(j->reclaim_thread); + /* * The fifo_push() needs to happen at the same time as j->seq is * incremented for journal_last_seq() to be calculated correctly @@ -628,31 +631,6 @@ int bch2_journal_flush_seq(struct journal *j, u64 seq) return ret ?: ret2 < 0 ? ret2 : 0; } -int bch2_journal_meta(struct journal *j) -{ - struct journal_buf *buf; - struct journal_res res; - int ret; - - memset(&res, 0, sizeof(res)); - - ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); - if (ret) - return ret; - - buf = j->buf + (res.seq & JOURNAL_BUF_MASK); - buf->must_flush = true; - - if (!buf->flush_time) { - buf->flush_time = local_clock() ?: 1; - buf->expires = jiffies; - } - - bch2_journal_res_put(j, &res); - - return bch2_journal_flush_seq(j, res.seq); -} - /* * bch2_journal_flush_async - if there is an open journal entry, or a journal * still being written, write it and wait for the write to complete @@ -705,6 +683,64 @@ out: return ret; } +int bch2_journal_meta(struct journal *j) +{ + struct journal_buf *buf; + struct journal_res res; + int ret; + + memset(&res, 0, sizeof(res)); + + ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + if (ret) + return ret; + + buf = j->buf + (res.seq & JOURNAL_BUF_MASK); + buf->must_flush = true; + + if (!buf->flush_time) { + buf->flush_time = local_clock() ?: 1; + buf->expires = jiffies; + } + + bch2_journal_res_put(j, &res); + + return bch2_journal_flush_seq(j, res.seq); +} + +int bch2_journal_log_msg(struct journal *j, const char *fmt, ...) +{ + struct jset_entry_log *entry; + struct journal_res res = { 0 }; + unsigned msglen, u64s; + va_list args; + int ret; + + va_start(args, fmt); + msglen = vsnprintf(NULL, 0, fmt, args) + 1; + va_end(args); + + u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64))); + + ret = bch2_journal_res_get(j, &res, u64s, 0); + if (ret) + return ret; + + entry = container_of(journal_res_entry(j, &res), + struct jset_entry_log, entry);; + memset(entry, 0, u64s * sizeof(u64)); + entry->entry.type = BCH_JSET_ENTRY_log; + entry->entry.u64s = u64s - 1; + + va_start(args, fmt); + vsnprintf(entry->d, INT_MAX, fmt, args); + va_end(args); + + bch2_journal_res_put(j, &res); + + return bch2_journal_flush_seq(j, res.seq); +} + /* block/unlock the journal: */ void bch2_journal_unblock(struct journal *j) @@ -802,7 +838,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, * superblock before inserting into the journal array */ - pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0; + pos = ja->discard_idx ?: ja->nr; __array_insert_item(ja->buckets, ja->nr, pos); __array_insert_item(ja->bucket_seq, ja->nr, pos); __array_insert_item(journal_buckets->buckets, ja->nr, pos); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 1bb0e00..989c331 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -480,6 +480,7 @@ int bch2_journal_flush_seq(struct journal *, u64); int bch2_journal_flush(struct journal *); bool bch2_journal_noflush_seq(struct journal *, u64); int bch2_journal_meta(struct journal *); +int bch2_journal_log_msg(struct journal *, const char *, ...); void bch2_journal_halt(struct journal *); diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index ec565ed..a920a11 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -670,6 +670,7 @@ static int bch2_journal_reclaim_thread(void *arg) struct journal *j = arg; struct bch_fs *c = container_of(j, struct bch_fs, journal); unsigned long delay, now; + bool journal_empty; int ret = 0; set_freezable(); @@ -696,10 +697,17 @@ static int bch2_journal_reclaim_thread(void *arg) break; if (j->reclaim_kicked) break; - if (time_after_eq(jiffies, j->next_reclaim)) - break; - freezable_schedule_timeout(j->next_reclaim - jiffies); + spin_lock(&j->lock); + journal_empty = fifo_empty(&j->pin); + spin_unlock(&j->lock); + + if (journal_empty) + freezable_schedule(); + else if (time_after(j->next_reclaim, jiffies)) + freezable_schedule_timeout(j->next_reclaim - jiffies); + else + break; } __set_current_state(TASK_RUNNING); } @@ -809,10 +817,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) seq = 0; spin_lock(&j->lock); - while (!ret && seq < j->pin.back) { + while (!ret) { struct bch_replicas_padded replicas; seq = max(seq, journal_last_seq(j)); + if (seq >= j->pin.back) + break; bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, journal_seq_pin(j, seq)->devs); seq++; diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 6fd4581..071fcb4 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -240,6 +240,10 @@ struct journal { spinlock_t err_lock; struct mutex reclaim_lock; + /* + * Used for waiting until journal reclaim has freed up space in the + * journal: + */ wait_queue_head_t reclaim_wait; struct task_struct *reclaim_thread; bool reclaim_kicked; diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 6c4ffc5..8879715 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -578,6 +578,9 @@ static int bch2_journal_replay(struct bch_fs *c) bch2_journal_set_replay_done(j); bch2_journal_flush_all_pins(j); ret = bch2_journal_error(j); + + if (keys->nr && !ret) + bch2_journal_log_msg(&c->journal, "journal replay finished"); err: kvfree(keys_sorted); return ret; diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h index 3745873..4da4330 100644 --- a/libbcachefs/reflink.h +++ b/libbcachefs/reflink.h @@ -10,7 +10,9 @@ bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reflink_p (struct bkey_ops) { \ .key_invalid = bch2_reflink_p_invalid, \ .val_to_text = bch2_reflink_p_to_text, \ - .key_merge = bch2_reflink_p_merge, \ + .key_merge = bch2_reflink_p_merge, \ + .trans_trigger = bch2_trans_mark_reflink_p, \ + .atomic_trigger = bch2_mark_reflink_p, \ } const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c); @@ -21,6 +23,8 @@ void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, .key_invalid = bch2_reflink_v_invalid, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ + .trans_trigger = bch2_trans_mark_extent, \ + .atomic_trigger = bch2_mark_extent, \ } const char *bch2_indirect_inline_data_invalid(const struct bch_fs *, diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h index 57d6367..591bbb9 100644 --- a/libbcachefs/str_hash.h +++ b/libbcachefs/str_hash.h @@ -163,12 +163,10 @@ bch2_hash_lookup(struct btree_trans *trans, if (ret) return ret; - for_each_btree_key_norestart(trans, *iter, desc.btree_id, + for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), + POS(inum.inum, U64_MAX), BTREE_ITER_SLOTS|flags, k, ret) { - if (iter->pos.inode != inum.inum) - break; - if (is_visible_key(desc, inum, k)) { if (!desc.cmp_key(k, key)) return 0; @@ -199,15 +197,12 @@ bch2_hash_hole(struct btree_trans *trans, if (ret) return ret; - for_each_btree_key_norestart(trans, *iter, desc.btree_id, + for_each_btree_key_upto_norestart(trans, *iter, desc.btree_id, SPOS(inum.inum, desc.hash_key(info, key), snapshot), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter->pos.inode != inum.inum) - break; - + POS(inum.inum, U64_MAX), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) if (!is_visible_key(desc, inum, k)) return 0; - } bch2_trans_iter_exit(trans, iter); return ret ?: -ENOSPC; @@ -260,14 +255,12 @@ int bch2_hash_set(struct btree_trans *trans, if (ret) return ret; - for_each_btree_key_norestart(trans, iter, desc.btree_id, + for_each_btree_key_upto_norestart(trans, iter, desc.btree_id, SPOS(inum.inum, desc.hash_bkey(info, bkey_i_to_s_c(insert)), snapshot), + POS(inum.inum, U64_MAX), BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { - if (iter.pos.inode != inum.inum) - break; - if (is_visible_key(desc, inum, k)) { if (!desc.cmp_bkey(k, bkey_i_to_s_c(insert))) goto found; diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 49e3885..3d6ece5 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -607,23 +607,32 @@ STORE(bch2_fs_opts_dir) { struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir); const struct bch_option *opt = container_of(attr, struct bch_option, attr); - int ret, id = opt - bch2_opt_table; + int ret = size, id = opt - bch2_opt_table; char *tmp; u64 v; + /* + * We don't need to take c->writes for correctness, but it eliminates an + * unsightly error message in the dmesg log when we're RO: + */ + if (unlikely(!percpu_ref_tryget(&c->writes))) + return -EROFS; + tmp = kstrdup(buf, GFP_KERNEL); - if (!tmp) - return -ENOMEM; + if (!tmp) { + ret = -ENOMEM; + goto err; + } ret = bch2_opt_parse(c, NULL, opt, strim(tmp), &v); kfree(tmp); if (ret < 0) - return ret; + goto err; ret = bch2_opt_check_may_set(c, id, v); if (ret < 0) - return ret; + goto err; bch2_opt_set_sb(c, opt, v); bch2_opt_set_by_id(&c->opts, id, v); @@ -633,8 +642,9 @@ STORE(bch2_fs_opts_dir) bch2_rebalance_add_work(c, S64_MAX); rebalance_wakeup(c); } - - return size; +err: + percpu_ref_put(&c->writes); + return ret; } SYSFS_OPS(bch2_fs_opts_dir); diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c index 3addf40..4369bfc 100644 --- a/libbcachefs/tests.c +++ b/libbcachefs/tests.c @@ -15,15 +15,14 @@ static void delete_test_keys(struct bch_fs *c) int ret; ret = bch2_btree_delete_range(c, BTREE_ID_extents, - POS_MIN, SPOS_MAX, - BTREE_ITER_ALL_SNAPSHOTS, + SPOS(0, 0, U32_MAX), SPOS_MAX, + 0, NULL); BUG_ON(ret); ret = bch2_btree_delete_range(c, BTREE_ID_xattrs, - POS_MIN, SPOS_MAX, - BTREE_ITER_ALL_SNAPSHOTS, - NULL); + SPOS(0, 0, U32_MAX), SPOS_MAX, + 0, NULL); BUG_ON(ret); } @@ -814,9 +813,8 @@ static int seq_delete(struct bch_fs *c, u64 nr) int ret; ret = bch2_btree_delete_range(c, BTREE_ID_xattrs, - POS_MIN, SPOS_MAX, - BTREE_ITER_ALL_SNAPSHOTS, - NULL); + SPOS(0, 0, U32_MAX), SPOS_MAX, + 0, NULL); if (ret) bch_err(c, "error in seq_delete: %i", ret); return ret; diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 2c9e910..d6d7f1b 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -372,7 +372,7 @@ static inline void pr_time(struct printbuf *out, u64 _time) #ifdef __KERNEL__ static inline void uuid_unparse_lower(u8 *uuid, char *out) { - sprintf(out, "%plU", uuid); + sprintf(out, "%pUb", uuid); } #else #include diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c index c2e9520..1c680b1 100644 --- a/libbcachefs/xattr.c +++ b/libbcachefs/xattr.c @@ -311,13 +311,9 @@ retry: if (ret) goto err; - for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs, - SPOS(inum, offset, snapshot), 0, k, ret) { - BUG_ON(k.k->p.inode < inum); - - if (k.k->p.inode > inum) - break; - + for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_xattrs, + SPOS(inum, offset, snapshot), + POS(inum, U64_MAX), 0, k, ret) { if (k.k->type != KEY_TYPE_xattr) continue; diff --git a/linux/zstd_compress_module.c b/linux/zstd_compress_module.c new file mode 100644 index 0000000..35cc5cb --- /dev/null +++ b/linux/zstd_compress_module.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +#define ZSTD_FORWARD_IF_ERR(ret) \ + do { \ + size_t const __ret = (ret); \ + if (ZSTD_isError(__ret)) \ + return __ret; \ + } while (0) + +static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters, + unsigned long long pledged_src_size) +{ + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset( + cctx, ZSTD_reset_session_and_parameters)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize( + cctx, pledged_src_size)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_windowLog, parameters->cParams.windowLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_hashLog, parameters->cParams.hashLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_chainLog, parameters->cParams.chainLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_searchLog, parameters->cParams.searchLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_minMatch, parameters->cParams.minMatch)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_targetLength, parameters->cParams.targetLength)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_strategy, parameters->cParams.strategy)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag)); + return 0; +} + +int zstd_min_clevel(void) +{ + return ZSTD_minCLevel(); +} +EXPORT_SYMBOL(zstd_min_clevel); + +int zstd_max_clevel(void) +{ + return ZSTD_maxCLevel(); +} +EXPORT_SYMBOL(zstd_max_clevel); + +size_t zstd_compress_bound(size_t src_size) +{ + return ZSTD_compressBound(src_size); +} +EXPORT_SYMBOL(zstd_compress_bound); + +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size) +{ + return ZSTD_getParams(level, estimated_src_size, 0); +} +EXPORT_SYMBOL(zstd_get_params); + +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_estimateCCtxSize_usingCParams(*cparams); +} +EXPORT_SYMBOL(zstd_cctx_workspace_bound); + +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + return ZSTD_initStaticCCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_cctx); + +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters) +{ + ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size)); + return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size); +} +EXPORT_SYMBOL(zstd_compress_cctx); + +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_estimateCStreamSize_usingCParams(*cparams); +} +EXPORT_SYMBOL(zstd_cstream_workspace_bound); + +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size) +{ + zstd_cstream *cstream; + + if (workspace == NULL) + return NULL; + + cstream = ZSTD_initStaticCStream(workspace, workspace_size); + if (cstream == NULL) + return NULL; + + /* 0 means unknown in linux zstd API but means 0 in new zstd API */ + if (pledged_src_size == 0) + pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; + + if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size))) + return NULL; + + return cstream; +} +EXPORT_SYMBOL(zstd_init_cstream); + +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size) +{ + return ZSTD_resetCStream(cstream, pledged_src_size); +} +EXPORT_SYMBOL(zstd_reset_cstream); + +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_compressStream(cstream, output, input); +} +EXPORT_SYMBOL(zstd_compress_stream); + +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_flushStream(cstream, output); +} +EXPORT_SYMBOL(zstd_flush_stream); + +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_endStream(cstream, output); +} +EXPORT_SYMBOL(zstd_end_stream); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/linux/zstd_decompress_module.c b/linux/zstd_decompress_module.c new file mode 100644 index 0000000..7e8cd44 --- /dev/null +++ b/linux/zstd_decompress_module.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +/* Common symbols. zstd_compress must depend on zstd_decompress. */ + +unsigned int zstd_is_error(size_t code) +{ + return ZSTD_isError(code); +} +EXPORT_SYMBOL(zstd_is_error); + +zstd_error_code zstd_get_error_code(size_t code) +{ + return ZSTD_getErrorCode(code); +} +EXPORT_SYMBOL(zstd_get_error_code); + +const char *zstd_get_error_name(size_t code) +{ + return ZSTD_getErrorName(code); +} +EXPORT_SYMBOL(zstd_get_error_name); + +/* Decompression symbols. */ + +size_t zstd_dctx_workspace_bound(void) +{ + return ZSTD_estimateDCtxSize(); +} +EXPORT_SYMBOL(zstd_dctx_workspace_bound); + +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + return ZSTD_initStaticDCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dctx); + +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size) +{ + return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size); +} +EXPORT_SYMBOL(zstd_decompress_dctx); + +size_t zstd_dstream_workspace_bound(size_t max_window_size) +{ + return ZSTD_estimateDStreamSize(max_window_size); +} +EXPORT_SYMBOL(zstd_dstream_workspace_bound); + +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + (void)max_window_size; + return ZSTD_initStaticDStream(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dstream); + +size_t zstd_reset_dstream(zstd_dstream *dstream) +{ + return ZSTD_resetDStream(dstream); +} +EXPORT_SYMBOL(zstd_reset_dstream); + +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_decompressStream(dstream, output, input); +} +EXPORT_SYMBOL(zstd_decompress_stream); + +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size) +{ + return ZSTD_findFrameCompressedSize(src, src_size); +} +EXPORT_SYMBOL(zstd_find_frame_compressed_size); + +size_t zstd_get_frame_header(zstd_frame_header *header, const void *src, + size_t src_size) +{ + return ZSTD_getFrameHeader(header, src, src_size); +} +EXPORT_SYMBOL(zstd_get_frame_header); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Decompressor"); -- 2.39.2