// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "checksum.h"
#include "compress.h"
#include "extents.h"
#include "io.h"
#include "super-io.h"

#include <linux/lz4.h>
#include <linux/sched/mm.h>
#include <linux/zlib.h>
#include <linux/zstd.h>
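/*
 * Bounce buffer descriptor: records how the buffer was obtained so that
 * bio_unmap_or_unbounce() can release it correctly. (Reconstructed from its
 * uses in this file; every field and constant below appears in the code.)
 */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,	/* direct pointer into the bio's own pages */
		BB_VMAP,	/* vmap() of the bio's pages */
		BB_KMALLOC,	/* plain kmalloc() allocation */
		BB_MEMPOOL,	/* from the c->compression_bounce mempool */
	}		type;
	int		rw;	/* READ or WRITE: selects the bounce mempool */
};

/*
 * Allocate a bounce buffer of @size bytes: try kmalloc() first, and fall back
 * to the preallocated mempool, which guarantees forward progress:
 */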
static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->sb.encoded_extent_max << 9);

	b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
	BUG_ON(!b);
	return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
}
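/*
 * Returns true if the bio's data is physically contiguous, in which case it
 * can be used in place, with no vmap() or bounce copy:
 */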
static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	void *expected_start = NULL;

	__bio_for_each_bvec(bv, bio, iter, start) {
		if (expected_start &&
		    expected_start != page_address(bv.bv_page) + bv.bv_offset)
			return false;

		expected_start = page_address(bv.bv_page) +
			bv.bv_offset + bv.bv_len;
	}

	return true;
}
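/*
 * Produce a linear view of the bio's data: use it in place if physically
 * contiguous, else vmap() the pages if they line up, else fall back to a
 * bounce buffer (copying in here for reads; writers copy back out with
 * memcpy_to_bio() when done):
 */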
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0, flags;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	void *data;

	BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);

	if (!IS_ENABLED(CONFIG_HIGHMEM) &&
	    bio_phys_contig(bio, start))
		return (struct bbuf) {
			.b = page_address(bio_iter_page(bio, start)) +
				bio_iter_offset(bio, start),
			.type = BB_NONE, .rw = rw
		};

	/* check if we can map the pages contiguously: */
	__bio_for_each_segment(bv, bio, iter, start) {
		if (iter.bi_size != start.bi_size &&
		    bv.bv_offset)
			goto bounce;

		if (bv.bv_len < iter.bi_size &&
		    bv.bv_offset + bv.bv_len < PAGE_SIZE)
			goto bounce;

		nr_pages++;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	flags = memalloc_nofs_save();
	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	memalloc_nofs_restore(flags);

	if (pages != stack_pages)
		kfree(pages);

	if (data)
		return (struct bbuf) {
			.b = data + bio_iter_offset(bio, start),
			.type = BB_VMAP, .rw = rw
		};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}
static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}
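/* Release whatever mapping or allocation bio_map_or_bounce() set up: */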
static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(buf.b, &c->compression_bounce[buf.rw]);
		break;
	}
}
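/* The kernel's zlib expects the caller to supply its workspace: */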
static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
	strm->workspace = workspace;
}
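/*
 * Decompress @src into the linear buffer @dst_data, dispatching on the
 * compression type recorded in @crc; returns 0 on success, -EIO on failure:
 */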
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc_unpacked crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc.uncompressed_size << 9;
	void *workspace;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_TYPE_lz4_old:
	case BCH_COMPRESSION_TYPE_lz4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len)
			goto err;
		break;
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src_data.b,
			.avail_in	= src_len,
			.next_out	= dst_data,
			.avail_out	= dst_len,
		};

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);

		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);
		ret = zlib_inflate(&strm, Z_FINISH);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != Z_STREAM_END)
			goto err;
		break;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		ZSTD_DCtx *ctx;
		size_t real_src_len = le32_to_cpup(src_data.b);

		if (real_src_len > src_len - 4)
			goto err;

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
		ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());

		ret = ZSTD_decompressDCtx(ctx,
				dst_data,	dst_len,
				src_data.b + 4, real_src_len);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != dst_len)
			goto err;
		break;
	}
	default:
		BUG();
	}
	ret = 0;
out:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
err:
	ret = -EIO;
	goto out;
}
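/*
 * Decompress an extent in place, back into the bio that currently holds the
 * compressed data, updating @crc to describe the now-uncompressed extent:
 */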
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				struct bch_extent_crc_unpacked *crc)
{
	struct bbuf data = { NULL };
	size_t dst_len = crc->uncompressed_size << 9;

	/* bio must own its pages: */
	BUG_ON(!bio->bi_vcnt);
	BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc->uncompressed_size > c->sb.encoded_extent_max ||
	    crc->compressed_size > c->sb.encoded_extent_max) {
		bch_err(c, "error rewriting existing data: extent too big");
		return -EIO;
	}

	data = __bounce_alloc(c, dst_len, WRITE);

	if (__bio_uncompress(c, bio, data.b, *crc)) {
		bch_err(c, "error rewriting existing data: decompression error");
		bio_unmap_or_unbounce(c, data);
		return -EIO;
	}

	/*
	 * XXX: don't have a good way to assert that the bio was allocated with
	 * enough space, we depend on bch2_move_extent doing the right thing
	 */
	bio->bi_iter.bi_size = crc->live_size << 9;

	memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

	crc->csum_type		= 0;
	crc->compression_type	= 0;
	crc->compressed_size	= crc->live_size;
	crc->uncompressed_size	= crc->live_size;
	crc->offset		= 0;
	crc->csum		= (struct bch_csum) { 0, 0 };

	bio_unmap_or_unbounce(c, data);
	return 0;
}
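/*
 * Decompress from @src into @dst at @dst_iter; decompresses directly into
 * @dst when the sizes line up, otherwise through a bounce buffer:
 */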
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc_unpacked crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc.uncompressed_size << 9;
	int ret;

	if (crc.uncompressed_size > c->sb.encoded_extent_max ||
	    crc.compressed_size > c->sb.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}
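/*
 * Try one compression pass into @dst; returns the compressed size on
 * success, 0 if the output didn't fit, or (LZ4 only) a negative hint whose
 * magnitude is how much input would have fit:
 */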
static int attempt_compress(struct bch_fs *c,
			    void *workspace,
			    void *dst, size_t dst_len,
			    void *src, size_t src_len,
			    enum bch_compression_type compression_type)
{
	switch (compression_type) {
	case BCH_COMPRESSION_TYPE_lz4: {
		int len = src_len;
		int ret = LZ4_compress_destSize(
				src,		dst,
				&len,		dst_len,
				workspace);

		if (len < src_len)
			return -len;

		return ret;
	}
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src,
			.avail_in	= src_len,
			.next_out	= dst,
			.avail_out	= dst_len,
		};

		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
			return 0;

		if (zlib_deflateEnd(&strm) != Z_OK)
			return 0;

		return strm.total_out;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
			ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));

		size_t len = ZSTD_compressCCtx(ctx,
				dst + 4,	dst_len - 4,
				src,		src_len,
				c->zstd_params);
		if (ZSTD_isError(len))
			return 0;

		*((__le32 *) dst) = cpu_to_le32(len);
		return len + 4;
	}
	default:
		BUG();
	}
}
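/*
 * Compress @src into @dst, retrying with less input until the output fits;
 * returns the compression type used, or BCH_COMPRESSION_TYPE_incompressible
 * if compression didn't save space:
 */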
static unsigned __bio_compress(struct bch_fs *c,
			       struct bio *dst, size_t *dst_len,
			       struct bio *src, size_t *src_len,
			       enum bch_compression_type compression_type)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	void *workspace;
	unsigned pad;
	int ret = 0;

	BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
	BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));

	/* If it's only one block, don't bother trying to compress: */
	if (bio_sectors(src) <= c->opts.block_size)
		goto err;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);

	*src_len = src->bi_iter.bi_size;
	*dst_len = dst->bi_iter.bi_size;

	/*
	 * XXX: this algorithm sucks when the compression code doesn't tell us
	 * how much would fit, like LZ4 does:
	 */
	while (1) {
		if (*src_len <= block_bytes(c)) {
			ret = -1;
			break;
		}

		ret = attempt_compress(c, workspace,
				       dst_data.b,	*dst_len,
				       src_data.b,	*src_len,
				       compression_type);
		if (ret > 0) {
			*dst_len = ret;
			ret = 0;
			break;
		}

		/* Didn't fit: should we retry with a smaller amount? */
		if (*src_len <= *dst_len) {
			ret = -1;
			break;
		}

		/*
		 * If ret is negative, it's a hint as to how much data would fit
		 */
		BUG_ON(ret > 0);
		BUG_ON(-ret >= *src_len);

		if (ret)
			*src_len = -ret;
		else
			*src_len -= (*src_len - *dst_len) / 2;
		*src_len = round_down(*src_len, block_bytes(c));
	}

	mempool_free(workspace, &c->compress_workspace[compression_type]);

	if (ret)
		goto err;

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return compression_type;
err:
	compression_type = BCH_COMPRESSION_TYPE_incompressible;
	goto out;
}
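/*
 * Public entry point: clamp the input to the maximum encoded extent size,
 * never produce output bigger than the input, and restore both bios'
 * iterators before returning. A hypothetical caller sketch (not from this
 * file; control flow is illustrative):
 *
 *	size_t src_len, dst_len;
 *	unsigned t = bch2_bio_compress(c, dst, &dst_len, src, &src_len,
 *				       BCH_COMPRESSION_TYPE_lz4);
 *	if (t == BCH_COMPRESSION_TYPE_incompressible)
 *		... write the extent uncompressed ...
 *	else
 *		... src_len bytes of input became dst_len bytes of output ...
 */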
unsigned bch2_bio_compress(struct bch_fs *c,
			   struct bio *dst, size_t *dst_len,
			   struct bio *src, size_t *src_len,
			   unsigned compression_type)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->sb.encoded_extent_max << 9);
	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
		compression_type = BCH_COMPRESSION_TYPE_lz4;

	compression_type =
		__bio_compress(c, dst, dst_len, src, src_len, compression_type);

	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
	return compression_type;
}
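/*
 * Superblock feature bookkeeping: map each compression option to the feature
 * bit it requires (BCH_FEATURE_none is a placeholder so that "no compression"
 * maps to zero):
 */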
static int __bch2_fs_compress_init(struct bch_fs *, u64);

#define BCH_FEATURE_none	0

static const unsigned bch2_compression_opt_to_feature[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

#undef BCH_FEATURE_none

static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
{
	int ret = 0;

	if ((c->sb.features & f) == f)
		return 0;

	mutex_lock(&c->sb_lock);

	if ((c->sb.features & f) == f) {
		mutex_unlock(&c->sb_lock);
		return 0;
	}

	ret = __bch2_fs_compress_init(c, c->sb.features|f);
	if (ret) {
		mutex_unlock(&c->sb_lock);
		return ret;
	}

	c->disk_sb.sb->features[0] |= cpu_to_le64(f);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_type)
{
	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));

	return compression_type
		? __bch2_check_set_has_compressed_data(c,
				1ULL << bch2_compression_opt_to_feature[compression_type])
		: 0;
}
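/* Free all compression bounce buffers and workspaces at shutdown: */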
void bch2_fs_compress_exit(struct bch_fs *c)
{
	unsigned i;

	mempool_exit(&c->decompress_workspace);
	for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
		mempool_exit(&c->compress_workspace[i]);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}
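/*
 * Allocate bounce buffers and per-algorithm workspace mempools for every
 * compression type enabled in @features; runs at mount and again when a
 * compression type is first enabled at runtime:
 */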
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
	size_t max_extent = c->sb.encoded_extent_max << 9;
	size_t decompress_workspace_size = 0;
	bool decompress_workspace_needed;
	ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
	struct {
		unsigned	feature;
		unsigned	type;
		size_t		compress_workspace;
		size_t		decompress_workspace;
	} compression_types[] = {
		{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
		{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
			zlib_inflate_workspacesize(), },
		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
			ZSTD_CCtxWorkspaceBound(params.cParams),
			ZSTD_DCtxWorkspaceBound() },
	}, *i;
	int ret = 0;

	pr_verbose_init(c->opts, "");

	c->zstd_params = params;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++)
		if (features & (1 << i->feature))
			goto have_compressed;

	goto out;
have_compressed:

	if (!mempool_initialized(&c->compression_bounce[READ])) {
		ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
						  1, max_extent);
		if (ret)
			goto out;
	}

	if (!mempool_initialized(&c->compression_bounce[WRITE])) {
		ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
						  1, max_extent);
		if (ret)
			goto out;
	}

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++) {
		decompress_workspace_size =
			max(decompress_workspace_size, i->decompress_workspace);

		if (!(features & (1 << i->feature)))
			continue;

		if (i->decompress_workspace)
			decompress_workspace_needed = true;

		if (mempool_initialized(&c->compress_workspace[i->type]))
			continue;

		ret = mempool_init_kvpmalloc_pool(
				&c->compress_workspace[i->type],
				1, i->compress_workspace);
		if (ret)
			goto out;
	}

	if (!mempool_initialized(&c->decompress_workspace)) {
		ret = mempool_init_kvpmalloc_pool(
				&c->decompress_workspace,
				1, decompress_workspace_size);
		if (ret)
			goto out;
	}
out:
	pr_verbose_init(c->opts, "ret %i", ret);
	return ret;
}
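/*
 * Mount-time init: enable whatever the superblock features already require,
 * plus the compression and background_compression options:
 */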
int bch2_fs_compress_init(struct bch_fs *c)
{
	u64 f = c->sb.features;

	if (c->opts.compression)
		f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];

	if (c->opts.background_compression)
		f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];

	return __bch2_fs_compress_init(c, f);
}