// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "checksum.h"
#include "compress.h"
#include "extents.h"
#include "super-io.h"

#include <linux/lz4.h>
#include <linux/zlib.h>
#include <linux/zstd.h>
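
/*
 * Bounce buffer handle used throughout this file. The struct definition
 * itself isn't shown above, so this is a sketch reconstructed from how
 * .b, .type and .rw are used below - treat the exact layout as an
 * assumption rather than the verbatim bcachefs definition:
 */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,	/* direct pointer into the bio's pages */
		BB_VMAP,	/* vmap()ed copy of the page list */
		BB_KMALLOC,	/* kmalloc()ed bounce buffer */
		BB_MEMPOOL,	/* from c->compression_bounce[rw] */
	}		type;
	int		rw;
};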
static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->opts.encoded_extent_max);

	b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
	return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
}
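
/*
 * Returns true if the bio's data is physically contiguous, in which case
 * (provided it isn't in highmem) it can be addressed linearly via
 * page_address() with no vmap or bounce buffer:
 */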
static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	void *expected_start = NULL;

	__bio_for_each_bvec(bv, bio, iter, start) {
		if (expected_start &&
		    expected_start != page_address(bv.bv_page) + bv.bv_offset)
			return false;

		expected_start = page_address(bv.bv_page) +
			bv.bv_offset + bv.bv_len;
	}

	return true;
}
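
/*
 * Produce a linear mapping of @bio's data, in order of preference: a
 * direct pointer into its pages (BB_NONE), a vmap() of those pages
 * (BB_VMAP), or a bounce buffer - for READ the data is copied into the
 * bounce buffer here; for WRITE the caller copies it back out with
 * memcpy_to_bio() when done:
 */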
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	void *data;

	BUG_ON(start.bi_size > c->opts.encoded_extent_max);

	if (!PageHighMem(bio_iter_page(bio, start)) &&
	    bio_phys_contig(bio, start))
		return (struct bbuf) {
			.b = page_address(bio_iter_page(bio, start)) +
				bio_iter_offset(bio, start),
			.type = BB_NONE, .rw = rw
		};

	/* check if we can map the pages contiguously: */
	__bio_for_each_segment(bv, bio, iter, start) {
		if (iter.bi_size != start.bi_size &&
		    bv.bv_offset)
			goto bounce;

		if (bv.bv_len < iter.bi_size &&
		    bv.bv_offset + bv.bv_len < PAGE_SIZE)
			goto bounce;

		nr_pages++;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);

	if (data)
		return (struct bbuf) {
			.b = data + bio_iter_offset(bio, start),
			.type = BB_VMAP, .rw = rw
		};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}
static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}
static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(buf.b, &c->compression_bounce[buf.rw]);
		break;
	}
}
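
/*
 * The kernel's zlib does no internal allocation, unlike userspace zlib:
 * the caller must hand it workspace memory before initializing the stream:
 */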
static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
	strm->workspace = workspace;
}
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc_unpacked crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc.uncompressed_size << 9;
	void *workspace;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
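	/*
	 * Note: src_len is the encoded size rounded up to a whole sector, so
	 * it may include padding past the end of the compressed stream;
	 * LZ4_decompress_safe_partial() stops once dst_len output bytes have
	 * been produced, so the padding is harmless (an assumption based on
	 * how compressed extents are sector aligned):
	 */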
	case BCH_COMPRESSION_TYPE_lz4_old:
	case BCH_COMPRESSION_TYPE_lz4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len)
			goto err;
		break;
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src_data.b,
			.avail_in	= src_len,
			.next_out	= dst_data,
			.avail_out	= dst_len,
		};

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);

		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);
		ret = zlib_inflate(&strm, Z_FINISH);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != Z_STREAM_END)
			goto err;
		break;
	}
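	/*
	 * zstd extents carry a 4 byte little-endian header with the exact
	 * compressed size, since zstd - unlike lz4 above - can't tolerate
	 * trailing sector padding; see the comment in attempt_compress():
	 */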
	case BCH_COMPRESSION_TYPE_zstd: {
		ZSTD_DCtx *ctx;
		size_t real_src_len = le32_to_cpup(src_data.b);

		if (real_src_len > src_len - 4)
			goto err;

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
		ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());

		ret = zstd_decompress_dctx(ctx,
				dst_data,	dst_len,
				src_data.b + 4, real_src_len);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != dst_len)
			goto err;
		break;
	}
	default:
		BUG();
	}

	ret = 0;
out:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
err:
	ret = -EIO;
	goto out;
}
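
/*
 * Decompress an extent in place, reusing @bio's own pages for the result;
 * on success, @crc is rewritten to describe the now-uncompressed data:
 */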
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				struct bch_extent_crc_unpacked *crc)
{
	struct bbuf data = { NULL };
	size_t dst_len = crc->uncompressed_size << 9;

	/* bio must own its pages: */
	BUG_ON(!bio->bi_vcnt);
	BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc->uncompressed_size << 9	> c->opts.encoded_extent_max ||
	    crc->compressed_size << 9	> c->opts.encoded_extent_max) {
		bch_err(c, "error rewriting existing data: extent too big");
		return -EIO;
	}

	data = __bounce_alloc(c, dst_len, WRITE);

	if (__bio_uncompress(c, bio, data.b, *crc)) {
		if (!c->opts.no_data_io)
			bch_err(c, "error rewriting existing data: decompression error");
		bio_unmap_or_unbounce(c, data);
		return -EIO;
	}

	/*
	 * XXX: don't have a good way to assert that the bio was allocated with
	 * enough space, we depend on bch2_move_extent doing the right thing
	 */
	bio->bi_iter.bi_size = crc->live_size << 9;

	memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

	crc->csum_type		= 0;
	crc->compression_type	= 0;
	crc->compressed_size	= crc->live_size;
	crc->uncompressed_size	= crc->live_size;
	crc->offset		= 0;
	crc->csum		= (struct bch_csum) { 0, 0 };

	bio_unmap_or_unbounce(c, data);
	return 0;
}
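
/*
 * Decompress @src into @dst at @dst_iter: when @dst is exactly the
 * uncompressed size we decompress straight into it (mapped or vmapped);
 * otherwise we decompress into a bounce buffer and copy the live range
 * out afterwards:
 */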
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc_unpacked crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc.uncompressed_size << 9;
	int ret;

	if (crc.uncompressed_size << 9	> c->opts.encoded_extent_max ||
	    crc.compressed_size << 9	> c->opts.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}
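
/*
 * Returns the compressed size in bytes on success, 0 if the output didn't
 * fit in dst_len, and - for lz4 - a negative value hinting how much input
 * would have fit (consumed by the retry loop in __bio_compress()):
 */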
static int attempt_compress(struct bch_fs *c,
			    void *workspace,
			    void *dst, size_t dst_len,
			    void *src, size_t src_len,
			    struct bch_compression_opt compression)
{
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];

	switch (compression_type) {
	case BCH_COMPRESSION_TYPE_lz4:
		if (compression.level < LZ4HC_MIN_CLEVEL) {
			int len = src_len;
			int ret = LZ4_compress_destSize(
					src,		dst,
					&len,		dst_len,
					workspace);

			if (len < src_len)
				return -len;

			return ret;
		} else {
			int ret = LZ4_compress_HC(
					src,		dst,
					src_len,	dst_len,
					compression.level,
					workspace);

			return ret ?: -1;
		}
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src,
			.avail_in	= src_len,
			.next_out	= dst,
			.avail_out	= dst_len,
		};

		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm,
				  compression.level
				  ? clamp_t(unsigned, compression.level,
					    Z_BEST_SPEED, Z_BEST_COMPRESSION)
				  : Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
			return 0;

		if (zlib_deflateEnd(&strm) != Z_OK)
			return 0;

		return strm.total_out;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		/*
		 * rescale:
		 * zstd max compression level is 22, our max level is 15
		 */
		unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
		ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
		ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
			zstd_cctx_workspace_bound(&params.cParams));

		/*
		 * ZSTD requires that when we decompress we pass in the exact
		 * compressed size - rounding it up to the nearest sector
		 * doesn't work, so we use the first 4 bytes of the buffer for
		 * that.
		 *
		 * Additionally, the ZSTD code seems to have a bug where it will
		 * write just past the end of the buffer - so subtract a fudge
		 * factor (7 bytes) from the dst buffer size to account for
		 * that.
		 */
		size_t len = zstd_compress_cctx(ctx,
				dst + 4,	dst_len - 4 - 7,
				src,		src_len,
				&c->zstd_params);
		if (zstd_is_error(len))
			return 0;

		*((__le32 *) dst) = cpu_to_le32(len);
		return len + 4;
	}
	default:
		BUG();
	}
}
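
/*
 * Compress as much of @src as will fit in @dst: each time the compressor
 * fails to fit, shrink the input - using the compressor's hint when one
 * was returned - and retry, so at least a prefix of the extent ends up
 * compressed:
 */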
static unsigned __bio_compress(struct bch_fs *c,
			       struct bio *dst, size_t *dst_len,
			       struct bio *src, size_t *src_len,
			       struct bch_compression_opt compression)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	void *workspace;
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];
	unsigned pad;
	int ret = 0;

	BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
	BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));

	/* If it's only one block, don't bother trying to compress: */
	if (src->bi_iter.bi_size <= c->opts.block_size)
		return BCH_COMPRESSION_TYPE_incompressible;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);

	*src_len = src->bi_iter.bi_size;
	*dst_len = dst->bi_iter.bi_size;

	/*
	 * XXX: this algorithm sucks when the compression code doesn't tell us
	 * how much would fit, like LZ4 does:
	 */
	while (1) {
		if (*src_len <= block_bytes(c)) {
			ret = -1;
			break;
		}

		ret = attempt_compress(c, workspace,
				       dst_data.b,	*dst_len,
				       src_data.b,	*src_len,
				       compression);
		if (ret > 0) {
			*dst_len = ret;
			ret = 0;
			break;
		}

		/* Didn't fit: should we retry with a smaller amount? */
		if (*src_len <= *dst_len) {
			ret = -1;
			break;
		}

		/*
		 * If ret is negative, it's a hint as to how much data would fit
		 */
		BUG_ON(-ret >= *src_len);

		if (ret < 0)
			*src_len = -ret;
		else
			*src_len -= (*src_len - *dst_len) / 2;

		*src_len = round_down(*src_len, block_bytes(c));
	}

	mempool_free(workspace, &c->compress_workspace[compression_type]);

	if (ret)
		goto err;

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));
	ret = compression_type;
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
err:
	ret = BCH_COMPRESSION_TYPE_incompressible;
	goto out;
}
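
/*
 * Public entry point: temporarily clamps the iterators (consume at most
 * encoded_extent_max from @src, never produce more output than input),
 * calls __bio_compress(), then restores them; *dst_len and *src_len tell
 * the caller how many bytes were produced and consumed:
 */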
unsigned bch2_bio_compress(struct bch_fs *c,
			   struct bio *dst, size_t *dst_len,
			   struct bio *src, size_t *src_len,
			   unsigned compression_opt)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;
	unsigned compression_type;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->opts.encoded_extent_max);
	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	compression_type =
		__bio_compress(c, dst, dst_len, src, src_len,
			       bch2_compression_decode(compression_opt));

	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
	return compression_type;
}
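
/*
 * Writing data with a given compression type requires the matching
 * superblock feature bit: the table below maps each compression opt to
 * its feature, and the helpers that follow set the bit - initializing
 * workspaces first - the first time such data is written:
 */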
static int __bch2_fs_compress_init(struct bch_fs *, u64);

#define BCH_FEATURE_none	0

static const unsigned bch2_compression_opt_to_feature[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

#undef BCH_FEATURE_none

static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
{
	int ret = 0;

	if ((c->sb.features & f) == f)
		return 0;

	mutex_lock(&c->sb_lock);

	if ((c->sb.features & f) == f) {
		mutex_unlock(&c->sb_lock);
		return 0;
	}

	ret = __bch2_fs_compress_init(c, c->sb.features|f);
	if (ret) {
		mutex_unlock(&c->sb_lock);
		return ret;
	}

	c->disk_sb.sb->features[0] |= cpu_to_le64(f);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_opt)
{
	unsigned compression_type = bch2_compression_decode(compression_opt).type;

	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));

	return compression_type
		? __bch2_check_set_has_compressed_data(c,
				1ULL << bch2_compression_opt_to_feature[compression_type])
		: 0;
}
void bch2_fs_compress_exit(struct bch_fs *c)
{
	unsigned i;

	mempool_exit(&c->decompress_workspace);
	for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
		mempool_exit(&c->compress_workspace[i]);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}
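
/*
 * Allocate bounce buffers plus a worst-case-sized workspace mempool for
 * each compression type enabled in @features, so that allocations in the
 * IO path can always make forward progress:
 */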
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
	size_t decompress_workspace_size = 0;
	bool decompress_workspace_needed;
	ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
						 c->opts.encoded_extent_max);
	struct {
		unsigned			feature;
		enum bch_compression_type	type;
		size_t				compress_workspace;
		size_t				decompress_workspace;
	} compression_types[] = {
		{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
			max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS) },
		{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
			zlib_inflate_workspacesize(), },
		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
			zstd_cctx_workspace_bound(&params.cParams),
			zstd_dctx_workspace_bound() },
	}, *i;
	bool have_compressed = false;

	c->zstd_params = params;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++)
		have_compressed |= (features & (1 << i->feature)) != 0;

	if (!have_compressed)
		return 0;

	if (!mempool_initialized(&c->compression_bounce[READ]) &&
	    mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
					1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_read_init;

	if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
	    mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
					1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_write_init;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++) {
		decompress_workspace_size =
			max(decompress_workspace_size, i->decompress_workspace);

		if (!(features & (1 << i->feature)))
			continue;

		if (i->decompress_workspace)
			decompress_workspace_needed = true;

		if (mempool_initialized(&c->compress_workspace[i->type]))
			continue;

		if (mempool_init_kvpmalloc_pool(
				&c->compress_workspace[i->type],
				1, i->compress_workspace))
			return -BCH_ERR_ENOMEM_compression_workspace_init;
	}

	if (!mempool_initialized(&c->decompress_workspace) &&
	    mempool_init_kvpmalloc_pool(&c->decompress_workspace,
					1, decompress_workspace_size))
		return -BCH_ERR_ENOMEM_decompression_workspace_init;

	return 0;
}
static u64 compression_opt_to_feature(unsigned v)
{
	unsigned type = bch2_compression_decode(v).type;

	return 1ULL << bch2_compression_opt_to_feature[type];
}

int bch2_fs_compress_init(struct bch_fs *c)
{
	u64 f = c->sb.features;

	f |= compression_opt_to_feature(c->opts.compression);
	f |= compression_opt_to_feature(c->opts.background_compression);

	return __bch2_fs_compress_init(c, f);
}
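
/*
 * Parse a compression option string of the form "type" or "type:level",
 * e.g. "lz4" or "zstd:15" - levels run from 1 to 15, and "none" doesn't
 * accept a level:
 */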
int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
			       struct printbuf *err)
{
	char *val = kstrdup(_val, GFP_KERNEL);
	char *p = val, *type_str, *level_str;
	struct bch_compression_opt opt = { 0 };
	int ret;

	if (!val)
		return -ENOMEM;

	type_str = strsep(&p, ":");
	level_str = p;

	ret = match_string(bch2_compression_opts, -1, type_str);
	if (ret < 0 && err)
		prt_str(err, "invalid compression type");
	if (ret < 0)
		goto err;

	opt.type = ret;

	if (level_str) {
		unsigned level;

		ret = kstrtouint(level_str, 10, &level);
		if (!ret && !opt.type && level)
			ret = -EINVAL;
		if (!ret && level > 15)
			ret = -EINVAL;
		if (ret < 0 && err)
			prt_str(err, "invalid compression level");
		if (ret < 0)
			goto err;

		opt.level = level;
	}

	ret = 0;
	*res = bch2_compression_encode(opt);
err:
	kfree(val);
	return ret;
}
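
/* The inverse of bch2_opt_compression_parse(): prints "type" or "type:level". */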
void bch2_opt_compression_to_text(struct printbuf *out,
				  struct bch_fs *c,
				  struct bch_sb *sb,
				  u64 v)
{
	struct bch_compression_opt opt = bch2_compression_decode(v);

	prt_str(out, bch2_compression_opts[opt.type]);
	if (opt.level)
		prt_printf(out, ":%u", opt.level);
}