#include <linux/zlib.h>
#include <linux/zstd.h>
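
/*
 * Bounce buffer: .type records how .b was allocated, so that
 * bio_unmap_or_unbounce() knows how to unmap or free it:
 */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,
		BB_VMALLOC,
		BB_KMALLOC,
		BB_VMAP,
		BB_MEMPOOL,
	}		type;
	int		rw;
};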

static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->sb.encoded_extent_max << 9);

	b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT);
	b = b ? page_address(b) : NULL;
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	b = vmalloc(size);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
	b = b ? page_address(b) : NULL;
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	BUG();
}

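/*
 * Get a linear mapping of the bio's contents from @start: point directly at
 * the page if the range is a single contiguous segment, vmap() the pages if
 * the segments are page-aligned, and otherwise fall back to a bounce buffer
 * (copied from the bio when mapping for a read):
 */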
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	bool first = true;
	unsigned prev_end = PAGE_SIZE;
	void *data;

	BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);

#ifndef CONFIG_HIGHMEM
	__bio_for_each_contig_segment(bv, bio, iter, start) {
		if (bv.bv_len == start.bi_size)
			return (struct bbuf) {
				.b = page_address(bv.bv_page) + bv.bv_offset,
				.type = BB_NONE, .rw = rw
			};
	}
#endif
	__bio_for_each_segment(bv, bio, iter, start) {
		if ((!first && bv.bv_offset) ||
		    prev_end != PAGE_SIZE)
			goto bounce;

		prev_end = bv.bv_offset + bv.bv_len;
		nr_pages++;
		/* only the first segment may start mid-page: */
		first = false;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);

	if (data)
		return (struct bbuf) {
			.b = data + bio_iter_offset(bio, start),
			.type = BB_VMAP, .rw = rw
		};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}

static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}

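/* Release whatever __bio_map_or_bounce() handed out: */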
static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_VMALLOC:
		vfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(virt_to_page(buf.b),
			     &c->compression_bounce[buf.rw]);
		break;
	}
}

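/*
 * The kernel's zlib doesn't allocate internally: the caller provides the
 * workspace via a z_stream field that doesn't exist in userspace zlib:
 */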
static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
#ifdef __KERNEL__
	strm->workspace = workspace;
#endif
}

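/*
 * Decompress @src into the linear buffer @dst_data, which must have room for
 * crc.uncompressed_size sectors; returns 0 on success, -EIO on any failure:
 */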
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc_unpacked crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc.uncompressed_size << 9;
	void *workspace;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_LZ4_OLD:
		ret = bch2_lz4_decompress(src_data.b, &src_len,
					  dst_data, dst_len);
		if (ret)
			goto err;
		break;
	case BCH_COMPRESSION_LZ4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len)
			goto err;
		break;
	case BCH_COMPRESSION_GZIP: {
		z_stream strm = {
			.next_in	= src_data.b,
			.avail_in	= src_len,
			.next_out	= dst_data,
			.avail_out	= dst_len,
		};

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);

		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);
		ret = zlib_inflate(&strm, Z_FINISH);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != Z_STREAM_END)
			goto err;
		break;
	}
	case BCH_COMPRESSION_ZSTD: {
		ZSTD_DCtx *ctx;
		size_t len;

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
		ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());

		/* The compressed payload is prefixed with its length: */
		src_len = le32_to_cpup(src_data.b);

		len = ZSTD_decompressDCtx(ctx,
					  dst_data, dst_len,
					  src_data.b + 4, src_len);

		mempool_free(workspace, &c->decompress_workspace);

		if (len != dst_len)
			goto err;
		break;
	}
	default:
		BUG();
	}
	ret = 0;
out:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
err:
	ret = -EIO;
	goto out;
}

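/*
 * Decompress an extent in place, replacing @bio's contents with the
 * uncompressed data and clearing the compression and checksum fields in @crc:
 */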
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				struct bch_extent_crc_unpacked *crc)
{
	struct bbuf data = { NULL };
	size_t dst_len = crc->uncompressed_size << 9;

	/* bio must own its pages: */
	BUG_ON(!bio->bi_vcnt);
	BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc->uncompressed_size > c->sb.encoded_extent_max ||
	    crc->compressed_size > c->sb.encoded_extent_max) {
		bch_err(c, "error rewriting existing data: extent too big");
		return -EIO;
	}

	data = __bounce_alloc(c, dst_len, WRITE);

	if (__bio_uncompress(c, bio, data.b, *crc)) {
		bch_err(c, "error rewriting existing data: decompression error");
		bio_unmap_or_unbounce(c, data);
		return -EIO;
	}

	/*
	 * might have to free existing pages and retry allocation from mempool -
	 * do this _after_ decompressing:
	 */
	bch2_bio_alloc_more_pages_pool(c, bio, crc->live_size << 9);

	memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

	crc->csum_type		= 0;
	crc->compression_type	= 0;
	crc->compressed_size	= crc->live_size;
	crc->uncompressed_size	= crc->live_size;
	crc->offset		= 0;
	crc->csum		= (struct bch_csum) { 0, 0 };

	bio_unmap_or_unbounce(c, data);
	return 0;
}

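/*
 * Decompress @src into @dst at @dst_iter: directly when @dst_iter is exactly
 * uncompressed_size, otherwise through a bounce buffer:
 */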
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc_unpacked crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc.uncompressed_size << 9;
	int ret;

	if (crc.uncompressed_size > c->sb.encoded_extent_max ||
	    crc.compressed_size > c->sb.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}

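/*
 * Returns the compressed size on success, 0 on failure, or (lz4 only) a
 * negative hint of how much source data would have fit:
 */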
static int attempt_compress(struct bch_fs *c,
			    void *workspace,
			    void *dst, size_t dst_len,
			    void *src, size_t src_len,
			    unsigned compression_type)
{
	switch (compression_type) {
	case BCH_COMPRESSION_LZ4: {
		int len = src_len;
		int ret = LZ4_compress_destSize(
				src,		dst,
				&len,		dst_len,
				workspace);

		if (len < src_len)
			return -len;

		return ret;
	}
	case BCH_COMPRESSION_GZIP: {
		z_stream strm = {
			.next_in	= src,
			.avail_in	= src_len,
			.next_out	= dst,
			.avail_out	= dst_len,
		};

		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
			return 0;

		if (zlib_deflateEnd(&strm) != Z_OK)
			return 0;

		return strm.total_out;
	}
	case BCH_COMPRESSION_ZSTD: {
		ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
			ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));

		size_t len = ZSTD_compressCCtx(ctx,
				dst + 4,	dst_len - 4,
				src,		src_len,
				c->zstd_params);
		if (ZSTD_isError(len))
			return 0;

		/* The compressed size is stored in the first 4 bytes of dst: */
		*((__le32 *) dst) = cpu_to_le32(len);
		return len + 4;
	}
	default:
		BUG();
	}
}

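/*
 * Compress as much of @src as will fit into @dst, retrying with progressively
 * less input until the output fits; returns the compression type used, or 0
 * if the data didn't compress:
 */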
static unsigned __bio_compress(struct bch_fs *c,
			       struct bio *dst, size_t *dst_len,
			       struct bio *src, size_t *src_len,
			       unsigned compression_type)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	void *workspace;
	unsigned pad;
	int ret = 0;

	/* If it's only one block, don't bother trying to compress: */
	if (bio_sectors(src) <= c->opts.block_size)
		return 0;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);

	*src_len = src->bi_iter.bi_size;
	*dst_len = dst->bi_iter.bi_size;

	/*
	 * XXX: this algorithm sucks when the compression code doesn't tell us
	 * how much would fit, like LZ4 does:
	 */
	while (1) {
		if (*src_len <= block_bytes(c)) {
			ret = -1;
			break;
		}

		ret = attempt_compress(c, workspace,
				       dst_data.b,	*dst_len,
				       src_data.b,	*src_len,
				       compression_type);
		if (ret > 0) {
			*dst_len = ret;
			ret = 0;
			break;
		}

		/* Didn't fit: should we retry with a smaller amount? */
		if (*src_len <= *dst_len) {
			ret = -1;
			break;
		}

		/*
		 * If ret is negative, it's a hint as to how much data would fit
		 */
		BUG_ON(-ret >= *src_len);

		if (ret < 0)
			*src_len = -ret;
		else
			*src_len -= (*src_len - *dst_len) / 2;
		*src_len = round_down(*src_len, block_bytes(c));
	}

	mempool_free(workspace, &c->compress_workspace[compression_type]);

	if (ret)
		goto err;

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	/* Pad the output to a multiple of the block size: */
	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return compression_type;
err:
	compression_type = 0;
	goto out;
}

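/*
 * Wrapper that bounds how much input may be consumed and output generated,
 * then restores both bios' iterators:
 */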
unsigned bch2_bio_compress(struct bch_fs *c,
			   struct bio *dst, size_t *dst_len,
			   struct bio *src, size_t *src_len,
			   unsigned compression_type)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->sb.encoded_extent_max << 9);
	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	/* The old lz4 format is only for reading old extents: */
	if (compression_type == BCH_COMPRESSION_LZ4_OLD)
		compression_type = BCH_COMPRESSION_LZ4;

	compression_type =
		__bio_compress(c, dst, dst_len, src, src_len, compression_type);

	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
	return compression_type;
}

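/*
 * Map each compression option to the superblock feature bit it requires; the
 * temporary BCH_FEATURE_NONE definition makes the "no compression" option map
 * to no feature:
 */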
#define BCH_FEATURE_NONE	0

static const unsigned bch2_compression_opt_to_feature[] = {
#define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
	BCH_COMPRESSION_TYPES()
#undef x
};

#undef BCH_FEATURE_NONE

/* doesn't write superblock: */
int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_type)
{
	unsigned f;
	int ret = 0;

	pr_verbose_init(c->opts, "");

	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));

	if (!compression_type)
		goto out;

	f = bch2_compression_opt_to_feature[compression_type];
	if (bch2_sb_test_feature(c->disk_sb, f))
		goto out;

	bch2_sb_set_feature(c->disk_sb, f);
	ret = bch2_fs_compress_init(c);
out:
	pr_verbose_init(c->opts, "ret %i", ret);
	return ret;
}

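/* Tear down all compression bounce buffers and workspaces: */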
void bch2_fs_compress_exit(struct bch_fs *c)
{
	unsigned i;

	mempool_exit(&c->decompress_workspace);
	for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
		mempool_exit(&c->compress_workspace[i]);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}

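/*
 * mempool element callbacks for compression workspaces too big to kmalloc
 * reliably: kvpmalloc()/kvpfree() fall back to vmalloc, and the element size
 * is passed via @pool_data:
 */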
static void *mempool_kvpmalloc(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t) pool_data;
	return kvpmalloc(size, gfp_mask);
}

static void mempool_kvpfree(void *element, void *pool_data)
{
	size_t size = (size_t) pool_data;
	kvpfree(element, size);
}

static int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
{
	return !mempool_initialized(pool)
		? mempool_init(pool, min_nr, mempool_kvpmalloc,
			       mempool_kvpfree, (void *) size)
		: 0;
}

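/*
 * Allocate bounce pages and per-algorithm workspaces for every compression
 * type enabled in the superblock; safe to call again when a new type is
 * enabled:
 */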
int bch2_fs_compress_init(struct bch_fs *c)
{
	size_t max_extent = c->sb.encoded_extent_max << 9;
	size_t order = get_order(max_extent);
	size_t decompress_workspace_size = 0;
	bool decompress_workspace_needed; /* XXX: set but never read */
	ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
	struct {
		unsigned	feature;
		unsigned	type;
		size_t		compress_workspace;
		size_t		decompress_workspace;
	} compression_types[] = {
		{ BCH_FEATURE_LZ4, BCH_COMPRESSION_LZ4, LZ4_MEM_COMPRESS, 0 },
		{ BCH_FEATURE_GZIP, BCH_COMPRESSION_GZIP,
			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
			zlib_inflate_workspacesize(), },
		{ BCH_FEATURE_ZSTD, BCH_COMPRESSION_ZSTD,
			ZSTD_CCtxWorkspaceBound(params.cParams),
			ZSTD_DCtxWorkspaceBound() },
	}, *i;
	int ret = 0;

	pr_verbose_init(c->opts, "");

	c->zstd_params = params;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++)
		if (bch2_sb_test_feature(c->disk_sb, i->feature))
			goto have_compressed;

	goto out;
have_compressed:

	if (!mempool_initialized(&c->compression_bounce[READ])) {
		ret = mempool_init_page_pool(&c->compression_bounce[READ],
					     1, order);
		if (ret)
			goto out;
	}

	if (!mempool_initialized(&c->compression_bounce[WRITE])) {
		ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
					     1, order);
		if (ret)
			goto out;
	}

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++) {
		decompress_workspace_size =
			max(decompress_workspace_size, i->decompress_workspace);

		if (!bch2_sb_test_feature(c->disk_sb, i->feature))
			continue;

		if (i->decompress_workspace)
			decompress_workspace_needed = true;

		ret = mempool_init_kvpmalloc_pool(
				&c->compress_workspace[i->type],
				1, i->compress_workspace);
		if (ret)
			goto out;
	}

	ret = mempool_init_kmalloc_pool(
			&c->decompress_workspace,
			1, decompress_workspace_size);
	if (ret)
		goto out;
out:
	pr_verbose_init(c->opts, "ret %i", ret);
	return ret;
}