]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/compress.c
Update bcachefs sources to edf5f38218 bcachefs: Refactor superblock code
[bcachefs-tools-debian] / libbcachefs / compress.c
1 #include "bcachefs.h"
2 #include "checksum.h"
3 #include "compress.h"
4 #include "extents.h"
5 #include "io.h"
6 #include "super-io.h"
7
8 #include "lz4.h"
9 #include <linux/lz4.h>
10 #include <linux/zlib.h>
11 #include <linux/zstd.h>
12
/*
 * Bounce buffer: a linearly addressable view of some data, plus the
 * bookkeeping needed to release that view again.
 */
struct bbuf {
	/* Start of the linearly addressable data. */
	void		*b;

	/* How @b was obtained, and therefore how it has to be released. */
	enum {
		BB_NONE,	/* direct page address, nothing to release */
		BB_VMAP,	/* vmap() of the bio's pages */
		BB_KMALLOC,	/* kmalloc()ed bounce buffer */
		BB_VMALLOC,	/* vmalloc()ed bounce buffer */
		BB_MEMPOOL,	/* pages from a compression_bounce mempool */
	}		type;

	/* READ or WRITE; indexes the compression_bounce mempool array. */
	int		rw;
};
25
26 static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
27 {
28         void *b;
29
30         BUG_ON(size > c->sb.encoded_extent_max << 9);
31
32         b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
33         if (b)
34                 return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
35
36         b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT);
37         b = b ? page_address(b) : NULL;
38         if (b)
39                 return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
40
41         b = vmalloc(size);
42         if (b)
43                 return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw };
44
45         b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
46         b = b ? page_address(b) : NULL;
47         if (b)
48                 return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
49
50         BUG();
51 }
52
53 static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
54                                        struct bvec_iter start, int rw)
55 {
56         struct bbuf ret;
57         struct bio_vec bv;
58         struct bvec_iter iter;
59         unsigned nr_pages = 0;
60         struct page *stack_pages[16];
61         struct page **pages = NULL;
62         bool first = true;
63         unsigned prev_end = PAGE_SIZE;
64         void *data;
65
66         BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
67
68 #ifndef CONFIG_HIGHMEM
69         __bio_for_each_contig_segment(bv, bio, iter, start) {
70                 if (bv.bv_len == start.bi_size)
71                         return (struct bbuf) {
72                                 .b = page_address(bv.bv_page) + bv.bv_offset,
73                                 .type = BB_NONE, .rw = rw
74                         };
75         }
76 #endif
77         __bio_for_each_segment(bv, bio, iter, start) {
78                 if ((!first && bv.bv_offset) ||
79                     prev_end != PAGE_SIZE)
80                         goto bounce;
81
82                 prev_end = bv.bv_offset + bv.bv_len;
83                 nr_pages++;
84         }
85
86         BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
87
88         pages = nr_pages > ARRAY_SIZE(stack_pages)
89                 ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
90                 : stack_pages;
91         if (!pages)
92                 goto bounce;
93
94         nr_pages = 0;
95         __bio_for_each_segment(bv, bio, iter, start)
96                 pages[nr_pages++] = bv.bv_page;
97
98         data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
99         if (pages != stack_pages)
100                 kfree(pages);
101
102         if (data)
103                 return (struct bbuf) {
104                         .b = data + bio_iter_offset(bio, start),
105                         .type = BB_VMAP, .rw = rw
106                 };
107 bounce:
108         ret = __bounce_alloc(c, start.bi_size, rw);
109
110         if (rw == READ)
111                 memcpy_from_bio(ret.b, bio, start);
112
113         return ret;
114 }
115
116 static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
117 {
118         return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
119 }
120
121 static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
122 {
123         switch (buf.type) {
124         case BB_NONE:
125                 break;
126         case BB_VMAP:
127                 vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
128                 break;
129         case BB_KMALLOC:
130                 kfree(buf.b);
131                 break;
132         case BB_VMALLOC:
133                 vfree(buf.b);
134                 break;
135         case BB_MEMPOOL:
136                 mempool_free(virt_to_page(buf.b),
137                              &c->compression_bounce[buf.rw]);
138                 break;
139         }
140 }
141
142 static inline void zlib_set_workspace(z_stream *strm, void *workspace)
143 {
144 #ifdef __KERNEL__
145         strm->workspace = workspace;
146 #endif
147 }
148
149 static int __bio_uncompress(struct bch_fs *c, struct bio *src,
150                             void *dst_data, struct bch_extent_crc_unpacked crc)
151 {
152         struct bbuf src_data = { NULL };
153         size_t src_len = src->bi_iter.bi_size;
154         size_t dst_len = crc.uncompressed_size << 9;
155         void *workspace;
156         int ret;
157
158         src_data = bio_map_or_bounce(c, src, READ);
159
160         switch (crc.compression_type) {
161         case BCH_COMPRESSION_LZ4_OLD:
162                 ret = bch2_lz4_decompress(src_data.b, &src_len,
163                                      dst_data, dst_len);
164                 if (ret)
165                         goto err;
166                 break;
167         case BCH_COMPRESSION_LZ4:
168                 ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
169                                                   src_len, dst_len, dst_len);
170                 if (ret != dst_len)
171                         goto err;
172                 break;
173         case BCH_COMPRESSION_GZIP: {
174                 z_stream strm = {
175                         .next_in        = src_data.b,
176                         .avail_in       = src_len,
177                         .next_out       = dst_data,
178                         .avail_out      = dst_len,
179                 };
180
181                 workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
182
183                 zlib_set_workspace(&strm, workspace);
184                 zlib_inflateInit2(&strm, -MAX_WBITS);
185                 ret = zlib_inflate(&strm, Z_FINISH);
186
187                 mempool_free(workspace, &c->decompress_workspace);
188
189                 if (ret != Z_STREAM_END)
190                         goto err;
191                 break;
192         }
193         case BCH_COMPRESSION_ZSTD: {
194                 ZSTD_DCtx *ctx;
195                 size_t len;
196
197                 workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
198                 ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
199
200                 src_len = le32_to_cpup(src_data.b);
201
202                 len = ZSTD_decompressDCtx(ctx,
203                                 dst_data,       dst_len,
204                                 src_data.b + 4, src_len);
205
206                 mempool_free(workspace, &c->decompress_workspace);
207
208                 if (len != dst_len)
209                         goto err;
210                 break;
211         }
212         default:
213                 BUG();
214         }
215         ret = 0;
216 out:
217         bio_unmap_or_unbounce(c, src_data);
218         return ret;
219 err:
220         ret = -EIO;
221         goto out;
222 }
223
224 int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
225                                 struct bch_extent_crc_unpacked *crc)
226 {
227         struct bbuf data = { NULL };
228         size_t dst_len = crc->uncompressed_size << 9;
229
230         /* bio must own its pages: */
231         BUG_ON(!bio->bi_vcnt);
232         BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
233
234         if (crc->uncompressed_size      > c->sb.encoded_extent_max ||
235             crc->compressed_size        > c->sb.encoded_extent_max) {
236                 bch_err(c, "error rewriting existing data: extent too big");
237                 return -EIO;
238         }
239
240         data = __bounce_alloc(c, dst_len, WRITE);
241
242         if (__bio_uncompress(c, bio, data.b, *crc)) {
243                 bch_err(c, "error rewriting existing data: decompression error");
244                 bio_unmap_or_unbounce(c, data);
245                 return -EIO;
246         }
247
248         /*
249          * might have to free existing pages and retry allocation from mempool -
250          * do this _after_ decompressing:
251          */
252         bch2_bio_alloc_more_pages_pool(c, bio, crc->live_size << 9);
253
254         memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
255
256         crc->csum_type          = 0;
257         crc->compression_type   = 0;
258         crc->compressed_size    = crc->live_size;
259         crc->uncompressed_size  = crc->live_size;
260         crc->offset             = 0;
261         crc->csum               = (struct bch_csum) { 0, 0 };
262
263         bio_unmap_or_unbounce(c, data);
264         return 0;
265 }
266
267 int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
268                        struct bio *dst, struct bvec_iter dst_iter,
269                        struct bch_extent_crc_unpacked crc)
270 {
271         struct bbuf dst_data = { NULL };
272         size_t dst_len = crc.uncompressed_size << 9;
273         int ret = -ENOMEM;
274
275         if (crc.uncompressed_size       > c->sb.encoded_extent_max ||
276             crc.compressed_size         > c->sb.encoded_extent_max)
277                 return -EIO;
278
279         dst_data = dst_len == dst_iter.bi_size
280                 ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
281                 : __bounce_alloc(c, dst_len, WRITE);
282
283         ret = __bio_uncompress(c, src, dst_data.b, crc);
284         if (ret)
285                 goto err;
286
287         if (dst_data.type != BB_NONE)
288                 memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
289 err:
290         bio_unmap_or_unbounce(c, dst_data);
291         return ret;
292 }
293
294 static int attempt_compress(struct bch_fs *c,
295                             void *workspace,
296                             void *dst, size_t dst_len,
297                             void *src, size_t src_len,
298                             unsigned compression_type)
299 {
300         switch (compression_type) {
301         case BCH_COMPRESSION_LZ4: {
302                 int len = src_len;
303                 int ret = LZ4_compress_destSize(
304                                 src,            dst,
305                                 &len,           dst_len,
306                                 workspace);
307
308                 if (len < src_len)
309                         return -len;
310
311                 return ret;
312         }
313         case BCH_COMPRESSION_GZIP: {
314                 z_stream strm = {
315                         .next_in        = src,
316                         .avail_in       = src_len,
317                         .next_out       = dst,
318                         .avail_out      = dst_len,
319                 };
320
321                 zlib_set_workspace(&strm, workspace);
322                 zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
323                                   Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
324                                   Z_DEFAULT_STRATEGY);
325
326                 if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
327                         return 0;
328
329                 if (zlib_deflateEnd(&strm) != Z_OK)
330                         return 0;
331
332                 return strm.total_out;
333         }
334         case BCH_COMPRESSION_ZSTD: {
335                 ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
336                         ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
337
338                 size_t len = ZSTD_compressCCtx(ctx,
339                                 dst + 4,        dst_len - 4,
340                                 src,            src_len,
341                                 c->zstd_params);
342                 if (ZSTD_isError(len))
343                         return 0;
344
345                 *((__le32 *) dst) = cpu_to_le32(len);
346                 return len + 4;
347         }
348         default:
349                 BUG();
350         }
351 }
352
353 static unsigned __bio_compress(struct bch_fs *c,
354                                struct bio *dst, size_t *dst_len,
355                                struct bio *src, size_t *src_len,
356                                unsigned compression_type)
357 {
358         struct bbuf src_data = { NULL }, dst_data = { NULL };
359         void *workspace;
360         unsigned pad;
361         int ret = 0;
362
363         BUG_ON(compression_type >= BCH_COMPRESSION_NR);
364         BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
365
366         /* If it's only one block, don't bother trying to compress: */
367         if (bio_sectors(src) <= c->opts.block_size)
368                 return 0;
369
370         dst_data = bio_map_or_bounce(c, dst, WRITE);
371         src_data = bio_map_or_bounce(c, src, READ);
372
373         workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
374
375         *src_len = src->bi_iter.bi_size;
376         *dst_len = dst->bi_iter.bi_size;
377
378         /*
379          * XXX: this algorithm sucks when the compression code doesn't tell us
380          * how much would fit, like LZ4 does:
381          */
382         while (1) {
383                 if (*src_len <= block_bytes(c)) {
384                         ret = -1;
385                         break;
386                 }
387
388                 ret = attempt_compress(c, workspace,
389                                        dst_data.b,      *dst_len,
390                                        src_data.b,      *src_len,
391                                        compression_type);
392                 if (ret > 0) {
393                         *dst_len = ret;
394                         ret = 0;
395                         break;
396                 }
397
398                 /* Didn't fit: should we retry with a smaller amount?  */
399                 if (*src_len <= *dst_len) {
400                         ret = -1;
401                         break;
402                 }
403
404                 /*
405                  * If ret is negative, it's a hint as to how much data would fit
406                  */
407                 BUG_ON(-ret >= *src_len);
408
409                 if (ret < 0)
410                         *src_len = -ret;
411                 else
412                         *src_len -= (*src_len - *dst_len) / 2;
413                 *src_len = round_down(*src_len, block_bytes(c));
414         }
415
416         mempool_free(workspace, &c->compress_workspace[compression_type]);
417
418         if (ret)
419                 goto err;
420
421         /* Didn't get smaller: */
422         if (round_up(*dst_len, block_bytes(c)) >= *src_len)
423                 goto err;
424
425         pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
426
427         memset(dst_data.b + *dst_len, 0, pad);
428         *dst_len += pad;
429
430         if (dst_data.type != BB_NONE)
431                 memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
432
433         BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
434         BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
435         BUG_ON(*dst_len & (block_bytes(c) - 1));
436         BUG_ON(*src_len & (block_bytes(c) - 1));
437 out:
438         bio_unmap_or_unbounce(c, src_data);
439         bio_unmap_or_unbounce(c, dst_data);
440         return compression_type;
441 err:
442         compression_type = 0;
443         goto out;
444 }
445
446 unsigned bch2_bio_compress(struct bch_fs *c,
447                            struct bio *dst, size_t *dst_len,
448                            struct bio *src, size_t *src_len,
449                            unsigned compression_type)
450 {
451         unsigned orig_dst = dst->bi_iter.bi_size;
452         unsigned orig_src = src->bi_iter.bi_size;
453
454         /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
455         src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
456                                      c->sb.encoded_extent_max << 9);
457         /* Don't generate a bigger output than input: */
458         dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
459
460         if (compression_type == BCH_COMPRESSION_LZ4_OLD)
461                 compression_type = BCH_COMPRESSION_LZ4;
462
463         compression_type =
464                 __bio_compress(c, dst, dst_len, src, src_len, compression_type);
465
466         dst->bi_iter.bi_size = orig_dst;
467         src->bi_iter.bi_size = orig_src;
468         return compression_type;
469 }
470
471 static int __bch2_fs_compress_init(struct bch_fs *, u64);
472
473 #define BCH_FEATURE_NONE        0
474
475 static const unsigned bch2_compression_opt_to_feature[] = {
476 #define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
477         BCH_COMPRESSION_TYPES()
478 #undef x
479 };
480
481 #undef BCH_FEATURE_NONE
482
483 int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
484 {
485         int ret = 0;
486
487         if ((c->sb.features & f) == f)
488                 return 0;
489
490         mutex_lock(&c->sb_lock);
491
492         if ((c->sb.features & f) == f) {
493                 mutex_unlock(&c->sb_lock);
494                 return 0;
495         }
496
497         ret = __bch2_fs_compress_init(c, c->sb.features|f);
498         if (ret) {
499                 mutex_unlock(&c->sb_lock);
500                 return ret;
501         }
502
503         c->disk_sb.sb->features[0] |= cpu_to_le64(f);
504         bch2_write_super(c);
505         mutex_unlock(&c->sb_lock);
506
507         return 0;
508 }
509
510 int bch2_check_set_has_compressed_data(struct bch_fs *c,
511                                        unsigned compression_type)
512 {
513         BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
514
515         return compression_type
516                 ? __bch2_check_set_has_compressed_data(c,
517                                 1ULL << bch2_compression_opt_to_feature[compression_type])
518                 : 0;
519 }
520
521 void bch2_fs_compress_exit(struct bch_fs *c)
522 {
523         unsigned i;
524
525         mempool_exit(&c->decompress_workspace);
526         for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
527                 mempool_exit(&c->compress_workspace[i]);
528         mempool_exit(&c->compression_bounce[WRITE]);
529         mempool_exit(&c->compression_bounce[READ]);
530 }
531
532 static void *mempool_kvpmalloc(gfp_t gfp_mask, void *pool_data)
533 {
534         size_t size = (size_t)pool_data;
535         return kvpmalloc(size, gfp_mask);
536 }
537
538 void mempool_kvpfree(void *element, void *pool_data)
539 {
540         size_t size = (size_t)pool_data;
541         kvpfree(element, size);
542 }
543
544 static int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
545 {
546         return !mempool_initialized(pool)
547                 ? mempool_init(pool, min_nr, mempool_kvpmalloc,
548                                mempool_kvpfree, (void *) size)
549                 : 0;
550 }
551
552 static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
553 {
554         size_t max_extent = c->sb.encoded_extent_max << 9;
555         size_t order = get_order(max_extent);
556         size_t decompress_workspace_size = 0;
557         bool decompress_workspace_needed;
558         ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
559         struct {
560                 unsigned        feature;
561                 unsigned        type;
562                 size_t          compress_workspace;
563                 size_t          decompress_workspace;
564         } compression_types[] = {
565                 { BCH_FEATURE_LZ4, BCH_COMPRESSION_LZ4, LZ4_MEM_COMPRESS, 0 },
566                 { BCH_FEATURE_GZIP, BCH_COMPRESSION_GZIP,
567                         zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
568                         zlib_inflate_workspacesize(), },
569                 { BCH_FEATURE_ZSTD, BCH_COMPRESSION_ZSTD,
570                         ZSTD_CCtxWorkspaceBound(params.cParams),
571                         ZSTD_DCtxWorkspaceBound() },
572         }, *i;
573         int ret = 0;
574
575         pr_verbose_init(c->opts, "");
576
577         c->zstd_params = params;
578
579         for (i = compression_types;
580              i < compression_types + ARRAY_SIZE(compression_types);
581              i++)
582                 if (features & (1 << i->feature))
583                         goto have_compressed;
584
585         goto out;
586 have_compressed:
587
588         if (!mempool_initialized(&c->compression_bounce[READ])) {
589                 ret = mempool_init_page_pool(&c->compression_bounce[READ],
590                                              1, order);
591                 if (ret)
592                         goto out;
593         }
594
595         if (!mempool_initialized(&c->compression_bounce[WRITE])) {
596                 ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
597                                              1, order);
598                 if (ret)
599                         goto out;
600         }
601
602         for (i = compression_types;
603              i < compression_types + ARRAY_SIZE(compression_types);
604              i++) {
605                 decompress_workspace_size =
606                         max(decompress_workspace_size, i->decompress_workspace);
607
608                 if (!(features & (1 << i->feature)))
609                         continue;
610
611                 if (i->decompress_workspace)
612                         decompress_workspace_needed = true;
613
614                 ret = mempool_init_kvpmalloc_pool(
615                                 &c->compress_workspace[i->type],
616                                 1, i->compress_workspace);
617                 if (ret)
618                         goto out;
619         }
620
621         ret = mempool_init_kmalloc_pool(
622                         &c->decompress_workspace,
623                         1, decompress_workspace_size);
624         if (ret)
625                 goto out;
626 out:
627         pr_verbose_init(c->opts, "ret %i", ret);
628         return ret;
629 }
630
631 int bch2_fs_compress_init(struct bch_fs *c)
632 {
633         u64 f = c->sb.features;
634
635         if (c->opts.compression)
636                 f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
637
638         if (c->opts.background_compression)
639                 f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
640
641         return __bch2_fs_compress_init(c, f);
642
643 }