]> git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/compress.c
Update bcachefs sources to c9b4a210f9 fixup! bcachefs: Fixes for going RO
[bcachefs-tools-debian] / libbcachefs / compress.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "checksum.h"
4 #include "compress.h"
5 #include "extents.h"
6 #include "io.h"
7 #include "super-io.h"
8
9 #include <linux/lz4.h>
10 #include <linux/sched/mm.h>
11 #include <linux/zlib.h>
12 #include <linux/zstd.h>
13
/*
 * Bounce buffer: a linear view of some data, together with the bookkeeping
 * needed to release it again (see bio_unmap_or_unbounce()).
 */
struct bbuf {
	/* Start of the linear buffer */
	void		*b;

	/* How @b was obtained, which decides how it has to be released */
	enum {
		BB_NONE,	/* points straight at the bio's pages */
		BB_VMAP,	/* vmap() of the bio's pages */
		BB_KMALLOC,	/* kmalloc()ed copy */
		BB_MEMPOOL,	/* element of c->compression_bounce[rw] */
	}		type;

	/* READ or WRITE; selects the compression_bounce mempool */
	int		rw;
};
25
26 static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
27 {
28         void *b;
29
30         BUG_ON(size > c->sb.encoded_extent_max << 9);
31
32         b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
33         if (b)
34                 return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
35
36         b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
37         if (b)
38                 return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
39
40         BUG();
41 }
42
43 static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
44 {
45         struct bio_vec bv;
46         struct bvec_iter iter;
47         void *expected_start = NULL;
48
49         __bio_for_each_bvec(bv, bio, iter, start) {
50                 if (expected_start &&
51                     expected_start != page_address(bv.bv_page) + bv.bv_offset)
52                         return false;
53
54                 expected_start = page_address(bv.bv_page) +
55                         bv.bv_offset + bv.bv_len;
56         }
57
58         return true;
59 }
60
61 static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
62                                        struct bvec_iter start, int rw)
63 {
64         struct bbuf ret;
65         struct bio_vec bv;
66         struct bvec_iter iter;
67         unsigned nr_pages = 0, flags;
68         struct page *stack_pages[16];
69         struct page **pages = NULL;
70         void *data;
71
72         BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
73
74         if (!IS_ENABLED(CONFIG_HIGHMEM) &&
75             bio_phys_contig(bio, start))
76                 return (struct bbuf) {
77                         .b = page_address(bio_iter_page(bio, start)) +
78                                 bio_iter_offset(bio, start),
79                         .type = BB_NONE, .rw = rw
80                 };
81
82         /* check if we can map the pages contiguously: */
83         __bio_for_each_segment(bv, bio, iter, start) {
84                 if (iter.bi_size != start.bi_size &&
85                     bv.bv_offset)
86                         goto bounce;
87
88                 if (bv.bv_len < iter.bi_size &&
89                     bv.bv_offset + bv.bv_len < PAGE_SIZE)
90                         goto bounce;
91
92                 nr_pages++;
93         }
94
95         BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
96
97         pages = nr_pages > ARRAY_SIZE(stack_pages)
98                 ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
99                 : stack_pages;
100         if (!pages)
101                 goto bounce;
102
103         nr_pages = 0;
104         __bio_for_each_segment(bv, bio, iter, start)
105                 pages[nr_pages++] = bv.bv_page;
106
107         flags = memalloc_nofs_save();
108         data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
109         memalloc_nofs_restore(flags);
110
111         if (pages != stack_pages)
112                 kfree(pages);
113
114         if (data)
115                 return (struct bbuf) {
116                         .b = data + bio_iter_offset(bio, start),
117                         .type = BB_VMAP, .rw = rw
118                 };
119 bounce:
120         ret = __bounce_alloc(c, start.bi_size, rw);
121
122         if (rw == READ)
123                 memcpy_from_bio(ret.b, bio, start);
124
125         return ret;
126 }
127
128 static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
129 {
130         return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
131 }
132
133 static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
134 {
135         switch (buf.type) {
136         case BB_NONE:
137                 break;
138         case BB_VMAP:
139                 vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
140                 break;
141         case BB_KMALLOC:
142                 kfree(buf.b);
143                 break;
144         case BB_MEMPOOL:
145                 mempool_free(buf.b, &c->compression_bounce[buf.rw]);
146                 break;
147         }
148 }
149
150 static inline void zlib_set_workspace(z_stream *strm, void *workspace)
151 {
152 #ifdef __KERNEL__
153         strm->workspace = workspace;
154 #endif
155 }
156
157 static int __bio_uncompress(struct bch_fs *c, struct bio *src,
158                             void *dst_data, struct bch_extent_crc_unpacked crc)
159 {
160         struct bbuf src_data = { NULL };
161         size_t src_len = src->bi_iter.bi_size;
162         size_t dst_len = crc.uncompressed_size << 9;
163         void *workspace;
164         int ret;
165
166         src_data = bio_map_or_bounce(c, src, READ);
167
168         switch (crc.compression_type) {
169         case BCH_COMPRESSION_TYPE_lz4_old:
170         case BCH_COMPRESSION_TYPE_lz4:
171                 ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
172                                                   src_len, dst_len, dst_len);
173                 if (ret != dst_len)
174                         goto err;
175                 break;
176         case BCH_COMPRESSION_TYPE_gzip: {
177                 z_stream strm = {
178                         .next_in        = src_data.b,
179                         .avail_in       = src_len,
180                         .next_out       = dst_data,
181                         .avail_out      = dst_len,
182                 };
183
184                 workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
185
186                 zlib_set_workspace(&strm, workspace);
187                 zlib_inflateInit2(&strm, -MAX_WBITS);
188                 ret = zlib_inflate(&strm, Z_FINISH);
189
190                 mempool_free(workspace, &c->decompress_workspace);
191
192                 if (ret != Z_STREAM_END)
193                         goto err;
194                 break;
195         }
196         case BCH_COMPRESSION_TYPE_zstd: {
197                 ZSTD_DCtx *ctx;
198                 size_t real_src_len = le32_to_cpup(src_data.b);
199
200                 if (real_src_len > src_len - 4)
201                         goto err;
202
203                 workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
204                 ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
205
206                 ret = ZSTD_decompressDCtx(ctx,
207                                 dst_data,       dst_len,
208                                 src_data.b + 4, real_src_len);
209
210                 mempool_free(workspace, &c->decompress_workspace);
211
212                 if (ret != dst_len)
213                         goto err;
214                 break;
215         }
216         default:
217                 BUG();
218         }
219         ret = 0;
220 out:
221         bio_unmap_or_unbounce(c, src_data);
222         return ret;
223 err:
224         ret = -EIO;
225         goto out;
226 }
227
228 int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
229                                 struct bch_extent_crc_unpacked *crc)
230 {
231         struct bbuf data = { NULL };
232         size_t dst_len = crc->uncompressed_size << 9;
233
234         /* bio must own its pages: */
235         BUG_ON(!bio->bi_vcnt);
236         BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
237
238         if (crc->uncompressed_size      > c->sb.encoded_extent_max ||
239             crc->compressed_size        > c->sb.encoded_extent_max) {
240                 bch_err(c, "error rewriting existing data: extent too big");
241                 return -EIO;
242         }
243
244         data = __bounce_alloc(c, dst_len, WRITE);
245
246         if (__bio_uncompress(c, bio, data.b, *crc)) {
247                 bch_err(c, "error rewriting existing data: decompression error");
248                 bio_unmap_or_unbounce(c, data);
249                 return -EIO;
250         }
251
252         /*
253          * XXX: don't have a good way to assert that the bio was allocated with
254          * enough space, we depend on bch2_move_extent doing the right thing
255          */
256         bio->bi_iter.bi_size = crc->live_size << 9;
257
258         memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
259
260         crc->csum_type          = 0;
261         crc->compression_type   = 0;
262         crc->compressed_size    = crc->live_size;
263         crc->uncompressed_size  = crc->live_size;
264         crc->offset             = 0;
265         crc->csum               = (struct bch_csum) { 0, 0 };
266
267         bio_unmap_or_unbounce(c, data);
268         return 0;
269 }
270
271 int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
272                        struct bio *dst, struct bvec_iter dst_iter,
273                        struct bch_extent_crc_unpacked crc)
274 {
275         struct bbuf dst_data = { NULL };
276         size_t dst_len = crc.uncompressed_size << 9;
277         int ret = -ENOMEM;
278
279         if (crc.uncompressed_size       > c->sb.encoded_extent_max ||
280             crc.compressed_size         > c->sb.encoded_extent_max)
281                 return -EIO;
282
283         dst_data = dst_len == dst_iter.bi_size
284                 ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
285                 : __bounce_alloc(c, dst_len, WRITE);
286
287         ret = __bio_uncompress(c, src, dst_data.b, crc);
288         if (ret)
289                 goto err;
290
291         if (dst_data.type != BB_NONE &&
292             dst_data.type != BB_VMAP)
293                 memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
294 err:
295         bio_unmap_or_unbounce(c, dst_data);
296         return ret;
297 }
298
299 static int attempt_compress(struct bch_fs *c,
300                             void *workspace,
301                             void *dst, size_t dst_len,
302                             void *src, size_t src_len,
303                             enum bch_compression_type compression_type)
304 {
305         switch (compression_type) {
306         case BCH_COMPRESSION_TYPE_lz4: {
307                 int len = src_len;
308                 int ret = LZ4_compress_destSize(
309                                 src,            dst,
310                                 &len,           dst_len,
311                                 workspace);
312
313                 if (len < src_len)
314                         return -len;
315
316                 return ret;
317         }
318         case BCH_COMPRESSION_TYPE_gzip: {
319                 z_stream strm = {
320                         .next_in        = src,
321                         .avail_in       = src_len,
322                         .next_out       = dst,
323                         .avail_out      = dst_len,
324                 };
325
326                 zlib_set_workspace(&strm, workspace);
327                 zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
328                                   Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
329                                   Z_DEFAULT_STRATEGY);
330
331                 if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
332                         return 0;
333
334                 if (zlib_deflateEnd(&strm) != Z_OK)
335                         return 0;
336
337                 return strm.total_out;
338         }
339         case BCH_COMPRESSION_TYPE_zstd: {
340                 ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
341                         ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
342
343                 size_t len = ZSTD_compressCCtx(ctx,
344                                 dst + 4,        dst_len - 4,
345                                 src,            src_len,
346                                 c->zstd_params);
347                 if (ZSTD_isError(len))
348                         return 0;
349
350                 *((__le32 *) dst) = cpu_to_le32(len);
351                 return len + 4;
352         }
353         default:
354                 BUG();
355         }
356 }
357
358 static unsigned __bio_compress(struct bch_fs *c,
359                                struct bio *dst, size_t *dst_len,
360                                struct bio *src, size_t *src_len,
361                                enum bch_compression_type compression_type)
362 {
363         struct bbuf src_data = { NULL }, dst_data = { NULL };
364         void *workspace;
365         unsigned pad;
366         int ret = 0;
367
368         BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
369         BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
370
371         /* If it's only one block, don't bother trying to compress: */
372         if (bio_sectors(src) <= c->opts.block_size)
373                 return 0;
374
375         dst_data = bio_map_or_bounce(c, dst, WRITE);
376         src_data = bio_map_or_bounce(c, src, READ);
377
378         workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
379
380         *src_len = src->bi_iter.bi_size;
381         *dst_len = dst->bi_iter.bi_size;
382
383         /*
384          * XXX: this algorithm sucks when the compression code doesn't tell us
385          * how much would fit, like LZ4 does:
386          */
387         while (1) {
388                 if (*src_len <= block_bytes(c)) {
389                         ret = -1;
390                         break;
391                 }
392
393                 ret = attempt_compress(c, workspace,
394                                        dst_data.b,      *dst_len,
395                                        src_data.b,      *src_len,
396                                        compression_type);
397                 if (ret > 0) {
398                         *dst_len = ret;
399                         ret = 0;
400                         break;
401                 }
402
403                 /* Didn't fit: should we retry with a smaller amount?  */
404                 if (*src_len <= *dst_len) {
405                         ret = -1;
406                         break;
407                 }
408
409                 /*
410                  * If ret is negative, it's a hint as to how much data would fit
411                  */
412                 BUG_ON(-ret >= *src_len);
413
414                 if (ret < 0)
415                         *src_len = -ret;
416                 else
417                         *src_len -= (*src_len - *dst_len) / 2;
418                 *src_len = round_down(*src_len, block_bytes(c));
419         }
420
421         mempool_free(workspace, &c->compress_workspace[compression_type]);
422
423         if (ret)
424                 goto err;
425
426         /* Didn't get smaller: */
427         if (round_up(*dst_len, block_bytes(c)) >= *src_len)
428                 goto err;
429
430         pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
431
432         memset(dst_data.b + *dst_len, 0, pad);
433         *dst_len += pad;
434
435         if (dst_data.type != BB_NONE &&
436             dst_data.type != BB_VMAP)
437                 memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
438
439         BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
440         BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
441         BUG_ON(*dst_len & (block_bytes(c) - 1));
442         BUG_ON(*src_len & (block_bytes(c) - 1));
443 out:
444         bio_unmap_or_unbounce(c, src_data);
445         bio_unmap_or_unbounce(c, dst_data);
446         return compression_type;
447 err:
448         compression_type = BCH_COMPRESSION_TYPE_incompressible;
449         goto out;
450 }
451
452 unsigned bch2_bio_compress(struct bch_fs *c,
453                            struct bio *dst, size_t *dst_len,
454                            struct bio *src, size_t *src_len,
455                            unsigned compression_type)
456 {
457         unsigned orig_dst = dst->bi_iter.bi_size;
458         unsigned orig_src = src->bi_iter.bi_size;
459
460         /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
461         src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
462                                      c->sb.encoded_extent_max << 9);
463         /* Don't generate a bigger output than input: */
464         dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
465
466         if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
467                 compression_type = BCH_COMPRESSION_TYPE_lz4;
468
469         compression_type =
470                 __bio_compress(c, dst, dst_len, src, src_len, compression_type);
471
472         dst->bi_iter.bi_size = orig_dst;
473         src->bi_iter.bi_size = orig_src;
474         return compression_type;
475 }
476
477 static int __bch2_fs_compress_init(struct bch_fs *, u64);
478
479 #define BCH_FEATURE_none        0
480
481 static const unsigned bch2_compression_opt_to_feature[] = {
482 #define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
483         BCH_COMPRESSION_OPTS()
484 #undef x
485 };
486
487 #undef BCH_FEATURE_none
488
489 static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
490 {
491         int ret = 0;
492
493         if ((c->sb.features & f) == f)
494                 return 0;
495
496         mutex_lock(&c->sb_lock);
497
498         if ((c->sb.features & f) == f) {
499                 mutex_unlock(&c->sb_lock);
500                 return 0;
501         }
502
503         ret = __bch2_fs_compress_init(c, c->sb.features|f);
504         if (ret) {
505                 mutex_unlock(&c->sb_lock);
506                 return ret;
507         }
508
509         c->disk_sb.sb->features[0] |= cpu_to_le64(f);
510         bch2_write_super(c);
511         mutex_unlock(&c->sb_lock);
512
513         return 0;
514 }
515
516 int bch2_check_set_has_compressed_data(struct bch_fs *c,
517                                        unsigned compression_type)
518 {
519         BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
520
521         return compression_type
522                 ? __bch2_check_set_has_compressed_data(c,
523                                 1ULL << bch2_compression_opt_to_feature[compression_type])
524                 : 0;
525 }
526
527 void bch2_fs_compress_exit(struct bch_fs *c)
528 {
529         unsigned i;
530
531         mempool_exit(&c->decompress_workspace);
532         for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
533                 mempool_exit(&c->compress_workspace[i]);
534         mempool_exit(&c->compression_bounce[WRITE]);
535         mempool_exit(&c->compression_bounce[READ]);
536 }
537
538 static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
539 {
540         size_t max_extent = c->sb.encoded_extent_max << 9;
541         size_t decompress_workspace_size = 0;
542         bool decompress_workspace_needed;
543         ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
544         struct {
545                 unsigned        feature;
546                 unsigned        type;
547                 size_t          compress_workspace;
548                 size_t          decompress_workspace;
549         } compression_types[] = {
550                 { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
551                 { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
552                         zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
553                         zlib_inflate_workspacesize(), },
554                 { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
555                         ZSTD_CCtxWorkspaceBound(params.cParams),
556                         ZSTD_DCtxWorkspaceBound() },
557         }, *i;
558         int ret = 0;
559
560         pr_verbose_init(c->opts, "");
561
562         c->zstd_params = params;
563
564         for (i = compression_types;
565              i < compression_types + ARRAY_SIZE(compression_types);
566              i++)
567                 if (features & (1 << i->feature))
568                         goto have_compressed;
569
570         goto out;
571 have_compressed:
572
573         if (!mempool_initialized(&c->compression_bounce[READ])) {
574                 ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
575                                                   1, max_extent);
576                 if (ret)
577                         goto out;
578         }
579
580         if (!mempool_initialized(&c->compression_bounce[WRITE])) {
581                 ret = mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
582                                                   1, max_extent);
583                 if (ret)
584                         goto out;
585         }
586
587         for (i = compression_types;
588              i < compression_types + ARRAY_SIZE(compression_types);
589              i++) {
590                 decompress_workspace_size =
591                         max(decompress_workspace_size, i->decompress_workspace);
592
593                 if (!(features & (1 << i->feature)))
594                         continue;
595
596                 if (i->decompress_workspace)
597                         decompress_workspace_needed = true;
598
599                 if (mempool_initialized(&c->compress_workspace[i->type]))
600                         continue;
601
602                 ret = mempool_init_kvpmalloc_pool(
603                                 &c->compress_workspace[i->type],
604                                 1, i->compress_workspace);
605                 if (ret)
606                         goto out;
607         }
608
609         if (!mempool_initialized(&c->decompress_workspace)) {
610                 ret = mempool_init_kvpmalloc_pool(
611                                 &c->decompress_workspace,
612                                 1, decompress_workspace_size);
613                 if (ret)
614                         goto out;
615         }
616 out:
617         pr_verbose_init(c->opts, "ret %i", ret);
618         return ret;
619 }
620
621 int bch2_fs_compress_init(struct bch_fs *c)
622 {
623         u64 f = c->sb.features;
624
625         if (c->opts.compression)
626                 f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
627
628         if (c->opts.background_compression)
629                 f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
630
631         return __bch2_fs_compress_init(c, f);
632
633 }