git.sesse.net Git - bcachefs-tools-debian/blob - libbcachefs/compress.c
Update bcachefs sources to ed6b7f81a7 six locks: Disable percpu read lock mode in...
[bcachefs-tools-debian] / libbcachefs / compress.c
1 // SPDX-License-Identifier: GPL-2.0
2 #include "bcachefs.h"
3 #include "checksum.h"
4 #include "compress.h"
5 #include "extents.h"
6 #include "io.h"
7 #include "super-io.h"
8
9 #include <linux/lz4.h>
10 #include <linux/zlib.h>
11 #include <linux/zstd.h>
12
13 /* Bounce buffer: */
/*
 * Handle for a linear buffer that either aliases a bio's pages or is a
 * separately allocated bounce buffer.  @type records how @b was obtained so
 * that bio_unmap_or_unbounce() can release it the matching way.
 */
struct bbuf {
	/* Start of the linear data. */
	void		*b;
	enum bbuf_type {
		/* @b points directly at the bio's pages; nothing to release. */
		BB_NONE,
		/* @b lies inside a vmap() mapping of the bio's pages. */
		BB_VMAP,
		/* @b came from kmalloc(). */
		BB_KMALLOC,
		/* @b came from the compression_bounce[@rw] mempool. */
		BB_MEMPOOL,
	}		type;
	/* READ or WRITE; selects the bounce mempool for BB_MEMPOOL buffers. */
	int		rw;
};
24
25 static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
26 {
27         void *b;
28
29         BUG_ON(size > c->opts.encoded_extent_max);
30
31         b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
32         if (b)
33                 return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
34
35         b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
36         if (b)
37                 return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
38
39         BUG();
40 }
41
42 static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
43 {
44         struct bio_vec bv;
45         struct bvec_iter iter;
46         void *expected_start = NULL;
47
48         __bio_for_each_bvec(bv, bio, iter, start) {
49                 if (expected_start &&
50                     expected_start != page_address(bv.bv_page) + bv.bv_offset)
51                         return false;
52
53                 expected_start = page_address(bv.bv_page) +
54                         bv.bv_offset + bv.bv_len;
55         }
56
57         return true;
58 }
59
60 static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
61                                        struct bvec_iter start, int rw)
62 {
63         struct bbuf ret;
64         struct bio_vec bv;
65         struct bvec_iter iter;
66         unsigned nr_pages = 0;
67         struct page *stack_pages[16];
68         struct page **pages = NULL;
69         void *data;
70
71         BUG_ON(start.bi_size > c->opts.encoded_extent_max);
72
73         if (!PageHighMem(bio_iter_page(bio, start)) &&
74             bio_phys_contig(bio, start))
75                 return (struct bbuf) {
76                         .b = page_address(bio_iter_page(bio, start)) +
77                                 bio_iter_offset(bio, start),
78                         .type = BB_NONE, .rw = rw
79                 };
80
81         /* check if we can map the pages contiguously: */
82         __bio_for_each_segment(bv, bio, iter, start) {
83                 if (iter.bi_size != start.bi_size &&
84                     bv.bv_offset)
85                         goto bounce;
86
87                 if (bv.bv_len < iter.bi_size &&
88                     bv.bv_offset + bv.bv_len < PAGE_SIZE)
89                         goto bounce;
90
91                 nr_pages++;
92         }
93
94         BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
95
96         pages = nr_pages > ARRAY_SIZE(stack_pages)
97                 ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
98                 : stack_pages;
99         if (!pages)
100                 goto bounce;
101
102         nr_pages = 0;
103         __bio_for_each_segment(bv, bio, iter, start)
104                 pages[nr_pages++] = bv.bv_page;
105
106         data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
107         if (pages != stack_pages)
108                 kfree(pages);
109
110         if (data)
111                 return (struct bbuf) {
112                         .b = data + bio_iter_offset(bio, start),
113                         .type = BB_VMAP, .rw = rw
114                 };
115 bounce:
116         ret = __bounce_alloc(c, start.bi_size, rw);
117
118         if (rw == READ)
119                 memcpy_from_bio(ret.b, bio, start);
120
121         return ret;
122 }
123
124 static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
125 {
126         return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
127 }
128
129 static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
130 {
131         switch (buf.type) {
132         case BB_NONE:
133                 break;
134         case BB_VMAP:
135                 vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
136                 break;
137         case BB_KMALLOC:
138                 kfree(buf.b);
139                 break;
140         case BB_MEMPOOL:
141                 mempool_free(buf.b, &c->compression_bounce[buf.rw]);
142                 break;
143         }
144 }
145
146 static inline void zlib_set_workspace(z_stream *strm, void *workspace)
147 {
148 #ifdef __KERNEL__
149         strm->workspace = workspace;
150 #endif
151 }
152
153 static int __bio_uncompress(struct bch_fs *c, struct bio *src,
154                             void *dst_data, struct bch_extent_crc_unpacked crc)
155 {
156         struct bbuf src_data = { NULL };
157         size_t src_len = src->bi_iter.bi_size;
158         size_t dst_len = crc.uncompressed_size << 9;
159         void *workspace;
160         int ret;
161
162         src_data = bio_map_or_bounce(c, src, READ);
163
164         switch (crc.compression_type) {
165         case BCH_COMPRESSION_TYPE_lz4_old:
166         case BCH_COMPRESSION_TYPE_lz4:
167                 ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
168                                                   src_len, dst_len, dst_len);
169                 if (ret != dst_len)
170                         goto err;
171                 break;
172         case BCH_COMPRESSION_TYPE_gzip: {
173                 z_stream strm = {
174                         .next_in        = src_data.b,
175                         .avail_in       = src_len,
176                         .next_out       = dst_data,
177                         .avail_out      = dst_len,
178                 };
179
180                 workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
181
182                 zlib_set_workspace(&strm, workspace);
183                 zlib_inflateInit2(&strm, -MAX_WBITS);
184                 ret = zlib_inflate(&strm, Z_FINISH);
185
186                 mempool_free(workspace, &c->decompress_workspace);
187
188                 if (ret != Z_STREAM_END)
189                         goto err;
190                 break;
191         }
192         case BCH_COMPRESSION_TYPE_zstd: {
193                 ZSTD_DCtx *ctx;
194                 size_t real_src_len = le32_to_cpup(src_data.b);
195
196                 if (real_src_len > src_len - 4)
197                         goto err;
198
199                 workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
200                 ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
201
202                 ret = zstd_decompress_dctx(ctx,
203                                 dst_data,       dst_len,
204                                 src_data.b + 4, real_src_len);
205
206                 mempool_free(workspace, &c->decompress_workspace);
207
208                 if (ret != dst_len)
209                         goto err;
210                 break;
211         }
212         default:
213                 BUG();
214         }
215         ret = 0;
216 out:
217         bio_unmap_or_unbounce(c, src_data);
218         return ret;
219 err:
220         ret = -EIO;
221         goto out;
222 }
223
224 int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
225                                 struct bch_extent_crc_unpacked *crc)
226 {
227         struct bbuf data = { NULL };
228         size_t dst_len = crc->uncompressed_size << 9;
229
230         /* bio must own its pages: */
231         BUG_ON(!bio->bi_vcnt);
232         BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
233
234         if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
235             crc->compressed_size << 9   > c->opts.encoded_extent_max) {
236                 bch_err(c, "error rewriting existing data: extent too big");
237                 return -EIO;
238         }
239
240         data = __bounce_alloc(c, dst_len, WRITE);
241
242         if (__bio_uncompress(c, bio, data.b, *crc)) {
243                 bch_err(c, "error rewriting existing data: decompression error");
244                 bio_unmap_or_unbounce(c, data);
245                 return -EIO;
246         }
247
248         /*
249          * XXX: don't have a good way to assert that the bio was allocated with
250          * enough space, we depend on bch2_move_extent doing the right thing
251          */
252         bio->bi_iter.bi_size = crc->live_size << 9;
253
254         memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
255
256         crc->csum_type          = 0;
257         crc->compression_type   = 0;
258         crc->compressed_size    = crc->live_size;
259         crc->uncompressed_size  = crc->live_size;
260         crc->offset             = 0;
261         crc->csum               = (struct bch_csum) { 0, 0 };
262
263         bio_unmap_or_unbounce(c, data);
264         return 0;
265 }
266
267 int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
268                        struct bio *dst, struct bvec_iter dst_iter,
269                        struct bch_extent_crc_unpacked crc)
270 {
271         struct bbuf dst_data = { NULL };
272         size_t dst_len = crc.uncompressed_size << 9;
273         int ret;
274
275         if (crc.uncompressed_size << 9  > c->opts.encoded_extent_max ||
276             crc.compressed_size << 9    > c->opts.encoded_extent_max)
277                 return -EIO;
278
279         dst_data = dst_len == dst_iter.bi_size
280                 ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
281                 : __bounce_alloc(c, dst_len, WRITE);
282
283         ret = __bio_uncompress(c, src, dst_data.b, crc);
284         if (ret)
285                 goto err;
286
287         if (dst_data.type != BB_NONE &&
288             dst_data.type != BB_VMAP)
289                 memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
290 err:
291         bio_unmap_or_unbounce(c, dst_data);
292         return ret;
293 }
294
295 static int attempt_compress(struct bch_fs *c,
296                             void *workspace,
297                             void *dst, size_t dst_len,
298                             void *src, size_t src_len,
299                             enum bch_compression_type compression_type)
300 {
301         switch (compression_type) {
302         case BCH_COMPRESSION_TYPE_lz4: {
303                 int len = src_len;
304                 int ret = LZ4_compress_destSize(
305                                 src,            dst,
306                                 &len,           dst_len,
307                                 workspace);
308
309                 if (len < src_len)
310                         return -len;
311
312                 return ret;
313         }
314         case BCH_COMPRESSION_TYPE_gzip: {
315                 z_stream strm = {
316                         .next_in        = src,
317                         .avail_in       = src_len,
318                         .next_out       = dst,
319                         .avail_out      = dst_len,
320                 };
321
322                 zlib_set_workspace(&strm, workspace);
323                 zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
324                                   Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
325                                   Z_DEFAULT_STRATEGY);
326
327                 if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
328                         return 0;
329
330                 if (zlib_deflateEnd(&strm) != Z_OK)
331                         return 0;
332
333                 return strm.total_out;
334         }
335         case BCH_COMPRESSION_TYPE_zstd: {
336                 ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
337                         zstd_cctx_workspace_bound(&c->zstd_params.cParams));
338
339                 /*
340                  * ZSTD requires that when we decompress we pass in the exact
341                  * compressed size - rounding it up to the nearest sector
342                  * doesn't work, so we use the first 4 bytes of the buffer for
343                  * that.
344                  *
345                  * Additionally, the ZSTD code seems to have a bug where it will
346                  * write just past the end of the buffer - so subtract a fudge
347                  * factor (7 bytes) from the dst buffer size to account for
348                  * that.
349                  */
350                 size_t len = zstd_compress_cctx(ctx,
351                                 dst + 4,        dst_len - 4 - 7,
352                                 src,            src_len,
353                                 &c->zstd_params);
354                 if (zstd_is_error(len))
355                         return 0;
356
357                 *((__le32 *) dst) = cpu_to_le32(len);
358                 return len + 4;
359         }
360         default:
361                 BUG();
362         }
363 }
364
365 static unsigned __bio_compress(struct bch_fs *c,
366                                struct bio *dst, size_t *dst_len,
367                                struct bio *src, size_t *src_len,
368                                enum bch_compression_type compression_type)
369 {
370         struct bbuf src_data = { NULL }, dst_data = { NULL };
371         void *workspace;
372         unsigned pad;
373         int ret = 0;
374
375         BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
376         BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
377
378         /* If it's only one block, don't bother trying to compress: */
379         if (src->bi_iter.bi_size <= c->opts.block_size)
380                 return BCH_COMPRESSION_TYPE_incompressible;
381
382         dst_data = bio_map_or_bounce(c, dst, WRITE);
383         src_data = bio_map_or_bounce(c, src, READ);
384
385         workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);
386
387         *src_len = src->bi_iter.bi_size;
388         *dst_len = dst->bi_iter.bi_size;
389
390         /*
391          * XXX: this algorithm sucks when the compression code doesn't tell us
392          * how much would fit, like LZ4 does:
393          */
394         while (1) {
395                 if (*src_len <= block_bytes(c)) {
396                         ret = -1;
397                         break;
398                 }
399
400                 ret = attempt_compress(c, workspace,
401                                        dst_data.b,      *dst_len,
402                                        src_data.b,      *src_len,
403                                        compression_type);
404                 if (ret > 0) {
405                         *dst_len = ret;
406                         ret = 0;
407                         break;
408                 }
409
410                 /* Didn't fit: should we retry with a smaller amount?  */
411                 if (*src_len <= *dst_len) {
412                         ret = -1;
413                         break;
414                 }
415
416                 /*
417                  * If ret is negative, it's a hint as to how much data would fit
418                  */
419                 BUG_ON(-ret >= *src_len);
420
421                 if (ret < 0)
422                         *src_len = -ret;
423                 else
424                         *src_len -= (*src_len - *dst_len) / 2;
425                 *src_len = round_down(*src_len, block_bytes(c));
426         }
427
428         mempool_free(workspace, &c->compress_workspace[compression_type]);
429
430         if (ret)
431                 goto err;
432
433         /* Didn't get smaller: */
434         if (round_up(*dst_len, block_bytes(c)) >= *src_len)
435                 goto err;
436
437         pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
438
439         memset(dst_data.b + *dst_len, 0, pad);
440         *dst_len += pad;
441
442         if (dst_data.type != BB_NONE &&
443             dst_data.type != BB_VMAP)
444                 memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
445
446         BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
447         BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
448         BUG_ON(*dst_len & (block_bytes(c) - 1));
449         BUG_ON(*src_len & (block_bytes(c) - 1));
450 out:
451         bio_unmap_or_unbounce(c, src_data);
452         bio_unmap_or_unbounce(c, dst_data);
453         return compression_type;
454 err:
455         compression_type = BCH_COMPRESSION_TYPE_incompressible;
456         goto out;
457 }
458
459 unsigned bch2_bio_compress(struct bch_fs *c,
460                            struct bio *dst, size_t *dst_len,
461                            struct bio *src, size_t *src_len,
462                            unsigned compression_type)
463 {
464         unsigned orig_dst = dst->bi_iter.bi_size;
465         unsigned orig_src = src->bi_iter.bi_size;
466
467         /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
468         src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
469                                      c->opts.encoded_extent_max);
470         /* Don't generate a bigger output than input: */
471         dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
472
473         if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
474                 compression_type = BCH_COMPRESSION_TYPE_lz4;
475
476         compression_type =
477                 __bio_compress(c, dst, dst_len, src, src_len, compression_type);
478
479         dst->bi_iter.bi_size = orig_dst;
480         src->bi_iter.bi_size = orig_src;
481         return compression_type;
482 }
483
484 static int __bch2_fs_compress_init(struct bch_fs *, u64);
485
486 #define BCH_FEATURE_none        0
487
488 static const unsigned bch2_compression_opt_to_feature[] = {
489 #define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
490         BCH_COMPRESSION_OPTS()
491 #undef x
492 };
493
494 #undef BCH_FEATURE_none
495
496 static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
497 {
498         int ret = 0;
499
500         if ((c->sb.features & f) == f)
501                 return 0;
502
503         mutex_lock(&c->sb_lock);
504
505         if ((c->sb.features & f) == f) {
506                 mutex_unlock(&c->sb_lock);
507                 return 0;
508         }
509
510         ret = __bch2_fs_compress_init(c, c->sb.features|f);
511         if (ret) {
512                 mutex_unlock(&c->sb_lock);
513                 return ret;
514         }
515
516         c->disk_sb.sb->features[0] |= cpu_to_le64(f);
517         bch2_write_super(c);
518         mutex_unlock(&c->sb_lock);
519
520         return 0;
521 }
522
523 int bch2_check_set_has_compressed_data(struct bch_fs *c,
524                                        unsigned compression_type)
525 {
526         BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
527
528         return compression_type
529                 ? __bch2_check_set_has_compressed_data(c,
530                                 1ULL << bch2_compression_opt_to_feature[compression_type])
531                 : 0;
532 }
533
534 void bch2_fs_compress_exit(struct bch_fs *c)
535 {
536         unsigned i;
537
538         mempool_exit(&c->decompress_workspace);
539         for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
540                 mempool_exit(&c->compress_workspace[i]);
541         mempool_exit(&c->compression_bounce[WRITE]);
542         mempool_exit(&c->compression_bounce[READ]);
543 }
544
545 static int _bch2_fs_compress_init(struct bch_fs *c, u64 features)
546 {
547         size_t decompress_workspace_size = 0;
548         bool decompress_workspace_needed;
549         ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
550         struct {
551                 unsigned        feature;
552                 unsigned        type;
553                 size_t          compress_workspace;
554                 size_t          decompress_workspace;
555         } compression_types[] = {
556                 { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
557                 { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
558                         zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
559                         zlib_inflate_workspacesize(), },
560                 { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
561                         zstd_cctx_workspace_bound(&params.cParams),
562                         zstd_dctx_workspace_bound() },
563         }, *i;
564         bool have_compressed = false;
565
566         c->zstd_params = params;
567
568         for (i = compression_types;
569              i < compression_types + ARRAY_SIZE(compression_types);
570              i++)
571                 have_compressed |= (features & (1 << i->feature)) != 0;
572
573         if (!have_compressed)
574                 return 0;
575
576         if (!mempool_initialized(&c->compression_bounce[READ]) &&
577             mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
578                                         1, c->opts.encoded_extent_max))
579                 return -BCH_ERR_ENOMEM_compression_bounce_read_init;
580
581         if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
582             mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
583                                         1, c->opts.encoded_extent_max))
584                 return -BCH_ERR_ENOMEM_compression_bounce_write_init;
585
586         for (i = compression_types;
587              i < compression_types + ARRAY_SIZE(compression_types);
588              i++) {
589                 decompress_workspace_size =
590                         max(decompress_workspace_size, i->decompress_workspace);
591
592                 if (!(features & (1 << i->feature)))
593                         continue;
594
595                 if (i->decompress_workspace)
596                         decompress_workspace_needed = true;
597
598                 if (mempool_initialized(&c->compress_workspace[i->type]))
599                         continue;
600
601                 if (mempool_init_kvpmalloc_pool(
602                                 &c->compress_workspace[i->type],
603                                 1, i->compress_workspace))
604                         return -BCH_ERR_ENOMEM_compression_workspace_init;
605         }
606
607         if (!mempool_initialized(&c->decompress_workspace) &&
608             mempool_init_kvpmalloc_pool(&c->decompress_workspace,
609                                         1, decompress_workspace_size))
610                 return -BCH_ERR_ENOMEM_decompression_workspace_init;
611
612         return 0;
613 }
614
615 static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
616 {
617         int ret;
618
619         pr_verbose_init(c->opts, "");
620         ret = _bch2_fs_compress_init(c, features);
621         pr_verbose_init(c->opts, "ret %i", ret);
622
623         return ret;
624 }
625
626 int bch2_fs_compress_init(struct bch_fs *c)
627 {
628         u64 f = c->sb.features;
629
630         if (c->opts.compression)
631                 f |= 1ULL << bch2_compression_opt_to_feature[c->opts.compression];
632
633         if (c->opts.background_compression)
634                 f |= 1ULL << bch2_compression_opt_to_feature[c->opts.background_compression];
635
636         return __bch2_fs_compress_init(c, f);
637
638 }