libbcachefs/compress.c
#include "bcachefs.h"
#include "compress.h"
#include "extents.h"
#include "io.h"
#include "super-io.h"

#include "lz4.h"
#include <linux/lz4.h>
#include <linux/zlib.h>

/* Bounce buffer: */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,
		BB_VMAP,
		BB_KMALLOC,
		BB_VMALLOC,
		BB_MEMPOOL,
	}		type;
	int		rw;
};

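/*
 * Allocate a bounce buffer of at most encoded_extent_max bytes, falling back
 * through progressively more expensive allocators: kmalloc, then the
 * per-filesystem bounce mempool without blocking, then vmalloc, and finally
 * the mempool again with GFP_NOIO (which may block but cannot fail). The
 * returned bbuf records how the buffer was obtained so that
 * bio_unmap_or_unbounce() can free it appropriately.
 */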
static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->sb.encoded_extent_max << 9);

	b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT);
	b = b ? page_address(b) : NULL;
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	b = vmalloc(size);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
	b = b ? page_address(b) : NULL;
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	BUG();
}

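/*
 * Get a linear mapping of the data in @bio starting at @start: if the whole
 * range is contained in a single segment (and we're not a highmem
 * configuration), return a direct pointer; if the segments line up on page
 * boundaries, vmap() the pages; otherwise fall back to a bounce buffer,
 * copying the bio contents into it for reads.
 */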
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	bool first = true;
	unsigned prev_end = PAGE_SIZE;
	void *data;

	BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);

#ifndef CONFIG_HIGHMEM
	__bio_for_each_contig_segment(bv, bio, iter, start) {
		if (bv.bv_len == start.bi_size)
			return (struct bbuf) {
				.b = page_address(bv.bv_page) + bv.bv_offset,
				.type = BB_NONE, .rw = rw
			};
	}
#endif
	__bio_for_each_segment(bv, bio, iter, start) {
		/*
		 * Only the first segment may start mid-page and only the last
		 * may end mid-page - otherwise the pages can't be mapped
		 * contiguously with vmap():
		 */
		if ((!first && bv.bv_offset) ||
		    prev_end != PAGE_SIZE)
			goto bounce;

		prev_end = bv.bv_offset + bv.bv_len;
		nr_pages++;
		first = false;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);

	if (data)
		return (struct bbuf) {
			.b = data + bio_iter_offset(bio, start),
			.type = BB_VMAP, .rw = rw
		};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}

static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}

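/*
 * Undo bio_map_or_bounce(): free or unmap the buffer according to how it was
 * obtained. Bounce buffers are not copied back to the bio here; writers do
 * that explicitly with memcpy_to_bio().
 */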
static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_VMALLOC:
		vfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(virt_to_page(buf.b),
			     &c->compression_bounce[buf.rw]);
		break;
	}
}

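/*
 * The kernel's zlib doesn't allocate memory itself - the caller has to supply
 * a workspace. In userspace builds regular zlib manages its own memory, so
 * this is a no-op there.
 */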
static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
#ifdef __KERNEL__
	strm->workspace = workspace;
#endif
}

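/*
 * Decompress @src into @dst_data according to crc.compression_type. The
 * destination buffer must be crc_uncompressed_size() sectors long; gzip
 * extents are raw deflate streams (no header), hence the -MAX_WBITS window
 * bits. The shared c->zlib_workspace is only used if a temporary workspace
 * can't be allocated.
 */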
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc128 crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_LZ4_OLD:
		ret = bch2_lz4_decompress(src_data.b, &src_len,
					  dst_data, dst_len);
		if (ret) {
			ret = -EIO;
			goto err;
		}
		break;
	case BCH_COMPRESSION_LZ4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len) {
			ret = -EIO;
			goto err;
		}
		break;
	case BCH_COMPRESSION_GZIP: {
		void *workspace;
		z_stream strm;

		workspace = kmalloc(zlib_inflate_workspacesize(),
				    GFP_NOIO|__GFP_NOWARN);
		if (!workspace) {
			mutex_lock(&c->zlib_workspace_lock);
			workspace = c->zlib_workspace;
		}

		strm.next_in	= src_data.b;
		strm.avail_in	= src_len;
		strm.next_out	= dst_data;
		strm.avail_out	= dst_len;
		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);

		ret = zlib_inflate(&strm, Z_FINISH);

		if (workspace == c->zlib_workspace)
			mutex_unlock(&c->zlib_workspace_lock);
		else
			kfree(workspace);

		if (ret != Z_STREAM_END) {
			ret = -EIO;
			goto err;
		}
		break;
	}
	default:
		BUG();
	}
	ret = 0;
err:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
}

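/*
 * Decompress an extent in place in @bio: decompress into a bounce buffer,
 * add pages to @bio until it can hold @live_data_sectors, then copy the live
 * portion (starting at crc.offset) back into the bio. If we can't allocate
 * pages normally, we free the bio's pages and reallocate them all from the
 * pool - allocating from the mempool a second time while still holding pages
 * from it could deadlock.
 */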
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				unsigned live_data_sectors,
				struct bch_extent_crc128 crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
	int ret = -ENOMEM;

	BUG_ON(DIV_ROUND_UP(live_data_sectors, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc_uncompressed_size(NULL, &crc) > c->sb.encoded_extent_max ||
	    crc_compressed_size(NULL, &crc)   > c->sb.encoded_extent_max)
		return -EIO;

	dst_data = __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, bio, dst_data.b, crc);
	if (ret)
		goto err;

	while (bio->bi_vcnt < DIV_ROUND_UP(live_data_sectors, PAGE_SECTORS)) {
		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];

		bv->bv_page = alloc_page(GFP_NOIO);
		if (!bv->bv_page)
			goto use_mempool;

		bv->bv_len = PAGE_SIZE;
		bv->bv_offset = 0;
		bio->bi_vcnt++;
	}

	bio->bi_iter.bi_size = live_data_sectors << 9;
copy_data:
	memcpy_to_bio(bio, bio->bi_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
use_mempool:
	/*
	 * We already allocated from mempool, we can't allocate from it again
	 * without freeing the pages we already allocated or else we could
	 * deadlock:
	 */

	bch2_bio_free_pages_pool(c, bio);
	bch2_bio_alloc_pages_pool(c, bio, live_data_sectors << 9);
	goto copy_data;
}

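/*
 * Decompress @src into @dst at @dst_iter. If the destination is exactly the
 * uncompressed size we decompress directly into the (mapped) dst bio;
 * otherwise we decompress into a bounce buffer and copy out only the
 * requested range, offset by crc.offset sectors.
 */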
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc128 crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
	int ret = -ENOMEM;

	if (crc_uncompressed_size(NULL, &crc) > c->sb.encoded_extent_max ||
	    crc_compressed_size(NULL, &crc)   > c->sb.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}

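/*
 * Compress @src into @dst. On success, *src_len and *dst_len are set to the
 * number of input bytes consumed and output bytes produced, and the output
 * is zero-padded up to the filesystem block size. Returns nonzero if
 * compression failed or the result wasn't smaller than the input, in which
 * case the caller falls back to storing the data uncompressed.
 */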
static int __bio_compress(struct bch_fs *c,
			  struct bio *dst, size_t *dst_len,
			  struct bio *src, size_t *src_len,
			  unsigned *compression_type)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	unsigned pad;
	int ret = 0;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	switch (*compression_type) {
	case BCH_COMPRESSION_LZ4_OLD:
		*compression_type = BCH_COMPRESSION_LZ4;
		/* fallthrough */
	case BCH_COMPRESSION_LZ4: {
		void *workspace;
		int len = src->bi_iter.bi_size;

		workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);

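		/*
		 * LZ4_compress_destSize() tells us how much input it consumed
		 * to fill the destination, but the consumed length has to be
		 * block aligned: if it isn't, round down and recompress with
		 * the smaller input until it is (or until we're down to a
		 * single block, which isn't worth compressing).
		 */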
		while (1) {
			if (len <= block_bytes(c)) {
				ret = 0;
				break;
			}

			ret = LZ4_compress_destSize(
					src_data.b,	dst_data.b,
					&len,		dst->bi_iter.bi_size,
					workspace);
			if (ret >= len) {
				/* uncompressible: */
				ret = 0;
				break;
			}

			if (!(len & (block_bytes(c) - 1)))
				break;
			len = round_down(len, block_bytes(c));
		}
		mempool_free(workspace, &c->lz4_workspace_pool);

		if (!ret)
			goto err;

		*src_len = len;
		*dst_len = ret;
		ret = 0;
		break;
	}
	case BCH_COMPRESSION_GZIP: {
		void *workspace;
		z_stream strm;

		workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
							       DEF_MEM_LEVEL),
				    GFP_NOIO|__GFP_NOWARN);
		if (!workspace) {
			mutex_lock(&c->zlib_workspace_lock);
			workspace = c->zlib_workspace;
		}

		strm.next_in	= src_data.b;
		strm.avail_in	= min(src->bi_iter.bi_size,
				      dst->bi_iter.bi_size);
		strm.next_out	= dst_data.b;
		strm.avail_out	= dst->bi_iter.bi_size;
		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		ret = zlib_deflate(&strm, Z_FINISH);
		if (ret != Z_STREAM_END) {
			ret = -EIO;
			goto zlib_err;
		}

		ret = zlib_deflateEnd(&strm);
		if (ret != Z_OK) {
			ret = -EIO;
			goto zlib_err;
		}

		ret = 0;
zlib_err:
		if (workspace == c->zlib_workspace)
			mutex_unlock(&c->zlib_workspace_lock);
		else
			kfree(workspace);

		if (ret)
			goto err;

		*dst_len = strm.total_out;
		*src_len = strm.total_in;
		break;
	}
	default:
		BUG();
	}

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
err:
	ret = -1;
	goto out;
}

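/*
 * Compress @src into @dst, setting *compression_type, *src_len and *dst_len
 * to describe the result. If compression isn't worthwhile (or fails), fall
 * back to copying the data verbatim with BCH_COMPRESSION_NONE.
 */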
void bch2_bio_compress(struct bch_fs *c,
		       struct bio *dst, size_t *dst_len,
		       struct bio *src, size_t *src_len,
		       unsigned *compression_type)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->sb.encoded_extent_max << 9);

	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size =
		min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	/* If it's only one block, don't bother trying to compress: */
	if (*compression_type != BCH_COMPRESSION_NONE &&
	    bio_sectors(src) > c->opts.block_size &&
	    !__bio_compress(c, dst, dst_len, src, src_len, compression_type))
		goto out;

	/* If compressing failed (didn't get smaller), just copy: */
	*compression_type = BCH_COMPRESSION_NONE;
	*dst_len = *src_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
	bio_copy_data(dst, src);
out:
	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));
}

/*
 * Set the superblock feature bit for @compression_type and allocate whatever
 * state it needs; doesn't write the superblock, that's up to the caller:
 */
int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_type)
{
	switch (compression_type) {
	case BCH_COMPRESSION_OPT_NONE:
		return 0;
	case BCH_COMPRESSION_OPT_LZ4:
		if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4))
			return 0;

		bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_LZ4);
		break;
	case BCH_COMPRESSION_OPT_GZIP:
		if (bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
			return 0;

		bch2_sb_set_feature(c->disk_sb, BCH_FEATURE_GZIP);
		break;
	default:
		BUG();
	}

	return bch2_fs_compress_init(c);
}

void bch2_fs_compress_exit(struct bch_fs *c)
{
	vfree(c->zlib_workspace);
	mempool_exit(&c->lz4_workspace_pool);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}

#define COMPRESSION_WORKSPACE_SIZE					\
	max_t(size_t, zlib_inflate_workspacesize(),			\
	      zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL))

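/*
 * Allocate compression state for whichever compression features are enabled
 * in the superblock: one max-sized bounce buffer each for reads and writes,
 * an LZ4 workspace mempool, and a single shared zlib workspace (sized for
 * both inflate and deflate) protected by zlib_workspace_lock. Safe to call
 * repeatedly; already-initialized pools are left alone.
 */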
int bch2_fs_compress_init(struct bch_fs *c)
{
	unsigned order = get_order(c->sb.encoded_extent_max << 9);
	int ret;

	if (!bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
	    !bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
		return 0;

	if (!mempool_initialized(&c->compression_bounce[READ])) {
		ret = mempool_init_page_pool(&c->compression_bounce[READ],
					     1, order);
		if (ret)
			return ret;
	}

	if (!mempool_initialized(&c->compression_bounce[WRITE])) {
		ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
					     1, order);
		if (ret)
			return ret;
	}

	if (!mempool_initialized(&c->lz4_workspace_pool) &&
	    bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4)) {
		ret = mempool_init_kmalloc_pool(&c->lz4_workspace_pool,
						1, LZ4_MEM_COMPRESS);
		if (ret)
			return ret;
	}

	if (!c->zlib_workspace &&
	    bch2_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP)) {
		c->zlib_workspace = vmalloc(COMPRESSION_WORKSPACE_SIZE);
		if (!c->zlib_workspace)
			return -ENOMEM;
	}

	return 0;
}