#include "bcache.h"
#include "compress.h"
#include "extents.h"
#include "io.h"
#include "super-io.h"

#include <linux/lz4.h>
#include <linux/zlib.h>

enum bounced {
	BOUNCED_MAPPED,
	BOUNCED_KMALLOCED,
	BOUNCED_VMALLOCED,
	BOUNCED_MEMPOOLED,
};

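/*
 * Compressed extents are processed through a contiguous buffer: either the
 * bio's own pages vmapped in place, or a bounce buffer. enum bounced records
 * which allocation path produced the buffer, so the matching free/unmap can
 * be done in bio_unmap_or_unbounce().
 */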
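/*
 * Allocate a bounce buffer of @size bytes, trying progressively more
 * expensive fallbacks: kmalloc without warning, then the preallocated
 * mempool without blocking, then vmalloc, and finally the mempool with
 * GFP_NOIO, which may block but cannot fail. *bounced records which
 * path succeeded.
 */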
static void *__bounce_alloc(struct cache_set *c, unsigned size,
			    unsigned *bounced, int direction)
{
	void *data;

	*bounced = BOUNCED_KMALLOCED;
	data = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
	if (data)
		return data;

	*bounced = BOUNCED_MEMPOOLED;
	data = mempool_alloc(&c->compression_bounce[direction], GFP_NOWAIT);
	if (data)
		return page_address(data);

	*bounced = BOUNCED_VMALLOCED;
	data = vmalloc(size);
	if (data)
		return data;

	*bounced = BOUNCED_MEMPOOLED;
	data = mempool_alloc(&c->compression_bounce[direction], GFP_NOIO);
	return page_address(data);
}

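/*
 * Map the bio's contents contiguously into the kernel address space with
 * vmap() when the segments line up on page boundaries; otherwise fall back
 * to a bounce buffer, copying the data in up front when it's going to be
 * read (@direction == READ).
 */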
static void *__bio_map_or_bounce(struct cache_set *c,
				 struct bio *bio, struct bvec_iter start,
				 unsigned *bounced, int direction)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	bool first = true;
	unsigned prev_end = PAGE_SIZE;
	void *data;

	BUG_ON(bvec_iter_sectors(start) > BCH_ENCODED_EXTENT_MAX);

	*bounced = BOUNCED_MAPPED;

	__bio_for_each_segment(bv, bio, iter, start) {
		if ((!first && bv.bv_offset) ||
		    prev_end != PAGE_SIZE)
			goto bounce;

		prev_end = bv.bv_offset + bv.bv_len;
		nr_pages++;
		first = false;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);
	if (!data)
		goto bounce;

	return data + bio_iter_offset(bio, start);
bounce:
	data = __bounce_alloc(c, start.bi_size, bounced, direction);

	if (direction == READ)
		memcpy_from_bio(data, bio, start);

	return data;
}

static void *bio_map_or_bounce(struct cache_set *c, struct bio *bio,
			       unsigned *bounced, int direction)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, bounced, direction);
}

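/* Undo bio_map_or_bounce()/__bounce_alloc(), freeing by allocation type: */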
static void bio_unmap_or_unbounce(struct cache_set *c, void *data,
				  unsigned bounced, int direction)
{
	if (!data)
		return;

	switch (bounced) {
	case BOUNCED_MAPPED:
		vunmap((void *) ((unsigned long) data & PAGE_MASK));
		return;
	case BOUNCED_KMALLOCED:
		kfree(data);
		return;
	case BOUNCED_VMALLOCED:
		vfree(data);
		return;
	case BOUNCED_MEMPOOLED:
		mempool_free(virt_to_page(data), &c->compression_bounce[direction]);
		return;
	}
}

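/*
 * Decompress @src into @dst_data, which must hold the full uncompressed
 * size per the extent crc; the compression type also comes from the crc.
 * Returns 0 on success, -EIO if decompression fails.
 */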
static int __bio_uncompress(struct cache_set *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc128 crc)
{
	void *src_data = NULL;
	unsigned src_bounced;
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
	int ret;

	src_data = bio_map_or_bounce(c, src, &src_bounced, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_LZ4:
		ret = lz4_decompress(src_data, &src_len,
				     dst_data, dst_len);
		if (ret) {
			ret = -EIO;
			goto err;
		}
		break;
	case BCH_COMPRESSION_GZIP: {
		void *workspace;
		z_stream strm;

		workspace = kmalloc(zlib_inflate_workspacesize(),
				    GFP_NOIO|__GFP_NOWARN);
		if (!workspace) {
			mutex_lock(&c->zlib_workspace_lock);
			workspace = c->zlib_workspace;
		}

		strm.workspace	= workspace;
		strm.next_in	= src_data;
		strm.avail_in	= src_len;
		strm.next_out	= dst_data;
		strm.avail_out	= dst_len;
		zlib_inflateInit2(&strm, -MAX_WBITS);

		ret = zlib_inflate(&strm, Z_FINISH);

		if (workspace == c->zlib_workspace)
			mutex_unlock(&c->zlib_workspace_lock);
		else
			kfree(workspace);

		if (ret != Z_STREAM_END) {
			ret = -EIO;
			goto err;
		}
		break;
	}
	default:
		BUG();
	}
	ret = 0;
err:
	bio_unmap_or_unbounce(c, src_data, src_bounced, READ);
	return ret;
}

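/*
 * Decompress into a temporary buffer, then copy the live range back over
 * @bio's own pages, growing the bio to @live_data_sectors first if it
 * doesn't have enough pages.
 */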
int bch_bio_uncompress_inplace(struct cache_set *c, struct bio *bio,
			       unsigned live_data_sectors,
			       struct bch_extent_crc128 crc)
{
	void *dst_data = NULL;
	size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
	int ret = -ENOMEM;

	BUG_ON(DIV_ROUND_UP(live_data_sectors, PAGE_SECTORS) > bio->bi_max_vecs);

	/* XXX mempoolify */
	dst_data = kmalloc(dst_len, GFP_NOIO|__GFP_NOWARN);
	if (!dst_data) {
		dst_data = vmalloc(dst_len);
		if (!dst_data)
			goto err;
	}

	ret = __bio_uncompress(c, bio, dst_data, crc);
	if (ret)
		goto err;

	while (bio->bi_vcnt < DIV_ROUND_UP(live_data_sectors, PAGE_SECTORS)) {
		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];

		bv->bv_page = alloc_page(GFP_NOIO);
		if (!bv->bv_page)
			goto use_mempool;

		bv->bv_len = PAGE_SIZE;
		bv->bv_offset = 0;
		bio->bi_vcnt++;
	}

	bio->bi_iter.bi_size = live_data_sectors << 9;
copy_data:
	memcpy_to_bio(bio, bio->bi_iter, dst_data + (crc.offset << 9));
err:
	kvfree(dst_data);
	return ret;
use_mempool:
	/*
	 * We already allocated from the mempool; we can't allocate from it
	 * again without freeing the pages we already allocated, or else we
	 * could deadlock:
	 */

	bch_bio_free_pages_pool(c, bio);
	bch_bio_alloc_pages_pool(c, bio, live_data_sectors << 9);
	goto copy_data;
}

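/*
 * Decompress @src into @dst at @dst_iter. When the uncompressed size
 * matches the destination exactly, decompress straight into @dst's mapped
 * pages; otherwise decompress into a bounce buffer and copy out the live
 * range starting at crc.offset.
 */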
int bch_bio_uncompress(struct cache_set *c, struct bio *src,
		       struct bio *dst, struct bvec_iter dst_iter,
		       struct bch_extent_crc128 crc)
{
	void *dst_data = NULL;
	unsigned dst_bounced;
	size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
	int ret = -ENOMEM;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, &dst_bounced, WRITE)
		: __bounce_alloc(c, dst_len, &dst_bounced, WRITE);

	ret = __bio_uncompress(c, src, dst_data, crc);
	if (ret)
		goto err;

	if (dst_bounced)
		memcpy_to_bio(dst, dst_iter, dst_data + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data, dst_bounced, WRITE);
	return ret;
}

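/*
 * Compress @src into @dst with @compression_type. On success, *src_len is
 * the (block aligned) number of input bytes consumed and *dst_len the
 * zero-padded output size; returns nonzero if the data didn't get smaller
 * or compression failed.
 */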
static int __bio_compress(struct cache_set *c,
			  struct bio *dst, size_t *dst_len,
			  struct bio *src, size_t *src_len,
			  unsigned compression_type)
{
	void *src_data = NULL, *dst_data = NULL;
	unsigned src_bounced, dst_bounced, pad;
	int ret = -1;

	dst_data = bio_map_or_bounce(c, dst, &dst_bounced, WRITE);
	src_data = bio_map_or_bounce(c, src, &src_bounced, READ);

	switch (compression_type) {
	case BCH_COMPRESSION_LZ4: {
		void *workspace;

		*dst_len = dst->bi_iter.bi_size;
		*src_len = src->bi_iter.bi_size;

		workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);

		while (*src_len > block_bytes(c) &&
		       (ret = lz4_compress(src_data, *src_len,
					   dst_data, dst_len,
					   workspace))) {
			/*
			 * On error, the compressed data was bigger than
			 * dst_len, and -ret is the amount of data we were able
			 * to compress - round down to nearest block and try
			 * again:
			 */
			BUG_ON(ret > 0);
			BUG_ON(-ret >= *src_len);

			*src_len = round_down(-ret, block_bytes(c));
		}

		mempool_free(workspace, &c->lz4_workspace_pool);

		if (ret)
			goto err;
		break;
	}
	case BCH_COMPRESSION_GZIP: {
		void *workspace;
		z_stream strm;

		workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
							       DEF_MEM_LEVEL),
				    GFP_NOIO|__GFP_NOWARN);
		if (!workspace) {
			mutex_lock(&c->zlib_workspace_lock);
			workspace = c->zlib_workspace;
		}

		strm.workspace	= workspace;
		strm.next_in	= src_data;
		strm.avail_in	= min(src->bi_iter.bi_size,
				      dst->bi_iter.bi_size);
		strm.next_out	= dst_data;
		strm.avail_out	= dst->bi_iter.bi_size;
		zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		ret = zlib_deflate(&strm, Z_FINISH);
		if (ret != Z_STREAM_END) {
			ret = -EIO;
			goto zlib_err;
		}

		ret = zlib_deflateEnd(&strm);
		if (ret != Z_OK) {
			ret = -EIO;
			goto zlib_err;
		}

		ret = 0;
zlib_err:
		if (workspace == c->zlib_workspace)
			mutex_unlock(&c->zlib_workspace_lock);
		else
			kfree(workspace);

		if (ret)
			goto err;

		*dst_len = strm.total_out;
		*src_len = strm.total_in;
		break;
	}
	default:
		BUG();
	}

	BUG_ON(!*dst_len);
	BUG_ON(*dst_len > dst->bi_iter.bi_size);

	BUG_ON(*src_len & (block_bytes(c) - 1));
	BUG_ON(*src_len > src->bi_iter.bi_size);

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len) {
		ret = -1;
		goto err;
	}

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_bounced)
		memcpy_to_bio(dst, dst->bi_iter, dst_data);
err:
	bio_unmap_or_unbounce(c, src_data, src_bounced, READ);
	bio_unmap_or_unbounce(c, dst_data, dst_bounced, WRITE);
	return ret;
}

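/*
 * Try to compress @src into @dst; if compression fails or doesn't make the
 * data smaller, fall back to a plain copy and set *compression_type to
 * BCH_COMPRESSION_NONE. The bios' sizes are clamped for the attempt and
 * restored before returning.
 */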
void bch_bio_compress(struct cache_set *c,
		      struct bio *dst, size_t *dst_len,
		      struct bio *src, size_t *src_len,
		      unsigned *compression_type)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size =
		min(src->bi_iter.bi_size, BCH_ENCODED_EXTENT_MAX << 9);

	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size =
		min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	/* If it's only one block, don't bother trying to compress: */
	if (*compression_type != BCH_COMPRESSION_NONE &&
	    bio_sectors(src) > c->sb.block_size &&
	    !__bio_compress(c, dst, dst_len, src, src_len, *compression_type))
		goto out;

	/* If compressing failed (didn't get smaller), just copy: */
	*compression_type = BCH_COMPRESSION_NONE;
	*dst_len = *src_len = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
	bio_copy_data(dst, src);
out:
	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
}

/* Sets the feature bit for @compression_type; doesn't write the superblock: */
int bch_check_set_has_compressed_data(struct cache_set *c,
				      unsigned compression_type)
{
	switch (compression_type) {
	case BCH_COMPRESSION_NONE:
		return 0;
	case BCH_COMPRESSION_LZ4:
		if (bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4))
			return 0;

		bch_sb_set_feature(c->disk_sb, BCH_FEATURE_LZ4);
		break;
	case BCH_COMPRESSION_GZIP:
		if (bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
			return 0;

		bch_sb_set_feature(c->disk_sb, BCH_FEATURE_GZIP);
		break;
	}

	return bch_compress_init(c);
}

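/* Tear down everything bch_compress_init() set up: */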
void bch_compress_free(struct cache_set *c)
{
	vfree(c->zlib_workspace);
	mempool_exit(&c->lz4_workspace_pool);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
	free_percpu(c->bio_decompress_worker);
}

#define COMPRESSION_WORKSPACE_SIZE					\
	max_t(size_t, zlib_inflate_workspacesize(),			\
	      zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL))

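/*
 * Allocate the compression state the enabled feature bits call for:
 * per-cpu decompress workers, bounce page pools sized for the largest
 * encoded extent, the LZ4 workspace mempool and the shared zlib workspace.
 * Each piece is guarded, so this is safe to call again after enabling
 * another compression feature.
 */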
int bch_compress_init(struct cache_set *c)
{
	unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
	int ret, cpu;

	if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
	    !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
		return 0;

	if (!c->bio_decompress_worker) {
		c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
		if (!c->bio_decompress_worker)
			return -ENOMEM;

		for_each_possible_cpu(cpu) {
			struct bio_decompress_worker *d =
				per_cpu_ptr(c->bio_decompress_worker, cpu);

			d->c = c;
			INIT_WORK(&d->work, bch_bio_decompress_work);
			init_llist_head(&d->bio_list);
		}
	}

	if (!mempool_initialized(&c->compression_bounce[READ])) {
		ret = mempool_init_page_pool(&c->compression_bounce[READ],
					     1, order);
		if (ret)
			return ret;
	}

	if (!mempool_initialized(&c->compression_bounce[WRITE])) {
		ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
					     1, order);
		if (ret)
			return ret;
	}

	if (!mempool_initialized(&c->lz4_workspace_pool) &&
	    bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4)) {
		ret = mempool_init_kmalloc_pool(&c->lz4_workspace_pool,
						1, LZ4_MEM_COMPRESS);
		if (ret)
			return ret;
	}

	if (!c->zlib_workspace &&
	    bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP)) {
		c->zlib_workspace = vmalloc(COMPRESSION_WORKSPACE_SIZE);
		if (!c->zlib_workspace)
			return -ENOMEM;
	}

	return 0;
}