git.sesse.net Git - bcachefs-tools-debian/blobdiff - libbcachefs/compress.c
Update bcachefs sources to 070ec8d07b bcachefs: Snapshot depth, skiplist fields
[bcachefs-tools-debian] / libbcachefs / compress.c
index 6379905bad7b4ee341e9fea7244a2c94b28dc8f7..48427a270840b2812af39be06eae699e68330c34 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "checksum.h"
 #include "compress.h"
@@ -5,7 +6,6 @@
 #include "io.h"
 #include "super-io.h"
 
-#include "lz4.h"
 #include <linux/lz4.h>
 #include <linux/zlib.h>
 #include <linux/zstd.h>
@@ -17,7 +17,6 @@ struct bbuf {
                BB_NONE,
                BB_VMAP,
                BB_KMALLOC,
-               BB_VMALLOC,
                BB_MEMPOOL,
        }               type;
        int             rw;
@@ -27,27 +26,35 @@ static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
 {
        void *b;
 
-       BUG_ON(size > c->sb.encoded_extent_max << 9);
+       BUG_ON(size > c->opts.encoded_extent_max);
 
-       b = kmalloc(size, GFP_NOIO|__GFP_NOWARN);
+       b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
        if (b)
                return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
 
-       b = mempool_alloc(&c->compression_bounce[rw], GFP_NOWAIT);
-       b = b ? page_address(b) : NULL;
+       b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
        if (b)
                return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
 
-       b = vmalloc(size);
-       if (b)
-               return (struct bbuf) { .b = b, .type = BB_VMALLOC, .rw = rw };
+       BUG();
+}
 
-       b = mempool_alloc(&c->compression_bounce[rw], GFP_NOIO);
-       b = b ? page_address(b) : NULL;
-       if (b)
-               return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+       struct bio_vec bv;
+       struct bvec_iter iter;
+       void *expected_start = NULL;
 
-       BUG();
+       __bio_for_each_bvec(bv, bio, iter, start) {
+               if (expected_start &&
+                   expected_start != page_address(bv.bv_page) + bv.bv_offset)
+                       return false;
+
+               expected_start = page_address(bv.bv_page) +
+                       bv.bv_offset + bv.bv_len;
+       }
+
+       return true;
 }
 
 static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
@@ -59,34 +66,35 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
        unsigned nr_pages = 0;
        struct page *stack_pages[16];
        struct page **pages = NULL;
-       bool first = true;
-       unsigned prev_end = PAGE_SIZE;
        void *data;
 
-       BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
+       BUG_ON(start.bi_size > c->opts.encoded_extent_max);
 
-#ifndef CONFIG_HIGHMEM
-       __bio_for_each_contig_segment(bv, bio, iter, start) {
-               if (bv.bv_len == start.bi_size)
-                       return (struct bbuf) {
-                               .b = page_address(bv.bv_page) + bv.bv_offset,
-                               .type = BB_NONE, .rw = rw
-                       };
-       }
-#endif
+       if (!PageHighMem(bio_iter_page(bio, start)) &&
+           bio_phys_contig(bio, start))
+               return (struct bbuf) {
+                       .b = page_address(bio_iter_page(bio, start)) +
+                               bio_iter_offset(bio, start),
+                       .type = BB_NONE, .rw = rw
+               };
+
+       /* check if we can map the pages contiguously: */
        __bio_for_each_segment(bv, bio, iter, start) {
-               if ((!first && bv.bv_offset) ||
-                   prev_end != PAGE_SIZE)
+               if (iter.bi_size != start.bi_size &&
+                   bv.bv_offset)
+                       goto bounce;
+
+               if (bv.bv_len < iter.bi_size &&
+                   bv.bv_offset + bv.bv_len < PAGE_SIZE)
                        goto bounce;
 
-               prev_end = bv.bv_offset + bv.bv_len;
                nr_pages++;
        }
 
        BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
 
        pages = nr_pages > ARRAY_SIZE(stack_pages)
-               ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOIO)
+               ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
                : stack_pages;
        if (!pages)
                goto bounce;
@@ -129,12 +137,8 @@ static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
        case BB_KMALLOC:
                kfree(buf.b);
                break;
-       case BB_VMALLOC:
-               vfree(buf.b);
-               break;
        case BB_MEMPOOL:
-               mempool_free(virt_to_page(buf.b),
-                            &c->compression_bounce[buf.rw]);
+               mempool_free(buf.b, &c->compression_bounce[buf.rw]);
                break;
        }
 }
@@ -158,19 +162,14 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
        src_data = bio_map_or_bounce(c, src, READ);
 
        switch (crc.compression_type) {
-       case BCH_COMPRESSION_LZ4_OLD:
-               ret = bch2_lz4_decompress(src_data.b, &src_len,
-                                    dst_data, dst_len);
-               if (ret)
-                       goto err;
-               break;
-       case BCH_COMPRESSION_LZ4:
+       case BCH_COMPRESSION_TYPE_lz4_old:
+       case BCH_COMPRESSION_TYPE_lz4:
                ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
                                                  src_len, dst_len, dst_len);
                if (ret != dst_len)
                        goto err;
                break;
-       case BCH_COMPRESSION_GZIP: {
+       case BCH_COMPRESSION_TYPE_gzip: {
                z_stream strm = {
                        .next_in        = src_data.b,
                        .avail_in       = src_len,
@@ -178,7 +177,7 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
                        .avail_out      = dst_len,
                };
 
-               workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
+               workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
 
                zlib_set_workspace(&strm, workspace);
                zlib_inflateInit2(&strm, -MAX_WBITS);
@@ -190,22 +189,23 @@ static int __bio_uncompress(struct bch_fs *c, struct bio *src,
                        goto err;
                break;
        }
-       case BCH_COMPRESSION_ZSTD: {
+       case BCH_COMPRESSION_TYPE_zstd: {
                ZSTD_DCtx *ctx;
-               size_t len;
+               size_t real_src_len = le32_to_cpup(src_data.b);
 
-               workspace = mempool_alloc(&c->decompress_workspace, GFP_NOIO);
-               ctx = ZSTD_initDCtx(workspace, ZSTD_DCtxWorkspaceBound());
+               if (real_src_len > src_len - 4)
+                       goto err;
 
-               src_len = le32_to_cpup(src_data.b);
+               workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
+               ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
 
-               len = ZSTD_decompressDCtx(ctx,
+               ret = zstd_decompress_dctx(ctx,
                                dst_data,       dst_len,
-                               src_data.b + 4, src_len);
+                               src_data.b + 4, real_src_len);
 
                mempool_free(workspace, &c->decompress_workspace);
 
-               if (len != dst_len)
+               if (ret != dst_len)
                        goto err;
                break;
        }
@@ -231,8 +231,8 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
        BUG_ON(!bio->bi_vcnt);
        BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
 
-       if (crc->uncompressed_size      > c->sb.encoded_extent_max ||
-           crc->compressed_size        > c->sb.encoded_extent_max) {
+       if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
+           crc->compressed_size << 9   > c->opts.encoded_extent_max) {
                bch_err(c, "error rewriting existing data: extent too big");
                return -EIO;
        }
@@ -246,10 +246,10 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
        }
 
        /*
-        * might have to free existing pages and retry allocation from mempool -
-        * do this _after_ decompressing:
+        * XXX: don't have a good way to assert that the bio was allocated with
+        * enough space, we depend on bch2_move_extent doing the right thing
         */
-       bch2_bio_alloc_more_pages_pool(c, bio, crc->live_size << 9);
+       bio->bi_iter.bi_size = crc->live_size << 9;
 
        memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
 
@@ -270,10 +270,10 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
 {
        struct bbuf dst_data = { NULL };
        size_t dst_len = crc.uncompressed_size << 9;
-       int ret = -ENOMEM;
+       int ret;
 
-       if (crc.uncompressed_size       > c->sb.encoded_extent_max ||
-           crc.compressed_size         > c->sb.encoded_extent_max)
+       if (crc.uncompressed_size << 9  > c->opts.encoded_extent_max ||
+           crc.compressed_size << 9    > c->opts.encoded_extent_max)
                return -EIO;
 
        dst_data = dst_len == dst_iter.bi_size
@@ -284,7 +284,8 @@ int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
        if (ret)
                goto err;
 
-       if (dst_data.type != BB_NONE)
+       if (dst_data.type != BB_NONE &&
+           dst_data.type != BB_VMAP)
                memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
 err:
        bio_unmap_or_unbounce(c, dst_data);
@@ -295,10 +296,10 @@ static int attempt_compress(struct bch_fs *c,
                            void *workspace,
                            void *dst, size_t dst_len,
                            void *src, size_t src_len,
-                           unsigned compression_type)
+                           enum bch_compression_type compression_type)
 {
        switch (compression_type) {
-       case BCH_COMPRESSION_LZ4: {
+       case BCH_COMPRESSION_TYPE_lz4: {
                int len = src_len;
                int ret = LZ4_compress_destSize(
                                src,            dst,
@@ -310,7 +311,7 @@ static int attempt_compress(struct bch_fs *c,
 
                return ret;
        }
-       case BCH_COMPRESSION_GZIP: {
+       case BCH_COMPRESSION_TYPE_gzip: {
                z_stream strm = {
                        .next_in        = src,
                        .avail_in       = src_len,
@@ -331,15 +332,26 @@ static int attempt_compress(struct bch_fs *c,
 
                return strm.total_out;
        }
-       case BCH_COMPRESSION_ZSTD: {
-               ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
-                       ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+       case BCH_COMPRESSION_TYPE_zstd: {
+               ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
+                       zstd_cctx_workspace_bound(&c->zstd_params.cParams));
 
-               size_t len = ZSTD_compressCCtx(ctx,
-                               dst + 4,        dst_len - 4,
+               /*
+                * ZSTD requires that when we decompress we pass in the exact
+                * compressed size - rounding it up to the nearest sector
+                * doesn't work, so we use the first 4 bytes of the buffer for
+                * that.
+                *
+                * Additionally, the ZSTD code seems to have a bug where it will
+                * write just past the end of the buffer - so subtract a fudge
+                * factor (7 bytes) from the dst buffer size to account for
+                * that.
+                */
+               size_t len = zstd_compress_cctx(ctx,
+                               dst + 4,        dst_len - 4 - 7,
                                src,            src_len,
-                               c->zstd_params);
-               if (ZSTD_isError(len))
+                               &c->zstd_params);
+               if (zstd_is_error(len))
                        return 0;
 
                *((__le32 *) dst) = cpu_to_le32(len);
@@ -353,24 +365,24 @@ static int attempt_compress(struct bch_fs *c,
 static unsigned __bio_compress(struct bch_fs *c,
                               struct bio *dst, size_t *dst_len,
                               struct bio *src, size_t *src_len,
-                              unsigned compression_type)
+                              enum bch_compression_type compression_type)
 {
        struct bbuf src_data = { NULL }, dst_data = { NULL };
        void *workspace;
        unsigned pad;
        int ret = 0;
 
-       BUG_ON(compression_type >= BCH_COMPRESSION_NR);
+       BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
        BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
 
        /* If it's only one block, don't bother trying to compress: */
-       if (bio_sectors(src) <= c->opts.block_size)
-               return 0;
+       if (src->bi_iter.bi_size <= c->opts.block_size)
+               return BCH_COMPRESSION_TYPE_incompressible;
 
        dst_data = bio_map_or_bounce(c, dst, WRITE);
        src_data = bio_map_or_bounce(c, src, READ);
 
-       workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOIO);
+       workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);
 
        *src_len = src->bi_iter.bi_size;
        *dst_len = dst->bi_iter.bi_size;
@@ -427,7 +439,8 @@ static unsigned __bio_compress(struct bch_fs *c,
        memset(dst_data.b + *dst_len, 0, pad);
        *dst_len += pad;
 
-       if (dst_data.type != BB_NONE)
+       if (dst_data.type != BB_NONE &&
+           dst_data.type != BB_VMAP)
                memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
 
        BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
@@ -439,7 +452,7 @@ out:
        bio_unmap_or_unbounce(c, dst_data);
        return compression_type;
 err:
-       compression_type = 0;
+       compression_type = BCH_COMPRESSION_TYPE_incompressible;
        goto out;
 }
 
@@ -453,12 +466,12 @@ unsigned bch2_bio_compress(struct bch_fs *c,
 
        /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
        src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
-                                    c->sb.encoded_extent_max << 9);
+                                    c->opts.encoded_extent_max);
        /* Don't generate a bigger output than input: */
        dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
 
-       if (compression_type == BCH_COMPRESSION_LZ4_OLD)
-               compression_type = BCH_COMPRESSION_LZ4;
+       if (compression_type == BCH_COMPRESSION_TYPE_lz4_old)
+               compression_type = BCH_COMPRESSION_TYPE_lz4;
 
        compression_type =
                __bio_compress(c, dst, dst_len, src, src_len, compression_type);
@@ -470,15 +483,15 @@ unsigned bch2_bio_compress(struct bch_fs *c,
 
 static int __bch2_fs_compress_init(struct bch_fs *, u64);
 
-#define BCH_FEATURE_NONE       0
+#define BCH_FEATURE_none       0
 
 static const unsigned bch2_compression_opt_to_feature[] = {
-#define x(t) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
-       BCH_COMPRESSION_TYPES()
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
+       BCH_COMPRESSION_OPTS()
 #undef x
 };
 
-#undef BCH_FEATURE_NONE
+#undef BCH_FEATURE_none
 
 static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
 {
@@ -531,53 +544,44 @@ void bch2_fs_compress_exit(struct bch_fs *c)
 
 static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 {
-       size_t max_extent = c->sb.encoded_extent_max << 9;
-       size_t order = get_order(max_extent);
        size_t decompress_workspace_size = 0;
        bool decompress_workspace_needed;
-       ZSTD_parameters params = ZSTD_getParams(0, max_extent, 0);
+       ZSTD_parameters params = zstd_get_params(0, c->opts.encoded_extent_max);
        struct {
                unsigned        feature;
                unsigned        type;
                size_t          compress_workspace;
                size_t          decompress_workspace;
        } compression_types[] = {
-               { BCH_FEATURE_LZ4, BCH_COMPRESSION_LZ4, LZ4_MEM_COMPRESS, 0 },
-               { BCH_FEATURE_GZIP, BCH_COMPRESSION_GZIP,
+               { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4, LZ4_MEM_COMPRESS, 0 },
+               { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
                        zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
                        zlib_inflate_workspacesize(), },
-               { BCH_FEATURE_ZSTD, BCH_COMPRESSION_ZSTD,
-                       ZSTD_CCtxWorkspaceBound(params.cParams),
-                       ZSTD_DCtxWorkspaceBound() },
+               { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
+                       zstd_cctx_workspace_bound(&params.cParams),
+                       zstd_dctx_workspace_bound() },
        }, *i;
-       int ret = 0;
-
-       pr_verbose_init(c->opts, "");
+       bool have_compressed = false;
 
        c->zstd_params = params;
 
        for (i = compression_types;
             i < compression_types + ARRAY_SIZE(compression_types);
             i++)
-               if (features & (1 << i->feature))
-                       goto have_compressed;
+               have_compressed |= (features & (1 << i->feature)) != 0;
 
-       goto out;
-have_compressed:
+       if (!have_compressed)
+               return 0;
 
-       if (!mempool_initialized(&c->compression_bounce[READ])) {
-               ret = mempool_init_page_pool(&c->compression_bounce[READ],
-                                            1, order);
-               if (ret)
-                       goto out;
-       }
+       if (!mempool_initialized(&c->compression_bounce[READ]) &&
+           mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
+                                       1, c->opts.encoded_extent_max))
+               return -BCH_ERR_ENOMEM_compression_bounce_read_init;
 
-       if (!mempool_initialized(&c->compression_bounce[WRITE])) {
-               ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
-                                            1, order);
-               if (ret)
-                       goto out;
-       }
+       if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
+           mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
+                                       1, c->opts.encoded_extent_max))
+               return -BCH_ERR_ENOMEM_compression_bounce_write_init;
 
        for (i = compression_types;
             i < compression_types + ARRAY_SIZE(compression_types);
@@ -594,21 +598,18 @@ have_compressed:
                if (mempool_initialized(&c->compress_workspace[i->type]))
                        continue;
 
-               ret = mempool_init_kvpmalloc_pool(
+               if (mempool_init_kvpmalloc_pool(
                                &c->compress_workspace[i->type],
-                               1, i->compress_workspace);
-               if (ret)
-                       goto out;
+                               1, i->compress_workspace))
+                       return -BCH_ERR_ENOMEM_compression_workspace_init;
        }
 
-       ret = mempool_init_kmalloc_pool(
-                       &c->decompress_workspace,
-                       1, decompress_workspace_size);
-       if (ret)
-               goto out;
-out:
-       pr_verbose_init(c->opts, "ret %i", ret);
-       return ret;
+       if (!mempool_initialized(&c->decompress_workspace) &&
+           mempool_init_kvpmalloc_pool(&c->decompress_workspace,
+                                       1, decompress_workspace_size))
+               return -BCH_ERR_ENOMEM_decompression_workspace_init;
+
+       return 0;
 }
 
 int bch2_fs_compress_init(struct bch_fs *c)