]> git.sesse.net Git - ffmpeg/blobdiff - tests/checkasm/sw_scale.c
avformat/mov: Extend data_size check in mov_read_udta_string()
[ffmpeg] / tests / checkasm / sw_scale.c
index 2680e47897de0ed890f0224c05bb3fe5306f37a0..a10118704b784bf4a6a2d4a448d0ec8a5e56d4e9 100644 (file)
@@ -22,6 +22,7 @@
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mem.h"
+#include "libavutil/mem_internal.h"
 
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
             AV_WN32(buf + j, rnd());      \
     } while (0)
 
+/**
+ * Scalar reference for the vertical filter under test.
+ *
+ * This reference function is the same approximate algorithm employed by the
+ * SIMD functions: for every output pixel the high 16 bits of each product
+ * src[j][i + offset] * filter[j] are summed into a 16-bit accumulator that
+ * is seeded from dither[0]; the sum is then shifted right by 3 and clipped
+ * to 8 bits.
+ *
+ * @param filter     filterSize coefficients, one per source line
+ * @param filterSize number of filter taps (and of source lines)
+ * @param src        filterSize pointers to lines of 16-bit samples
+ * @param dest       receives dstW output bytes
+ * @param dstW       number of output pixels to produce
+ * @param dither     dither table; only dither[0] is read here
+ * @param offset     index of the first sample read from each source line
+ */
+static void ref_function(const int16_t *filter, int filterSize,
+                         const int16_t **src, uint8_t *dest, int dstW,
+                         const uint8_t *dither, int offset)
+{
+    int i, d;
+    d = ((filterSize - 1) * 8 + dither[0]) >> 4;
+    for (i = 0; i < dstW; i++) {
+        // The accumulator is intentionally only 16 bits wide
+        // (presumably to match the 16-bit lanes of the SIMD code).
+        int16_t val = d;
+        int j;
+        for (j = 0; j < filterSize; j++) {
+            // Product of two int16_t values always fits in a 32-bit int.
+            int prod = (int)src[j][i + offset] * (int)filter[j];
+            // Take the high 16 bits with a shift rather than by reading
+            // half of a union, so the result does not depend on host byte
+            // order. Relies on >> of a negative int being arithmetic.
+            val += (int16_t)(prod >> 16);
+        }
+        dest[i] = av_clip_uint8(val >> 3);
+    }
+}
+
+/**
+ * checkasm test for ctx->yuv2planeX.
+ *
+ * For every combination of input width (input_sizes), start offset
+ * (0, 16, 32, 48) and filter size (filter_sizes), random source lines and
+ * coefficients are filtered by ref_function() and, when
+ * ctx->use_mmx_vfilter is set, by the function under test; the two output
+ * buffers must then be byte-identical. Benchmarking is only done for the
+ * largest input width.
+ */
+static void check_yuv2yuvX(void)
+{
+    struct SwsContext *ctx;
+    int fsi, osi, isi, i, j;
+    int dstW;
+#define LARGEST_FILTER 16
+#define FILTER_SIZES 4
+    static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};
+#define LARGEST_INPUT_SIZE 512
+#define INPUT_SIZES 4
+    static const int input_sizes[INPUT_SIZES] = {128, 144, 256, 512};
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
+                      int filterSize, const int16_t **src, uint8_t *dest,
+                      int dstW, const uint8_t *dither, int offset);
+
+    const int16_t **src;
+    LOCAL_ALIGNED_8(int16_t, src_pixels, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
+    LOCAL_ALIGNED_8(int16_t, filter_coeff, [LARGEST_FILTER]);
+    LOCAL_ALIGNED_8(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, dither, [LARGEST_INPUT_SIZE]);
+    // Per-tap record handed to the function under test in place of the
+    // plain coefficient array: a source line pointer overlaid with eight
+    // 16-bit slots, of which slots 4..7 hold the tap's coefficient.
+    union VFilterData{
+        const int16_t *src;
+        uint16_t coeff[8];
+    } *vFilterData;
+    // A single random dither value is replicated over the whole table.
+    uint8_t d_val = rnd();
+    memset(dither, d_val, LARGEST_INPUT_SIZE);
+    randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
+    randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));
+    ctx = sws_alloc_context();
+    if (!ctx) {
+        // Nothing can be tested without a context.
+        fail();
+        return;
+    }
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    ff_getSwsFunc(ctx);
+    for(isi = 0; isi < INPUT_SIZES; ++isi){
+        dstW = input_sizes[isi];
+        for(osi = 0; osi < 64; osi += 16){
+            for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
+                src = av_malloc(sizeof(*src) * filter_sizes[fsi]);
+                // Two extra, zeroed records follow the real taps.
+                vFilterData = av_mallocz((filter_sizes[fsi] + 2) * sizeof(*vFilterData));
+                if (!src || !vFilterData) {
+                    // Out of memory: release whichever allocation succeeded
+                    // and stop instead of dereferencing NULL below.
+                    av_freep(&src);
+                    av_freep(&vFilterData);
+                    fail();
+                    goto end;
+                }
+                for(i = 0; i < filter_sizes[fsi]; ++i){
+                    src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
+                    vFilterData[i].src = src[i];
+                    for(j = 0; j < 4; ++j)
+                        vFilterData[i].coeff[j + 4] = filter_coeff[i];
+                }
+                if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d", filter_sizes[fsi], osi)){
+                    memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
+                    memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
+
+                    // The reference function is not the scalar function selected when mmx
+                    // is deactivated as the SIMD functions do not give the same result as
+                    // the scalar ones due to rounding. The SIMD functions are activated by
+                    // the flag SWS_ACCURATE_RND
+                    ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
+                    // There's no point in calling new for the reference function
+                    if(ctx->use_mmx_vfilter){
+                        call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                        // Compare the full buffers so that writes past
+                        // dstW - osi bytes are detected as well.
+                        if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))
+                            fail();
+                        if(dstW == LARGEST_INPUT_SIZE)
+                            bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                    }
+                }
+                av_freep(&src);
+                av_freep(&vFilterData);
+            }
+        }
+    }
+end:
+    sws_freeContext(ctx);
+#undef FILTER_SIZES
+}
+
+#undef SRC_PIXELS
 #define SRC_PIXELS 128
 
 static void check_hscale(void)
@@ -53,7 +153,7 @@ static void check_hscale(void)
     struct SwsContext *ctx;
 
     // padded
-    LOCAL_ALIGNED_32(uint8_t, src, [SRC_PIXELS + MAX_FILTER_WIDTH - 1]);
+    LOCAL_ALIGNED_32(uint8_t, src, [FFALIGN(SRC_PIXELS + MAX_FILTER_WIDTH - 1, 4)]);
     LOCAL_ALIGNED_32(uint32_t, dst0, [SRC_PIXELS]);
     LOCAL_ALIGNED_32(uint32_t, dst1, [SRC_PIXELS]);
 
@@ -131,4 +231,6 @@ void checkasm_check_sw_scale(void)
 {
     check_hscale();
     report("hscale");
+    check_yuv2yuvX();
+    report("yuv2yuvX");
 }