git.sesse.net Git - ffmpeg/commitdiff
pixblockdsp, avdct: Add get_pixels_unaligned
author Martin Storsjö <martin@martin.st>
Tue, 12 May 2020 08:22:45 +0000 (11:22 +0300)
committer Martin Storsjö <martin@martin.st>
Wed, 13 May 2020 10:20:08 +0000 (13:20 +0300)
Use this in vf_spp.c, where the get_pixels operation is done on
unaligned source addresses.

Hook up the x86 (mmx and sse) versions of get_pixels to this
function pointer, as those implementations seem to support unaligned
use.

This fixes fate-filter-spp on armv7.

Signed-off-by: Martin Storsjö <martin@martin.st>
libavcodec/avdct.c
libavcodec/avdct.h
libavcodec/pixblockdsp.c
libavcodec/pixblockdsp.h
libavcodec/x86/pixblockdsp_init.c
libavfilter/vf_spp.c

index 7c761cf39ab2e5baa12fd19e29bb8ffba0508f09..e8fa41f73be3a85988dfdb5dc9f1885b01726885 100644 (file)
@@ -120,6 +120,7 @@ int avcodec_dct_init(AVDCT *dsp)
         PixblockDSPContext pdsp;
         ff_pixblockdsp_init(&pdsp, avctx);
         COPY(pdsp, get_pixels);
+        COPY(pdsp, get_pixels_unaligned);
     }
 #endif
 
index 272422e44c91c8f5feae396900f79e38146abde3..6411fab6f637fee80978029fbee26203911a9edf 100644 (file)
@@ -67,6 +67,10 @@ typedef struct AVDCT {
                        ptrdiff_t line_size);
 
     int bits_per_sample;
+
+    void (*get_pixels_unaligned)(int16_t *block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t line_size);
 } AVDCT;
 
 /**
index 50e1d1d735ef084976d4904aceab79cf356c5290..a79e5477762a3e75ad3726f03c96753cc0b72763 100644 (file)
@@ -90,10 +90,12 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
     case 10:
     case 12:
     case 14:
+        c->get_pixels_unaligned =
         c->get_pixels = get_pixels_16_c;
         break;
     default:
         if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+            c->get_pixels_unaligned =
             c->get_pixels = get_pixels_8_c;
         }
         break;
index e036700ff067af63caa0d5ef99438fc20cd3d144..fddb4672120aa4c2403b1dbc0cadd04c896cd57a 100644 (file)
@@ -29,6 +29,9 @@ typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *av_restrict block /* align 16 */,
                        const uint8_t *pixels /* align 8 */,
                        ptrdiff_t stride);
+    void (*get_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t stride);
     void (*diff_pixels)(int16_t *av_restrict block /* align 16 */,
                         const uint8_t *s1 /* align 8 */,
                         const uint8_t *s2 /* align 8 */,
index ade55e01a3a48a7a85a9d93ea200b82d110dc75c..3a5eb6959c22605abd5beed6d8665c354d2c8750 100644 (file)
@@ -37,15 +37,19 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
     int cpu_flags = av_get_cpu_flags();
 
     if (EXTERNAL_MMX(cpu_flags)) {
-        if (!high_bit_depth)
+        if (!high_bit_depth) {
+            c->get_pixels_unaligned =
             c->get_pixels = ff_get_pixels_mmx;
+        }
         c->diff_pixels_unaligned =
         c->diff_pixels = ff_diff_pixels_mmx;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
-        if (!high_bit_depth)
+        if (!high_bit_depth) {
+            c->get_pixels_unaligned =
             c->get_pixels = ff_get_pixels_sse2;
+        }
         c->diff_pixels_unaligned =
         c->diff_pixels = ff_diff_pixels_sse2;
     }
index 6bee91b30913810756986f4a12daf9a410555075..a83b1195c06adb2bb2df534f7fd57989e0fac24c 100644 (file)
@@ -283,7 +283,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
                 const int x1 = x + offset[i + count - 1][0];
                 const int y1 = y + offset[i + count - 1][1];
                 const int index = x1 + y1*linesize;
-                p->dct->get_pixels(block, p->src + sample_bytes*index, sample_bytes*linesize);
+                p->dct->get_pixels_unaligned(block, p->src + sample_bytes*index, sample_bytes*linesize);
                 p->dct->fdct(block);
                 p->requantize(block2, block, qp, p->dct->idct_permutation);
                 p->dct->idct(block2);