Merge commit 'f46bb608d9d76c543e4929dc8cffe36b84bd789e'

author Michael Niedermayer <michaelni@gmx.at>

Wed, 9 Jul 2014 22:56:05 +0000 (00:56 +0200)

committer Michael Niedermayer <michaelni@gmx.at>

Wed, 9 Jul 2014 23:22:14 +0000 (01:22 +0200)
author Michael Niedermayer <michaelni@gmx.at>
Wed, 9 Jul 2014 22:56:05 +0000 (00:56 +0200)
committer Michael Niedermayer <michaelni@gmx.at>
Wed, 9 Jul 2014 23:22:14 +0000 (01:22 +0200)
diff --cc configure

index 4691c280a1d3b9177d7d27a44b15103317d626e1,7a29e82adf6c9489adcabb32fe3e16980c212c7d..632ba44c7e224052761c34622c3500fdd2b4daf7
--- 1/configure
--- 2/configure
+++ b/configure
@@@ -1997,17 -1707,16 +1998,17 @@@ threads_if_any="$THREADS_LIST
   
   # subsystems
   dct_select="rdft"
- dsputil_select="fdctdsp idctdsp"
+ dsputil_select="fdctdsp idctdsp pixblockdsp"
   error_resilience_select="dsputil"
+ +frame_thread_encoder_deps="encoders threads"
   intrax8_select="error_resilience"
   mdct_select="fft"
   rdft_select="fft"
   mpeg_er_select="error_resilience"
   mpegaudio_select="mpegaudiodsp"
   mpegaudiodsp_select="dct"
- -mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
+ +mpegvideo_select="blockdsp dsputil h264chroma hpeldsp idctdsp videodsp"
- mpegvideoenc_select="dsputil mpegvideo qpeldsp"
+ mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp"
   
   # decoders / encoders
   aac_decoder_select="mdct sinewin"
@@@ -2022,13 -1730,12 +2023,13 @@@ alac_encoder_select="lpc
   als_decoder_select="bswapdsp"
   amrnb_decoder_select="lsp"
   amrwb_decoder_select="lsp"
- -amv_decoder_select="sp5x_decoder"
- -ape_decoder_select="bswapdsp"
+ +amv_decoder_select="sp5x_decoder exif"
+ +amv_encoder_select="aandcttables mpegvideoenc"
+ +ape_decoder_select="bswapdsp llauddsp"
   asv1_decoder_select="blockdsp bswapdsp idctdsp"
- asv1_encoder_select="bswapdsp dsputil fdctdsp"
+ asv1_encoder_select="bswapdsp fdctdsp pixblockdsp"
   asv2_decoder_select="blockdsp bswapdsp idctdsp"
- asv2_encoder_select="bswapdsp dsputil fdctdsp"
+ asv2_encoder_select="bswapdsp fdctdsp pixblockdsp"
   atrac1_decoder_select="mdct sinewin"
   atrac3_decoder_select="mdct"
   atrac3p_decoder_select="mdct sinewin"
@@@ -2043,12 -1749,11 +2044,12 @@@ cook_decoder_select="audiodsp mdct sine
   cscd_decoder_select="lzo"
   cscd_decoder_suggest="zlib"
   dca_decoder_select="mdct"
+ +dirac_decoder_select="dsputil dwt golomb videodsp"
   dnxhd_decoder_select="blockdsp idctdsp"
- dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc"
+ dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
   dvvideo_decoder_select="dvprofile idctdsp"
- dvvideo_encoder_select="dsputil dvprofile fdctdsp"
+ dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp"
- -dxa_decoder_deps="zlib"
+ +dxa_decoder_select="zlib"
   eac3_decoder_select="ac3_decoder"
   eac3_encoder_select="ac3_encoder"
   eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo"
diff --cc libavcodec/Makefile
Simple merge
diff --cc libavcodec/arm/Makefile

index fbbd0696b714525e9296b5a69929762da1653932,9ba6c2010ad9344c5c8da15a3224e7c6f16104ca..6b80de8a2bc50fcbf91b9fa10d4cc8c14672e2e1
--- 1/libavcodec/arm/Makefile
--- 2/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@@ -63,10 -63,9 +64,11 @@@ ARMV6-OBJS-$(CONFIG_IDCTDSP)           
                                             arm/simple_idct_armv6.o
   ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
   ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC)      += arm/mpegvideoencdsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP)       += arm/pixblockdsp_armv6.o
   
   ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
+ +ARMV6-OBJS-$(CONFIG_VC1_DECODER)       += arm/startcode_armv6.o
+ +ARMV6-OBJS-$(CONFIG_VC1_PARSER)        += arm/startcode_armv6.o
   ARMV6-OBJS-$(CONFIG_VP7_DECODER)       += arm/vp8_armv6.o               \
                                             arm/vp8dsp_init_armv6.o       \
                                             arm/vp8dsp_armv6.o
diff --cc libavcodec/arm/dsputil_armv6.S
Simple merge
diff --cc libavcodec/arm/dsputil_init_armv6.c
Simple merge
diff --cc libavcodec/arm/pixblockdsp_armv6.S

index 0000000000000000000000000000000000000000,4c925a4daa7e9c4934883653eaae79c551daa017..b10ea78e8822349eabfb30dff38e6e59247e9fff

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/arm/pixblockdsp_armv6.S
+++ b/libavcodec/arm/pixblockdsp_armv6.S
@@@ -1,0 -1,76 +1,76 @@@
- - * This file is part of Libav.
+ /*
+  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+  *
- - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
- - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
- - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #include "libavutil/arm/asm.S"
+ 
+ function ff_get_pixels_armv6, export=1   /* zero-extends 8 rows of 8 bytes into 64 halfwords stored at r0 */
+         pld             [r1, r2]                     /* prefetch at r1 + r2 (r0 = block, r1 = pixels, r2 = stride, assuming AAPCS argument order for the C prototype) */
+         push            {r4-r8, lr}                  /* save the registers clobbered below plus the return address */
+         mov             lr,  #8                      /* lr = rows remaining */
+ 1:
+         ldrd_post       r4,  r5,  r1,  r2            /* macro defined outside this file; presumably loads 8 bytes into r4/r5 and post-increments r1 by r2 */
+         subs            lr,  lr,  #1                 /* sets the flags tested by bgt; no instruction in between updates them */
+         uxtb16          r6,  r4                      /* r6  = bytes 0 and 2 of r4, zero-extended to halfwords */
+         uxtb16          r4,  r4,  ror #8             /* r4  = bytes 1 and 3 of r4 */
+         uxtb16          r12, r5                      /* r12 = bytes 0 and 2 of r5 */
+         uxtb16          r8,  r5,  ror #8             /* r8  = bytes 1 and 3 of r5 */
+         pld             [r1, r2]                     /* prefetch at the already-advanced r1 plus r2 */
+         pkhbt           r5,  r6,  r4,  lsl #16       /* r5  = first word:  byte 0 (low half), byte 1 (high half) */
+         pkhtb           r6,  r4,  r6,  asr #16       /* r6  = first word:  byte 2 (low half), byte 3 (high half) */
+         pkhbt           r7,  r12, r8,  lsl #16       /* r7  = second word: byte 0 (low half), byte 1 (high half) */
+         pkhtb           r12, r8,  r12, asr #16       /* r12 = second word: byte 2 (low half), byte 3 (high half) */
+         stm             r0!, {r5,r6,r7,r12}          /* write 16 bytes (8 halfwords) and advance r0 */
+         bgt             1b                           /* loop while rows remain */
+ 
+         pop             {r4-r8, pc}                  /* restore registers and return by loading pc */
+ endfunc
+ 
+ function ff_diff_pixels_armv6, export=1  /* stores 8 rows of 8 halfword differences (first source minus second source) at r0 */
+         pld             [r1, r3]                     /* prefetch at r1 + r3 (r0 = block, r1 = s1, r2 = s2, r3 = stride, assuming AAPCS argument order for the C prototype) */
+         pld             [r2, r3]                     /* same prefetch for the second source */
+         push            {r4-r9, lr}                  /* save the registers clobbered below plus the return address */
+         mov             lr,  #8                      /* lr = rows remaining */
+ 1:
+         ldrd_post       r4,  r5,  r1,  r3            /* macro defined outside this file; presumably loads 8 bytes of s1 into r4/r5 and advances r1 by r3 */
+         ldrd_post       r6,  r7,  r2,  r3            /* likewise for s2 into r6/r7 */
+         uxtb16          r8,  r4                      /* r8 = s1 first word, bytes 0 and 2 */
+         uxtb16          r4,  r4,  ror #8             /* r4 = s1 first word, bytes 1 and 3 */
+         uxtb16          r9,  r6                      /* r9 = s2 first word, bytes 0 and 2 */
+         uxtb16          r6,  r6,  ror #8             /* r6 = s2 first word, bytes 1 and 3 */
+         pld             [r1, r3]                     /* prefetch ahead in s1 */
+         ssub16          r9,  r8,  r9                 /* r9 = differences for bytes 0 and 2 */
+         ssub16          r6,  r4,  r6                 /* r6 = differences for bytes 1 and 3 */
+         uxtb16          r8,  r5                      /* r8 = s1 second word, bytes 0 and 2 */
+         uxtb16          r5,  r5,  ror #8             /* r5 = s1 second word, bytes 1 and 3 */
+         pld             [r2, r3]                     /* prefetch ahead in s2 */
+         pkhbt           r4,  r9,  r6,  lsl #16       /* r4 = diff 0 (low half), diff 1 (high half) */
+         pkhtb           r6,  r6,  r9,  asr #16       /* r6 = diff 2 (low half), diff 3 (high half) */
+         uxtb16          r9,  r7                      /* r9 = s2 second word, bytes 0 and 2 */
+         uxtb16          r7,  r7,  ror #8             /* r7 = s2 second word, bytes 1 and 3 */
+         ssub16          r9,  r8,  r9                 /* r9 = differences for bytes 0 and 2 of the second word */
+         ssub16          r5,  r5,  r7                 /* r5 = differences for bytes 1 and 3 of the second word */
+         subs            lr,  lr,  #1                 /* sets the flags tested by bgt; the pkh/stm instructions below leave them alone */
+         pkhbt           r8,  r9,  r5,  lsl #16       /* r8 = diff 4 (low half), diff 5 (high half) */
+         pkhtb           r9,  r5,  r9,  asr #16       /* r9 = diff 6 (low half), diff 7 (high half) */
+         stm             r0!, {r4,r6,r8,r9}           /* write 16 bytes (8 halfwords) and advance r0 */
+         bgt             1b                           /* loop while rows remain */
+ 
+         pop             {r4-r9, pc}                  /* restore registers and return by loading pc */
+ endfunc
diff --cc libavcodec/arm/pixblockdsp_init_arm.c

index 0000000000000000000000000000000000000000,f20769b3bc598d80c57baead4867cb9f80ebaad2..b77c523a6e616ed52097769d3b109352311892da

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@@ -1,0 -1,42 +1,42 @@@
- - * This file is part of Libav.
+ /*
- - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
- - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
- - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #include <stdint.h>
+ 
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/arm/cpu.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/pixblockdsp.h"
+ 
+ void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
+ void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
+                           const uint8_t *s2, int stride);
+ 
+ av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
+                                      AVCodecContext *avctx,
+                                      unsigned high_bit_depth)
+ { /* Overrides entries of c with the ARMv6 routines when the runtime CPU flags report ARMv6; avctx is not read here. */
+     int cpu_flags = av_get_cpu_flags();
+ 
+     if (have_armv6(cpu_flags)) {
+         if (!high_bit_depth) /* this guard covers only the get_pixels assignment on the next line */
+             c->get_pixels = ff_get_pixels_armv6;
+         c->diff_pixels = ff_diff_pixels_armv6; /* installed whatever the value of high_bit_depth */
+     }
+ }
diff --cc libavcodec/asv.h
Simple merge
diff --cc libavcodec/asvenc.c

index ae81953f30869d52c9b52143354d171eb4b14ff5,9944ffaa7c45856097eaad7c59602f7623ee6613..02cf2db9913df46c1fd03abce4ecd8fb7bcc191f
--- 1/libavcodec/asvenc.c
--- 2/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@@ -281,11 -241,17 +281,11 @@@ static av_cold int encode_init(AVCodecC
       int i;
       const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2;
   
- -    avctx->coded_frame = av_frame_alloc();
- -    if (!avctx->coded_frame)
- -        return AVERROR(ENOMEM);
- -    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
- -    avctx->coded_frame->key_frame = 1;
- -
       ff_asv_common_init(avctx);
-     ff_dsputil_init(&a->dsp, avctx);
       ff_fdctdsp_init(&a->fdsp, avctx);
+     ff_pixblockdsp_init(&a->pdsp, avctx);
   
- -    if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
+ +    if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
   
       a->inv_qscale= (32*scale*FF_QUALITY_SCALE +  avctx->global_quality/2) / avctx->global_quality;
   
diff --cc libavcodec/dnxhdenc.c

index f6f9af833ac63d2ae19d91626d2386c6bca7f8f9,e656b6edba21474c85e2fc8125d34b2d7585874d..3ad625352a6e075c94f699bb86a0331b16e480ac
--- 1/libavcodec/dnxhdenc.c
--- 2/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@@ -33,10 -33,9 +33,10 @@@
   #include "fdctdsp.h"
   #include "internal.h"
   #include "mpegvideo.h"
+ #include "pixblockdsp.h"
   #include "dnxhdenc.h"
   
+ +
   // The largest value that will not lead to overflow for 10bit samples.
   #define DNX10BIT_QMAT_SHIFT 18
   #define RC_VARIANCE 1 // use variance or ssd for fast rc
@@@ -326,9 -311,8 +326,10 @@@ static av_cold int dnxhd_encode_init(AV
       ff_fdctdsp_init(&ctx->m.fdsp, avctx);
       ff_idctdsp_init(&ctx->m.idsp, avctx);
       ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
+     ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
       ff_dct_common_init(&ctx->m);
+ +    ff_dct_encode_init(&ctx->m);
+ +
       if (!ctx->m.dct_quantize)
           ctx->m.dct_quantize = ff_dct_quantize_c;
   
diff --cc libavcodec/dsputil.c

index c68a70a79e04d945db217cf6fa1aad42ed2b8b1b,8d0cef2e8282aca2c070a8dbc9ccab2ed489c645..1cd9658ba6944075c2eb6db46b43f1ba41078fc3
--- 1/libavcodec/dsputil.c
--- 2/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@@ -584,9 -547,9 +556,9 @@@ static int dct_sad8x8_c(MpegEncContext 
   {
       LOCAL_ALIGNED_16(int16_t, temp, [64]);
   
- -    assert(h == 8);
+ +    av_assert2(h == 8);
   
-     s->dsp.diff_pixels(temp, src1, src2, stride);
+     s->pdsp.diff_pixels(temp, src1, src2, stride);
       s->fdsp.fdct(temp);
       return s->dsp.sum_abs_dctelem(temp);
   }
@@@ -651,9 -614,9 +623,9 @@@ static int dct_max8x8_c(MpegEncContext 
       LOCAL_ALIGNED_16(int16_t, temp, [64]);
       int sum = 0, i;
   
- -    assert(h == 8);
+ +    av_assert2(h == 8);
   
-     s->dsp.diff_pixels(temp, src1, src2, stride);
+     s->pdsp.diff_pixels(temp, src1, src2, stride);
       s->fdsp.fdct(temp);
   
       for (i = 0; i < 64; i++)
@@@ -669,10 -632,10 +641,10 @@@ static int quant_psnr8x8_c(MpegEncConte
       int16_t *const bak = temp + 64;
       int sum = 0, i;
   
- -    assert(h == 8);
+ +    av_assert2(h == 8);
       s->mb_intra = 0;
   
-     s->dsp.diff_pixels(temp, src1, src2, stride);
+     s->pdsp.diff_pixels(temp, src1, src2, stride);
   
       memcpy(bak, temp, 64 * sizeof(int16_t));
   
@@@ -773,9 -736,9 +745,9 @@@ static int bit8x8_c(MpegEncContext *s, 
       const int esc_length = s->ac_esc_length;
       uint8_t *length, *last_length;
   
- -    assert(h == 8);
+ +    av_assert2(h == 8);
   
-     s->dsp.diff_pixels(temp, src1, src2, stride);
+     s->pdsp.diff_pixels(temp, src1, src2, stride);
   
       s->block_last_index[0 /* FIXME */] =
       last                               =
@@@ -969,10 -904,6 +941,8 @@@ av_cold void ff_dsputil_init(DSPContex
   {
       const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
   
-     c->diff_pixels = diff_pixels_c;
- 
+ +    ff_check_alignment();
+ +
       c->sum_abs_dctelem = sum_abs_dctelem_c;
   
       /* TODO [0] 16  [1] 8 */
@@@ -1015,27 -944,7 +985,12 @@@
       c->vsse[5] = vsse_intra8_c;
       c->nsse[0] = nsse16_c;
       c->nsse[1] = nsse8_c;
+ +#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
+ +    ff_dsputil_init_dwt(c);
+ +#endif
   
-     switch (avctx->bits_per_raw_sample) {
-     case 9:
-     case 10:
-     case 12:
-     case 14:
-         c->get_pixels = get_pixels_16_c;
-         break;
-     default:
-         if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
-             c->get_pixels = get_pixels_8_c;
-         }
-         break;
-     }
- 
- 
+ +    if (ARCH_ALPHA)
+ +        ff_dsputil_init_alpha(c, avctx);
       if (ARCH_ARM)
           ff_dsputil_init_arm(c, avctx, high_bit_depth);
       if (ARCH_PPC)
diff --cc libavcodec/dsputil.h
Simple merge
diff --cc libavcodec/dvenc.c

index a60b834dfe2b5799e0e8d69bfc1fb78af594101f,9f458e3e47c6cff3648c02d62d04ec5fbefd68fa..aeb4a332596c877bbeb5b63d4e88e1e3be9f6c15
--- 1/libavcodec/dvenc.c
--- 2/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@@ -67,12 -65,12 +69,13 @@@ static av_cold int dvvideo_encode_init(
   
       dv_vlc_map_tableinit();
   
+ +    memset(&dsp,0, sizeof(dsp));
       ff_dsputil_init(&dsp, avctx);
       ff_fdctdsp_init(&fdsp, avctx);
+     ff_pixblockdsp_init(&pdsp, avctx);
       ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
   
-     s->get_pixels = dsp.get_pixels;
+     s->get_pixels = pdsp.get_pixels;
       s->ildct_cmp  = dsp.ildct_cmp[5];
   
       s->fdct[0]    = fdsp.fdct;
diff --cc libavcodec/libavcodec.v

index 5909dce46b6ae5f8f34206176e04e1c79eee011b,bf148075c719dfd0d963f2ceb8f1ca4ccd775856..5a8c005b97df352b47670c080453f1b10f683296
--- 1/libavcodec/libavcodec.v
--- 2/libavcodec/libavcodec.v
+++ b/libavcodec/libavcodec.v
@@@ -1,33 -1,4 +1,34 @@@
   LIBAVCODEC_$MAJOR {
           global: av*;
+ +                #deprecated, remove after next bump
+ +                audio_resample;
+ +                audio_resample_close;
+ +                dsputil_init;
+ +                ff_dsputil_init;
+ +                ff_find_pix_fmt;
+ +                ff_framenum_to_drop_timecode;
+ +                ff_framenum_to_smtpe_timecode;
+ +                ff_raw_pix_fmt_tags;
+ +                ff_init_smtpe_timecode;
+ +                ff_fft*;
+ +                ff_mdct*;
+ +                ff_dct*;
+ +                ff_rdft*;
+ +                ff_prores_idct_put_10_sse2;
+ +                ff_simple_idct*;
+ +                ff_aanscales;
+ +                ff_faan*;
+ +                ff_mmx_idct;
+ +                ff_fdct*;
+ +                fdct_ifast;
+ +                j_rev_dct;
+ +                ff_mmxext_idct;
+ +                ff_idct_xvid*;
+ +                ff_jpeg_fdct*;
+ +                ff_dnxhd_get_cid_table;
+ +                ff_dnxhd_cid_table;
+ +                ff_idctdsp_init;
+ +                ff_fdctdsp_init;
++                ff_pixblockdsp_init;
           local:  *;
   };
diff --cc libavcodec/mpegvideo.h
Simple merge
diff --cc libavcodec/mpegvideo_enc.c

index 826f061eeaf63cba3a6272df3361d87ff167fb3f,e2504c7b7210e69e2b3ea735ef0a7ee1aaeef17f..56867ccb85eef697cfa1420e8626558444158d20
--- 1/libavcodec/mpegvideo_enc.c
--- 2/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@@ -818,8 -698,12 +818,9 @@@ av_cold int ff_MPV_encode_init(AVCodecC
       if (ff_MPV_common_init(s) < 0)
           return -1;
   
- -    if (ARCH_X86)
- -        ff_MPV_encode_init_x86(s);
- -
       ff_fdctdsp_init(&s->fdsp, avctx);
       ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
+     ff_pixblockdsp_init(&s->pdsp, avctx);
       ff_qpeldsp_init(&s->qdsp);
   
       s->avctx->coded_frame = s->current_picture.f;
@@@ -2102,18 -1953,13 +2103,18 @@@ static av_always_inline void encode_mb_
               skip_dct[4] = 1;
               skip_dct[5] = 1;
           } else {
-             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
-             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
+             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
+             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
- -            if (!s->chroma_y_shift) { /* 422 */
- -                s->pdsp.get_pixels(s->block[6],
- -                                   ptr_cb + (dct_offset >> 1), wrap_c);
- -                s->pdsp.get_pixels(s->block[7],
- -                                   ptr_cr + (dct_offset >> 1), wrap_c);
+ +            if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
-                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
-                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
++                s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
++                s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
+ +            } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
-                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
-                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
-                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
-                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
-                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
-                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
++                s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
++                s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
++                s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
++                s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
++                s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
++                s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
               }
           }
       } else {
@@@ -2191,13 -2036,13 +2192,13 @@@
               skip_dct[4] = 1;
               skip_dct[5] = 1;
           } else {
-             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
-             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
+             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
+             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
               if (!s->chroma_y_shift) { /* 422 */
-                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
-                                    dest_cb + uv_dct_offset, wrap_c);
-                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
-                                    dest_cr + uv_dct_offset, wrap_c);
- -                s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
- -                                    dest_cb + (dct_offset >> 1), wrap_c);
- -                s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
- -                                    dest_cr + (dct_offset >> 1), wrap_c);
++                s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
++                                    dest_cb + uv_dct_offset, wrap_c);
++                s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
++                                    dest_cr + uv_dct_offset, wrap_c);
               }
           }
           /* pre quantization */
diff --cc libavcodec/pixblockdsp.c

index 0000000000000000000000000000000000000000,71423f9cfc95d266d18b88590eca59cbaa2996fe..a69948e43ef275f04c72abfd25549b34bee16a1a

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@@ -1,0 -1,76 +1,80 @@@
- - * This file is part of Libav.
+ /*
- - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
- - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
- - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
- -static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #include <stdint.h>
+ 
+ #include "config.h"
+ #include "libavutil/attributes.h"
+ #include "avcodec.h"
+ #include "pixblockdsp.h"
+ 
+ #define BIT_DEPTH 16
+ #include "pixblockdsp_template.c"
+ #undef BIT_DEPTH
+ 
+ #define BIT_DEPTH 8
+ #include "pixblockdsp_template.c"
+ 
- -        c->get_pixels = get_pixels_8_c;
++static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
+                           const uint8_t *s2, int stride)
+ {
+     int i;
+ 
+     /* For each of 8 rows, store the 8 per-pixel differences s1[k] - s2[k]
+      * into block; block is written contiguously, 64 values in total. */
+     for (i = 0; i < 8; i++) {
+         block[0] = s1[0] - s2[0];
+         block[1] = s1[1] - s2[1];
+         block[2] = s1[2] - s2[2];
+         block[3] = s1[3] - s2[3];
+         block[4] = s1[4] - s2[4];
+         block[5] = s1[5] - s2[5];
+         block[6] = s1[6] - s2[6];
+         block[7] = s1[7] - s2[7];
+         s1      += stride; /* both sources advance by the same stride */
+         s2      += stride;
+         block   += 8;      /* output rows are packed, 8 values apart */
+     }
+ }
+ 
+ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
+ { /* Fills c with the C implementations, then lets the per-architecture init functions override them. */
+     const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
+ 
+     c->diff_pixels = diff_pixels_c; /* same C routine for every bit depth */
+ 
+     switch (avctx->bits_per_raw_sample) {
+     case 9:
+     case 10:
++    case 12:
++    case 14:
+         c->get_pixels = get_pixels_16_c;
+         break;
+     default: /* any value not listed above, including other values greater than 8 */
++        if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
++            c->get_pixels = get_pixels_8_c;
++        } /* otherwise get_pixels is not assigned by this switch */
+         break;
+     }
+ 
+     if (ARCH_ARM) /* each override receives high_bit_depth as computed above */
+         ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
+     if (ARCH_PPC)
+         ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
+     if (ARCH_X86)
+         ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
+ }
diff --cc libavcodec/pixblockdsp.h

index 0000000000000000000000000000000000000000,8094d14b68e6d7062d72b0dd6b0b541f4524c0c1..a724ffbef0a9358d0cdd11c4a308432122880eaf

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@@ -1,0 -1,44 +1,44 @@@
- - * This file is part of Libav.
+ /*
- - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
- - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
- - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #ifndef AVCODEC_PIXBLOCKDSP_H
+ #define AVCODEC_PIXBLOCKDSP_H
+ 
+ #include <stdint.h>
+ 
+ #include "avcodec.h"
+ 
+ typedef struct PixblockDSPContext { /* function-pointer table populated by ff_pixblockdsp_init() */
+     void (*get_pixels)(int16_t *block /* align 16 */,
+                        const uint8_t *pixels /* align 8 */,
+                        int line_size); /* loads a pixel block into block as int16_t values */
+     void (*diff_pixels)(int16_t *block /* align 16 */,
+                         const uint8_t *s1 /* align 8 */,
+                         const uint8_t *s2 /* align 8 */,
+                         int stride); /* writes the per-pixel difference s1 - s2 into block */
+ } PixblockDSPContext;
+ 
+ void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
+ void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
+                              unsigned high_bit_depth);
+ void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
+                              unsigned high_bit_depth);
+ void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
+                              unsigned high_bit_depth);
+ 
+ #endif /* AVCODEC_PIXBLOCKDSP_H */
diff --cc libavcodec/pixblockdsp_template.c

index 711c404a97238ab85d568a029af86984f08d4a97,71d3cf150d68943ab4183b16edae77ce59357ab0..3aeddf526c7968b9ab346466845b03a06dccd460
--- 1/libavcodec/dsputilenc_template.c
--- 2/libavcodec/pixblockdsp_template.c
+++ b/libavcodec/pixblockdsp_template.c
@@@ -1,13 -1,7 +1,7 @@@
   /*
-  * DSP utils
-  * Copyright (c) 2000, 2001 Fabrice Bellard
-  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
-  *
-  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
-  *
- - * This file is part of Libav.
+ + * This file is part of FFmpeg.
    *
- - * Libav is free software; you can redistribute it and/or
+ + * FFmpeg is free software; you can redistribute it and/or
    * modify it under the terms of the GNU Lesser General Public
    * License as published by the Free Software Foundation; either
    * version 2.1 of the License, or (at your option) any later version.
@@@ -22,14 -16,9 +16,9 @@@
    * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    */
   
- /**
-  * @file
-  * DSP utils
-  */
- 
   #include "bit_depth_template.c"
   
- -static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
+ +static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels,
                                 int line_size)
   {
       const pixel *pixels = (const pixel *) _pixels;
diff --cc libavcodec/ppc/Makefile
Simple merge
diff --cc libavcodec/ppc/dsputil_altivec.c
Simple merge
diff --cc libavcodec/ppc/pixblockdsp.c

index 0000000000000000000000000000000000000000,698d655fc6a718ab4a9ec7bdb022f25aac906e9b..42c5be842eaf46f0bb5e70f8a56f1a45b062a5c3

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@@ -1,0 -1,153 +1,153 @@@
- - * This file is part of Libav.
+ /*
+  * Copyright (c) 2002 Brian Foley
+  * Copyright (c) 2002 Dieter Shirley
+  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
+  *
- - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
- - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
- - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #include "config.h"
+ #if HAVE_ALTIVEC_H
+ #include <altivec.h>
+ #endif
+ 
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/ppc/cpu.h"
+ #include "libavutil/ppc/types_altivec.h"
+ #include "libavutil/ppc/util_altivec.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/pixblockdsp.h"
+ 
+ #if HAVE_ALTIVEC
+ 
+ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
+                                int line_size)
+ {
+     int i;
+     vector unsigned char perm = vec_lvsl(0, pixels); /* computed once from the initial pointer and reused for all 8 rows; NOTE(review): presumably relies on line_size keeping the same 16-byte misalignment on every row -- confirm */
+     const vector unsigned char zero =
+         (const vector unsigned char) vec_splat_u8(0);
+ 
+     for (i = 0; i < 8; i++) {
+         /* Read potentially unaligned pixels.
+          * We're reading 16 pixels, and actually only want 8,
+          * but we simply ignore the extras. */
+         vector unsigned char pixl = vec_ld(0, pixels);
+         vector unsigned char pixr = vec_ld(7, pixels); /* offset 7 is the last of the 8 bytes wanted; diff_pixels_altivec uses offset 15 instead */
+         vector unsigned char bytes = vec_perm(pixl, pixr, perm);
+ 
+         // Convert the bytes into shorts.
+         vector signed short shorts = (vector signed short) vec_mergeh(zero,
+                                                                       bytes);
+ 
+         // Save the data to the block, we assume the block is 16-byte aligned.
+         vec_st(shorts, i * 16, (vector signed short *) block); /* i * 16 is a byte offset: one 16-byte vector (8 shorts) per row */
+ 
+         pixels += line_size;
+     }
+ }
+ 
+ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
+                                 const uint8_t *s2, int stride)
+ {
+     int i;
+     vector unsigned char perm1 = vec_lvsl(0, s1); /* both permute vectors are computed once, before the loop */
+     vector unsigned char perm2 = vec_lvsl(0, s2); /* NOTE(review): presumably relies on stride keeping each pointer's 16-byte misalignment constant -- confirm */
+     const vector unsigned char zero =
+         (const vector unsigned char) vec_splat_u8(0);
+     vector signed short shorts1, shorts2;
+ 
+     for (i = 0; i < 4; i++) { /* 4 iterations, each handling 2 rows = 8 rows in total */
+         /* Read potentially unaligned pixels.
+          * We're reading 16 pixels, and actually only want 8,
+          * but we simply ignore the extras. */
+         vector unsigned char pixl  = vec_ld(0,  s1);
+         vector unsigned char pixr  = vec_ld(15, s1);
+         vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
+ 
+         // Convert the bytes into shorts.
+         shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+ 
+         // Do the same for the second block of pixels.
+         pixl  = vec_ld(0,  s2);
+         pixr  = vec_ld(15, s2);
+         bytes = vec_perm(pixl, pixr, perm2);
+ 
+         // Convert the bytes into shorts.
+         shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+ 
+         // Do the subtraction.
+         shorts1 = vec_sub(shorts1, shorts2);
+ 
+         // Save the data to the block, we assume the block is 16-byte aligned.
+         vec_st(shorts1, 0, (vector signed short *) block);
+ 
+         s1    += stride;
+         s2    += stride;
+         block += 8; /* 8 shorts = one 16-byte output row */
+ 
+         /* The code below is a copy of the code above...
+          * This is a manual unroll. */
+ 
+         /* Read potentially unaligned pixels.
+          * We're reading 16 pixels, and actually only want 8,
+          * but we simply ignore the extras. */
+         pixl  = vec_ld(0,  s1);
+         pixr  = vec_ld(15, s1);
+         bytes = vec_perm(pixl, pixr, perm1);
+ 
+         // Convert the bytes into shorts.
+         shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+ 
+         // Do the same for the second block of pixels.
+         pixl  = vec_ld(0,  s2);
+         pixr  = vec_ld(15, s2);
+         bytes = vec_perm(pixl, pixr, perm2);
+ 
+         // Convert the bytes into shorts.
+         shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+ 
+         // Do the subtraction.
+         shorts1 = vec_sub(shorts1, shorts2);
+ 
+         // Save the data to the block, we assume the block is 16-byte aligned.
+         vec_st(shorts1, 0, (vector signed short *) block);
+ 
+         s1    += stride;
+         s2    += stride;
+         block += 8; /* 8 shorts = one 16-byte output row */
+     }
+ }
+ 
+ #endif /* HAVE_ALTIVEC */
+ 
+ av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
+                                      AVCodecContext *avctx,
+                                      unsigned high_bit_depth)
+ { /* Overrides entries of c with the AltiVec routines; the body is empty when HAVE_ALTIVEC is 0. avctx is not read here. */
+ #if HAVE_ALTIVEC
+     if (!PPC_ALTIVEC(av_get_cpu_flags()))
+         return; /* built with AltiVec support, but the runtime CPU flags do not report it */
+ 
+     c->diff_pixels = diff_pixels_altivec; /* installed whatever the value of high_bit_depth */
+ 
+     if (!high_bit_depth) { /* get_pixels_altivec is only installed when high_bit_depth is 0 */
+         c->get_pixels = get_pixels_altivec;
+     }
+ #endif /* HAVE_ALTIVEC */
+ }
diff --cc libavcodec/x86/Makefile

index 0843dcc77495cd556d536c45745127557a6b2bae,7c5ac3decf6b6ba5bf59fdbf0e72fa77df8bf615..44ccb2040f8194f757db93d27fa0a4f5a2f83d43
--- 1/libavcodec/x86/Makefile
--- 2/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@@ -105,11 -92,9 +106,12 @@@ YASM-OBJS-$(CONFIG_HEVC_DECODER)       
   YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
                                             x86/hpeldsp.o
   YASM-OBJS-$(CONFIG_HUFFYUVDSP)         += x86/huffyuvdsp.o
+ +YASM-OBJS-$(CONFIG_IDCTDSP)            += x86/idctdsp.o
+ +YASM-OBJS-$(CONFIG_LLAUDDSP)           += x86/lossless_audiodsp.o
+ +YASM-OBJS-$(CONFIG_LLVIDDSP)           += x86/lossless_videodsp.o
   YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
   YASM-OBJS-$(CONFIG_MPEGVIDEOENC)       += x86/mpegvideoencdsp.o
+ YASM-OBJS-$(CONFIG_PIXBLOCKDSP)        += x86/pixblockdsp.o
   YASM-OBJS-$(CONFIG_QPELDSP)            += x86/qpeldsp.o                 \
                                             x86/fpel.o                    \
                                             x86/qpel.o
diff --cc libavcodec/x86/dsputilenc.asm

index 13682ba5d46707161dca6f4afabd651930b1378e,8d989c26f89f3e22693ff311e5b253bf386e7a0a..023f512edd8093f4f4c1d4995d8ddc31095d9e7d
--- 1/libavcodec/x86/dsputilenc.asm
--- 2/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@@ -328,249 -323,14 +328,140 @@@ cglobal sse%1, 5,5,8, v, pix1, pix2, ls
       paddd     m7, m1
       paddd     m7, m3
   
- -    dec       r4
+ +%if %1 == mmsize
+ +    lea    pix1q, [pix1q + 2*lsizeq]
+ +    lea    pix2q, [pix2q + 2*lsizeq]
+ +%else
+ +    add    pix1q, lsizeq
+ +    add    pix2q, lsizeq
+ +%endif
+ +    dec       hd
       jnz .next2lines
   
- -    mova      m1, m7
- -    psrldq    m7, 8          ; shift hi qword to lo
- -    paddd     m7, m1
- -    mova      m1, m7
- -    psrldq    m7, 4          ; shift hi dword to lo
- -    paddd     m7, m1
+ +    HADDD     m7, m1
       movd     eax, m7         ; return value
       RET
- INIT_MMX mmx
- ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
- cglobal get_pixels, 3,4
-     movsxdifnidn r2, r2d
-     add          r0, 128
-     mov          r3, -128
-     pxor         m7, m7
- .loop:
-     mova         m0, [r1]
-     mova         m2, [r1+r2]
-     mova         m1, m0
-     mova         m3, m2
-     punpcklbw    m0, m7
-     punpckhbw    m1, m7
-     punpcklbw    m2, m7
-     punpckhbw    m3, m7
-     mova [r0+r3+ 0], m0
-     mova [r0+r3+ 8], m1
-     mova [r0+r3+16], m2
-     mova [r0+r3+24], m3
-     lea          r1, [r1+r2*2]
-     add          r3, 32
-     js .loop
-     REP_RET
- 
- INIT_XMM sse2
- cglobal get_pixels, 3, 4, 5
-     movsxdifnidn r2, r2d
-     lea          r3, [r2*3]
-     pxor         m4, m4
-     movh         m0, [r1]
-     movh         m1, [r1+r2]
-     movh         m2, [r1+r2*2]
-     movh         m3, [r1+r3]
-     lea          r1, [r1+r2*4]
-     punpcklbw    m0, m4
-     punpcklbw    m1, m4
-     punpcklbw    m2, m4
-     punpcklbw    m3, m4
-     mova       [r0], m0
-     mova  [r0+0x10], m1
-     mova  [r0+0x20], m2
-     mova  [r0+0x30], m3
-     movh         m0, [r1]
-     movh         m1, [r1+r2*1]
-     movh         m2, [r1+r2*2]
-     movh         m3, [r1+r3]
-     punpcklbw    m0, m4
-     punpcklbw    m1, m4
-     punpcklbw    m2, m4
-     punpcklbw    m3, m4
-     mova  [r0+0x40], m0
-     mova  [r0+0x50], m1
-     mova  [r0+0x60], m2
-     mova  [r0+0x70], m3
-     RET
- 
- INIT_MMX mmx
- ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- ;                         int stride);
- cglobal diff_pixels, 4,5
-     movsxdifnidn r3, r3d
-     pxor         m7, m7
-     add          r0,  128
-     mov          r4, -128
- .loop:
-     mova         m0, [r1]
-     mova         m2, [r2]
-     mova         m1, m0
-     mova         m3, m2
-     punpcklbw    m0, m7
-     punpckhbw    m1, m7
-     punpcklbw    m2, m7
-     punpckhbw    m3, m7
-     psubw        m0, m2
-     psubw        m1, m3
-     mova  [r0+r4+0], m0
-     mova  [r0+r4+8], m1
-     add          r1, r3
-     add          r2, r3
-     add          r4, 16
-     jne .loop
-     REP_RET
- 
- INIT_XMM sse2
- cglobal diff_pixels, 4, 5, 5
-     movsxdifnidn r3, r3d
-     pxor         m4, m4
-     add          r0,  128
-     mov          r4, -128
- .loop:
-     movh         m0, [r1]
-     movh         m2, [r2]
-     movh         m1, [r1+r3]
-     movh         m3, [r2+r3]
-     punpcklbw    m0, m4
-     punpcklbw    m1, m4
-     punpcklbw    m2, m4
-     punpcklbw    m3, m4
-     psubw        m0, m2
-     psubw        m1, m3
-     mova [r0+r4+0 ], m0
-     mova [r0+r4+16], m1
-     lea          r1, [r1+r3*2]
-     lea          r2, [r2+r3*2]
-     add          r4, 32
-     jne .loop
-     RET
- 
+ +%endmacro
+ +
+ +INIT_MMX mmx
+ +SUM_SQUARED_ERRORS 8
+ +
+ +INIT_MMX mmx
+ +SUM_SQUARED_ERRORS 16
+ +
+ +INIT_XMM sse2
+ +SUM_SQUARED_ERRORS 16
+ +
+ +;-----------------------------------------------
+ +;int ff_sum_abs_dctelem(int16_t *block)
+ +;-----------------------------------------------
+ +; %1 = number of xmm registers used
+ +; %2 = number of inline loops
+ +
+ +%macro SUM_ABS_DCTELEM 2
+ +cglobal sum_abs_dctelem, 1, 1, %1, block
+ +    pxor    m0, m0
+ +    pxor    m1, m1
+ +%assign %%i 0
+ +%rep %2
+ +    mova      m2, [blockq+mmsize*(0+%%i)]
+ +    mova      m3, [blockq+mmsize*(1+%%i)]
+ +    mova      m4, [blockq+mmsize*(2+%%i)]
+ +    mova      m5, [blockq+mmsize*(3+%%i)]
+ +    ABS1_SUM  m2, m6, m0
+ +    ABS1_SUM  m3, m6, m1
+ +    ABS1_SUM  m4, m6, m0
+ +    ABS1_SUM  m5, m6, m1
+ +%assign %%i %%i+4
+ +%endrep
+ +    paddusw m0, m1
+ +    HSUM    m0, m1, eax
+ +    and     eax, 0xFFFF
+ +    RET
+ +%endmacro
+ +
+ +INIT_MMX mmx
+ +SUM_ABS_DCTELEM 0, 4
+ +INIT_MMX mmxext
+ +SUM_ABS_DCTELEM 0, 4
+ +INIT_XMM sse2
+ +SUM_ABS_DCTELEM 7, 2
+ +INIT_XMM ssse3
+ +SUM_ABS_DCTELEM 6, 2
+ +
+ +;------------------------------------------------------------------------------
+ +; int ff_hf_noise*_mmx(uint8_t *pix1, int lsize, int h)
+ +;------------------------------------------------------------------------------
+ +; %1 = 8/16. %2-5=m#
+ +%macro HF_NOISE_PART1 5
+ +    mova      m%2, [pix1q]
+ +%if %1 == 8
+ +    mova      m%3, m%2
+ +    psllq     m%2, 8
+ +    psrlq     m%3, 8
+ +    psrlq     m%2, 8
+ +%else
+ +    mova      m%3, [pix1q+1]
+ +%endif
+ +    mova      m%4, m%2
+ +    mova      m%5, m%3
+ +    punpcklbw m%2, m7
+ +    punpcklbw m%3, m7
+ +    punpckhbw m%4, m7
+ +    punpckhbw m%5, m7
+ +    psubw     m%2, m%3
+ +    psubw     m%4, m%5
+ +%endmacro
+ +
+ +; %1-4 = m#
+ +%macro HF_NOISE_PART2 4
+ +    psubw     m%1, m%3
+ +    psubw     m%2, m%4
+ +    pxor       m3, m3
+ +    pxor       m1, m1
+ +    pcmpgtw    m3, m%1
+ +    pcmpgtw    m1, m%2
+ +    pxor      m%1, m3
+ +    pxor      m%2, m1
+ +    psubw     m%1, m3
+ +    psubw     m%2, m1
+ +    paddw     m%2, m%1
+ +    paddw      m6, m%2
+ +%endmacro
+ +
+ +; %1 = 8/16
+ +%macro HF_NOISE 1
+ +cglobal hf_noise%1, 3,3,0, pix1, lsize, h
+ +    movsxdifnidn lsizeq, lsized
+ +    sub        hd, 2
+ +    pxor       m7, m7
+ +    pxor       m6, m6
+ +    HF_NOISE_PART1 %1, 0, 1, 2, 3
+ +    add     pix1q, lsizeq
+ +    HF_NOISE_PART1 %1, 4, 1, 5, 3
+ +    HF_NOISE_PART2     0, 2, 4, 5
+ +    add     pix1q, lsizeq
+ +.loop:
+ +    HF_NOISE_PART1 %1, 0, 1, 2, 3
+ +    HF_NOISE_PART2     4, 5, 0, 2
+ +    add     pix1q, lsizeq
+ +    HF_NOISE_PART1 %1, 4, 1, 5, 3
+ +    HF_NOISE_PART2     0, 2, 4, 5
+ +    add     pix1q, lsizeq
+ +    sub        hd, 2
+ +        jne .loop
+ +
+ +    mova       m0, m6
+ +    punpcklwd  m0, m7
+ +    punpckhwd  m6, m7
+ +    paddd      m6, m0
+ +    mova       m0, m6
+ +    psrlq      m6, 32
+ +    paddd      m0, m6
+ +    movd      eax, m0   ; eax = result of hf_noise8;
+ +    REP_RET                 ; return eax;
+ +%endmacro
+ +
+ +INIT_MMX mmx
+ +HF_NOISE 8
+ +HF_NOISE 16
diff --cc libavcodec/x86/dsputilenc_mmx.c

index d0936595d0bf08f8151b93f8124975067f84dc22,5a7d911ca864548058356e9bb62c75bc7dd6994b..5d48a78daa0065d47083e11cf095f97bf1f80f88
--- 1/libavcodec/x86/dsputilenc_mmx.c
--- 2/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@@ -30,37 -30,381 +30,31 @@@
   #include "libavcodec/mpegvideo.h"
   #include "dsputil_x86.h"
   
- void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
- void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
- void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                         int stride);
- void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                          int stride);
- -#if HAVE_INLINE_ASM
- -
- -static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
- -                    int line_size, int h)
- -{
- -    int tmp;
- -
- -    __asm__ volatile (
- -        "movl         %4, %%ecx          \n"
- -        "shr          $1, %%ecx          \n"
- -        "pxor      %%mm0, %%mm0          \n" /* mm0 = 0 */
- -        "pxor      %%mm7, %%mm7          \n" /* mm7 holds the sum */
- -        "1:                              \n"
- -        "movq       (%0), %%mm1          \n" /* mm1 = pix1[0][0 - 7] */
- -        "movq       (%1), %%mm2          \n" /* mm2 = pix2[0][0 - 7] */
- -        "movq   (%0, %3), %%mm3          \n" /* mm3 = pix1[1][0 - 7] */
- -        "movq   (%1, %3), %%mm4          \n" /* mm4 = pix2[1][0 - 7] */
- -
- -        /* todo: mm1-mm2, mm3-mm4 */
- -        /* algo: subtract mm1 from mm2 with saturation and vice versa */
- -        /*       OR the results to get absolute difference */
- -        "movq      %%mm1, %%mm5          \n"
- -        "movq      %%mm3, %%mm6          \n"
- -        "psubusb   %%mm2, %%mm1          \n"
- -        "psubusb   %%mm4, %%mm3          \n"
- -        "psubusb   %%mm5, %%mm2          \n"
- -        "psubusb   %%mm6, %%mm4          \n"
- -
- -        "por       %%mm1, %%mm2          \n"
- -        "por       %%mm3, %%mm4          \n"
- -
- -        /* now convert to 16-bit vectors so we can square them */
- -        "movq      %%mm2, %%mm1          \n"
- -        "movq      %%mm4, %%mm3          \n"
- -
- -        "punpckhbw %%mm0, %%mm2          \n"
- -        "punpckhbw %%mm0, %%mm4          \n"
- -        "punpcklbw %%mm0, %%mm1          \n" /* mm1 now spread over (mm1, mm2) */
- -        "punpcklbw %%mm0, %%mm3          \n" /* mm4 now spread over (mm3, mm4) */
- -
- -        "pmaddwd   %%mm2, %%mm2          \n"
- -        "pmaddwd   %%mm4, %%mm4          \n"
- -        "pmaddwd   %%mm1, %%mm1          \n"
- -        "pmaddwd   %%mm3, %%mm3          \n"
- -
- -        "lea (%0, %3, 2), %0             \n" /* pix1 += 2 * line_size */
- -        "lea (%1, %3, 2), %1             \n" /* pix2 += 2 * line_size */
- -
- -        "paddd     %%mm2, %%mm1          \n"
- -        "paddd     %%mm4, %%mm3          \n"
- -        "paddd     %%mm1, %%mm7          \n"
- -        "paddd     %%mm3, %%mm7          \n"
- -
- -        "decl      %%ecx                 \n"
- -        "jnz       1b                    \n"
- -
- -        "movq      %%mm7, %%mm1          \n"
- -        "psrlq       $32, %%mm7          \n" /* shift hi dword to lo */
- -        "paddd     %%mm7, %%mm1          \n"
- -        "movd      %%mm1, %2             \n"
- -        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
- -        : "r" ((x86_reg) line_size), "m" (h)
- -        : "%ecx");
- -
- -    return tmp;
- -}
- -
- -static int sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
- -                     int line_size, int h)
- -{
- -    int tmp;
- -
- -    __asm__ volatile (
- -        "movl %4, %%ecx\n"
- -        "pxor %%mm0, %%mm0\n"    /* mm0 = 0 */
- -        "pxor %%mm7, %%mm7\n"    /* mm7 holds the sum */
- -        "1:\n"
- -        "movq (%0), %%mm1\n"     /* mm1 = pix1[0 -  7] */
- -        "movq (%1), %%mm2\n"     /* mm2 = pix2[0 -  7] */
- -        "movq 8(%0), %%mm3\n"    /* mm3 = pix1[8 - 15] */
- -        "movq 8(%1), %%mm4\n"    /* mm4 = pix2[8 - 15] */
- -
- -        /* todo: mm1-mm2, mm3-mm4 */
- -        /* algo: subtract mm1 from mm2 with saturation and vice versa */
- -        /*       OR the results to get absolute difference */
- -        "movq %%mm1, %%mm5\n"
- -        "movq %%mm3, %%mm6\n"
- -        "psubusb %%mm2, %%mm1\n"
- -        "psubusb %%mm4, %%mm3\n"
- -        "psubusb %%mm5, %%mm2\n"
- -        "psubusb %%mm6, %%mm4\n"
- -
- -        "por %%mm1, %%mm2\n"
- -        "por %%mm3, %%mm4\n"
- -
- -        /* now convert to 16-bit vectors so we can square them */
- -        "movq %%mm2, %%mm1\n"
- -        "movq %%mm4, %%mm3\n"
- -
- -        "punpckhbw %%mm0, %%mm2\n"
- -        "punpckhbw %%mm0, %%mm4\n"
- -        "punpcklbw %%mm0, %%mm1\n" /* mm1 now spread over (mm1, mm2) */
- -        "punpcklbw %%mm0, %%mm3\n" /* mm4 now spread over (mm3, mm4) */
- -
- -        "pmaddwd %%mm2, %%mm2\n"
- -        "pmaddwd %%mm4, %%mm4\n"
- -        "pmaddwd %%mm1, %%mm1\n"
- -        "pmaddwd %%mm3, %%mm3\n"
- -
- -        "add %3, %0\n"
- -        "add %3, %1\n"
- -
- -        "paddd %%mm2, %%mm1\n"
- -        "paddd %%mm4, %%mm3\n"
- -        "paddd %%mm1, %%mm7\n"
- -        "paddd %%mm3, %%mm7\n"
- -
- -        "decl %%ecx\n"
- -        "jnz 1b\n"
- -
- -        "movq %%mm7, %%mm1\n"
- -        "psrlq $32, %%mm7\n"    /* shift hi dword to lo */
- -        "paddd %%mm7, %%mm1\n"
- -        "movd %%mm1, %2\n"
- -        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
- -        : "r" ((x86_reg) line_size), "m" (h)
- -        : "%ecx");
- -
- -    return tmp;
- -}
- -
- -static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h)
- -{
- -    int tmp;
- -
- -    __asm__ volatile (
- -        "movl %3, %%ecx\n"
- -        "pxor %%mm7, %%mm7\n"
- -        "pxor %%mm6, %%mm6\n"
- -
- -        "movq (%0), %%mm0\n"
- -        "movq %%mm0, %%mm1\n"
- -        "psllq $8, %%mm0\n"
- -        "psrlq $8, %%mm1\n"
- -        "psrlq $8, %%mm0\n"
- -        "movq %%mm0, %%mm2\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm0\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm2\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm0\n"
- -        "psubw %%mm3, %%mm2\n"
- -
- -        "add %2, %0\n"
- -
- -        "movq (%0), %%mm4\n"
- -        "movq %%mm4, %%mm1\n"
- -        "psllq $8, %%mm4\n"
- -        "psrlq $8, %%mm1\n"
- -        "psrlq $8, %%mm4\n"
- -        "movq %%mm4, %%mm5\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm4\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm5\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm4\n"
- -        "psubw %%mm3, %%mm5\n"
- -        "psubw %%mm4, %%mm0\n"
- -        "psubw %%mm5, %%mm2\n"
- -        "pxor %%mm3, %%mm3\n"
- -        "pxor %%mm1, %%mm1\n"
- -        "pcmpgtw %%mm0, %%mm3\n\t"
- -        "pcmpgtw %%mm2, %%mm1\n\t"
- -        "pxor %%mm3, %%mm0\n"
- -        "pxor %%mm1, %%mm2\n"
- -        "psubw %%mm3, %%mm0\n"
- -        "psubw %%mm1, %%mm2\n"
- -        "paddw %%mm0, %%mm2\n"
- -        "paddw %%mm2, %%mm6\n"
- -
- -        "add %2, %0\n"
- -        "1:\n"
- -
- -        "movq (%0), %%mm0\n"
- -        "movq %%mm0, %%mm1\n"
- -        "psllq $8, %%mm0\n"
- -        "psrlq $8, %%mm1\n"
- -        "psrlq $8, %%mm0\n"
- -        "movq %%mm0, %%mm2\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm0\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm2\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm0\n"
- -        "psubw %%mm3, %%mm2\n"
- -        "psubw %%mm0, %%mm4\n"
- -        "psubw %%mm2, %%mm5\n"
- -        "pxor  %%mm3, %%mm3\n"
- -        "pxor  %%mm1, %%mm1\n"
- -        "pcmpgtw %%mm4, %%mm3\n\t"
- -        "pcmpgtw %%mm5, %%mm1\n\t"
- -        "pxor  %%mm3, %%mm4\n"
- -        "pxor  %%mm1, %%mm5\n"
- -        "psubw %%mm3, %%mm4\n"
- -        "psubw %%mm1, %%mm5\n"
- -        "paddw %%mm4, %%mm5\n"
- -        "paddw %%mm5, %%mm6\n"
- -
- -        "add %2, %0\n"
- -
- -        "movq (%0), %%mm4\n"
- -        "movq      %%mm4, %%mm1\n"
- -        "psllq $8, %%mm4\n"
- -        "psrlq $8, %%mm1\n"
- -        "psrlq $8, %%mm4\n"
- -        "movq      %%mm4, %%mm5\n"
- -        "movq      %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm4\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm5\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw     %%mm1, %%mm4\n"
- -        "psubw     %%mm3, %%mm5\n"
- -        "psubw     %%mm4, %%mm0\n"
- -        "psubw     %%mm5, %%mm2\n"
- -        "pxor      %%mm3, %%mm3\n"
- -        "pxor      %%mm1, %%mm1\n"
- -        "pcmpgtw   %%mm0, %%mm3\n\t"
- -        "pcmpgtw   %%mm2, %%mm1\n\t"
- -        "pxor      %%mm3, %%mm0\n"
- -        "pxor      %%mm1, %%mm2\n"
- -        "psubw     %%mm3, %%mm0\n"
- -        "psubw     %%mm1, %%mm2\n"
- -        "paddw     %%mm0, %%mm2\n"
- -        "paddw     %%mm2, %%mm6\n"
- -
- -        "add  %2, %0\n"
- -        "subl $2, %%ecx\n"
- -        " jnz 1b\n"
- -
- -        "movq      %%mm6, %%mm0\n"
- -        "punpcklwd %%mm7, %%mm0\n"
- -        "punpckhwd %%mm7, %%mm6\n"
- -        "paddd     %%mm0, %%mm6\n"
- -
- -        "movq  %%mm6, %%mm0\n"
- -        "psrlq $32,   %%mm6\n"
- -        "paddd %%mm6, %%mm0\n"
- -        "movd  %%mm0, %1\n"
- -        : "+r" (pix1), "=r" (tmp)
- -        : "r" ((x86_reg) line_size), "g" (h - 2)
- -        : "%ecx");
- -
- -    return tmp;
- -}
- -
- -static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h)
- -{
- -    int tmp;
- -    uint8_t *pix = pix1;
- -
- -    __asm__ volatile (
- -        "movl %3, %%ecx\n"
- -        "pxor %%mm7, %%mm7\n"
- -        "pxor %%mm6, %%mm6\n"
- -
- -        "movq (%0), %%mm0\n"
- -        "movq 1(%0), %%mm1\n"
- -        "movq %%mm0, %%mm2\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm0\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm2\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm0\n"
- -        "psubw %%mm3, %%mm2\n"
- -
- -        "add %2, %0\n"
- -
- -        "movq (%0), %%mm4\n"
- -        "movq 1(%0), %%mm1\n"
- -        "movq %%mm4, %%mm5\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm4\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm5\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm4\n"
- -        "psubw %%mm3, %%mm5\n"
- -        "psubw %%mm4, %%mm0\n"
- -        "psubw %%mm5, %%mm2\n"
- -        "pxor %%mm3, %%mm3\n"
- -        "pxor %%mm1, %%mm1\n"
- -        "pcmpgtw %%mm0, %%mm3\n\t"
- -        "pcmpgtw %%mm2, %%mm1\n\t"
- -        "pxor %%mm3, %%mm0\n"
- -        "pxor %%mm1, %%mm2\n"
- -        "psubw %%mm3, %%mm0\n"
- -        "psubw %%mm1, %%mm2\n"
- -        "paddw %%mm0, %%mm2\n"
- -        "paddw %%mm2, %%mm6\n"
- -
- -        "add %2, %0\n"
- -        "1:\n"
- -
- -        "movq (%0), %%mm0\n"
- -        "movq 1(%0), %%mm1\n"
- -        "movq %%mm0, %%mm2\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm0\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm2\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm0\n"
- -        "psubw %%mm3, %%mm2\n"
- -        "psubw %%mm0, %%mm4\n"
- -        "psubw %%mm2, %%mm5\n"
- -        "pxor %%mm3, %%mm3\n"
- -        "pxor %%mm1, %%mm1\n"
- -        "pcmpgtw %%mm4, %%mm3\n\t"
- -        "pcmpgtw %%mm5, %%mm1\n\t"
- -        "pxor %%mm3, %%mm4\n"
- -        "pxor %%mm1, %%mm5\n"
- -        "psubw %%mm3, %%mm4\n"
- -        "psubw %%mm1, %%mm5\n"
- -        "paddw %%mm4, %%mm5\n"
- -        "paddw %%mm5, %%mm6\n"
- -
- -        "add %2, %0\n"
- -
- -        "movq (%0), %%mm4\n"
- -        "movq 1(%0), %%mm1\n"
- -        "movq %%mm4, %%mm5\n"
- -        "movq %%mm1, %%mm3\n"
- -        "punpcklbw %%mm7, %%mm4\n"
- -        "punpcklbw %%mm7, %%mm1\n"
- -        "punpckhbw %%mm7, %%mm5\n"
- -        "punpckhbw %%mm7, %%mm3\n"
- -        "psubw %%mm1, %%mm4\n"
- -        "psubw %%mm3, %%mm5\n"
- -        "psubw %%mm4, %%mm0\n"
- -        "psubw %%mm5, %%mm2\n"
- -        "pxor %%mm3, %%mm3\n"
- -        "pxor %%mm1, %%mm1\n"
- -        "pcmpgtw %%mm0, %%mm3\n\t"
- -        "pcmpgtw %%mm2, %%mm1\n\t"
- -        "pxor %%mm3, %%mm0\n"
- -        "pxor %%mm1, %%mm2\n"
- -        "psubw %%mm3, %%mm0\n"
- -        "psubw %%mm1, %%mm2\n"
- -        "paddw %%mm0, %%mm2\n"
- -        "paddw %%mm2, %%mm6\n"
- -
- -        "add %2, %0\n"
- -        "subl $2, %%ecx\n"
- -        " jnz 1b\n"
+ +int ff_sum_abs_dctelem_mmx(int16_t *block);
+ +int ff_sum_abs_dctelem_mmxext(int16_t *block);
+ +int ff_sum_abs_dctelem_sse2(int16_t *block);
+ +int ff_sum_abs_dctelem_ssse3(int16_t *block);
+ +int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ +                int line_size, int h);
+ +int ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ +                 int line_size, int h);
+ +int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+ +                  int line_size, int h);
+ +int ff_hf_noise8_mmx(uint8_t *pix1, int lsize, int h);
+ +int ff_hf_noise16_mmx(uint8_t *pix1, int lsize, int h);
   
- -        "movq %%mm6, %%mm0\n"
- -        "punpcklwd %%mm7, %%mm0\n"
- -        "punpckhwd %%mm7, %%mm6\n"
- -        "paddd %%mm0, %%mm6\n"
+ +#define hadamard_func(cpu)                                              \
+ +    int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,     \
+ +                                  uint8_t *src2, int stride, int h);    \
+ +    int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1,   \
+ +                                    uint8_t *src2, int stride, int h);
   
- -        "movq %%mm6, %%mm0\n"
- -        "psrlq $32, %%mm6\n"
- -        "paddd %%mm6, %%mm0\n"
- -        "movd %%mm0, %1\n"
- -        : "+r" (pix1), "=r" (tmp)
- -        : "r" ((x86_reg) line_size), "g" (h - 2)
- -        : "%ecx");
+ +hadamard_func(mmx)
+ +hadamard_func(mmxext)
+ +hadamard_func(sse2)
+ +hadamard_func(ssse3)
   
- -    return tmp + hf_noise8_mmx(pix + 8, line_size, h);
- -}
+ +#if HAVE_YASM
   
   static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
@@@ -353,20 -818,16 +347,10 @@@ av_cold void ff_dsputilenc_init_mmx(DSP
   {
       int cpu_flags = av_get_cpu_flags();
   
-     if (EXTERNAL_MMX(cpu_flags)) {
-         if (!high_bit_depth)
-             c->get_pixels = ff_get_pixels_mmx;
-         c->diff_pixels = ff_diff_pixels_mmx;
-     }
- 
-     if (EXTERNAL_SSE2(cpu_flags))
-         if (!high_bit_depth)
-             c->get_pixels = ff_get_pixels_sse2;
- 
   #if HAVE_INLINE_ASM
       if (INLINE_MMX(cpu_flags)) {
- -        c->sum_abs_dctelem = sum_abs_dctelem_mmx;
- -
- -        c->sse[0]  = sse16_mmx;
- -        c->sse[1]  = sse8_mmx;
           c->vsad[4] = vsad_intra16_mmx;
   
- -        c->nsse[0] = nsse16_mmx;
- -        c->nsse[1] = nsse8_mmx;
           if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
               c->vsad[0]      = vsad16_mmx;
           }
@@@ -409,8 -865,6 +393,7 @@@
   
       if (EXTERNAL_SSE2(cpu_flags)) {
           c->sse[0] = ff_sse16_sse2;
-         c->diff_pixels = ff_diff_pixels_sse2;
+ +        c->sum_abs_dctelem   = ff_sum_abs_dctelem_sse2;
   
   #if HAVE_ALIGNED_STACK
           c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
diff --cc libavcodec/x86/pixblockdsp.asm

index 0000000000000000000000000000000000000000,c8fd1b24a13008f0954585d4d07597c14ae09202..00ee9b4ac23f621352d96e43dac94100b6a16894

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@@ -1,0 -1,110 +1,135 @@@
- -;* This file is part of Libav.
+ ;*****************************************************************************
+ ;* SIMD-optimized pixel operations
+ ;*****************************************************************************
+ ;* Copyright (c) 2000, 2001 Fabrice Bellard
+ ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ ;*
- -;* Libav is free software; you can redistribute it and/or
++;* This file is part of FFmpeg.
+ ;*
- -;* Libav is distributed in the hope that it will be useful,
++;* FFmpeg is free software; you can redistribute it and/or
+ ;* modify it under the terms of the GNU Lesser General Public
+ ;* License as published by the Free Software Foundation; either
+ ;* version 2.1 of the License, or (at your option) any later version.
+ ;*
- -;* License along with Libav; if not, write to the Free Software
++;* FFmpeg is distributed in the hope that it will be useful,
+ ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ ;* Lesser General Public License for more details.
+ ;*
+ ;* You should have received a copy of the GNU Lesser General Public
- -cglobal get_pixels, 3, 4
++;* License along with FFmpeg; if not, write to the Free Software
+ ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ;*****************************************************************************
+ 
+ %include "libavutil/x86/x86util.asm"
+ 
+ SECTION .text
+ 
+ INIT_MMX mmx
+ ; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
+ cglobal get_pixels, 3,4
+     movsxdifnidn r2, r2d
+     add          r0, 128
+     mov          r3, -128
+     pxor         m7, m7
+ .loop:
+     mova         m0, [r1]
+     mova         m2, [r1+r2]
+     mova         m1, m0
+     mova         m3, m2
+     punpcklbw    m0, m7
+     punpckhbw    m1, m7
+     punpcklbw    m2, m7
+     punpckhbw    m3, m7
+     mova [r0+r3+ 0], m0
+     mova [r0+r3+ 8], m1
+     mova [r0+r3+16], m2
+     mova [r0+r3+24], m3
+     lea          r1, [r1+r2*2]
+     add          r3, 32
+     js .loop
+     REP_RET
+ 
+ INIT_XMM sse2
++cglobal get_pixels, 3, 4, 5
+     movsxdifnidn r2, r2d
+     lea          r3, [r2*3]
+     pxor         m4, m4
+     movh         m0, [r1]
+     movh         m1, [r1+r2]
+     movh         m2, [r1+r2*2]
+     movh         m3, [r1+r3]
+     lea          r1, [r1+r2*4]
+     punpcklbw    m0, m4
+     punpcklbw    m1, m4
+     punpcklbw    m2, m4
+     punpcklbw    m3, m4
+     mova       [r0], m0
+     mova  [r0+0x10], m1
+     mova  [r0+0x20], m2
+     mova  [r0+0x30], m3
+     movh         m0, [r1]
+     movh         m1, [r1+r2*1]
+     movh         m2, [r1+r2*2]
+     movh         m3, [r1+r3]
+     punpcklbw    m0, m4
+     punpcklbw    m1, m4
+     punpcklbw    m2, m4
+     punpcklbw    m3, m4
+     mova  [r0+0x40], m0
+     mova  [r0+0x50], m1
+     mova  [r0+0x60], m2
+     mova  [r0+0x70], m3
+     RET
+ 
+ INIT_MMX mmx
+ ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+ ;                         int stride);
+ cglobal diff_pixels, 4,5
+     movsxdifnidn r3, r3d
+     pxor         m7, m7
+     add          r0,  128
+     mov          r4, -128
+ .loop:
+     mova         m0, [r1]
+     mova         m2, [r2]
+     mova         m1, m0
+     mova         m3, m2
+     punpcklbw    m0, m7
+     punpckhbw    m1, m7
+     punpcklbw    m2, m7
+     punpckhbw    m3, m7
+     psubw        m0, m2
+     psubw        m1, m3
+     mova  [r0+r4+0], m0
+     mova  [r0+r4+8], m1
+     add          r1, r3
+     add          r2, r3
+     add          r4, 16
+     jne .loop
+     REP_RET
++
++INIT_XMM sse2
++cglobal diff_pixels, 4, 5, 5
++    movsxdifnidn r3, r3d
++    pxor         m4, m4
++    add          r0,  128
++    mov          r4, -128
++.loop:
++    movh         m0, [r1]
++    movh         m2, [r2]
++    movh         m1, [r1+r3]
++    movh         m3, [r2+r3]
++    punpcklbw    m0, m4
++    punpcklbw    m1, m4
++    punpcklbw    m2, m4
++    punpcklbw    m3, m4
++    psubw        m0, m2
++    psubw        m1, m3
++    mova [r0+r4+0 ], m0
++    mova [r0+r4+16], m1
++    lea          r1, [r1+r3*2]
++    lea          r2, [r2+r3*2]
++    add          r4, 32
++    jne .loop
++    RET
diff --cc libavcodec/x86/pixblockdsp_init.c

index 0000000000000000000000000000000000000000,9582e0b5c28700bd58ec0df4554807715daaee8b..4c31b802ff12b794b596916145549f072d8a9cdb

mode 000000,100644..100644
--- /dev/null
--- 2/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@@ -1,0 -1,47 +1,50 @@@
- - * This file is part of Libav.
+ /*
+  * SIMD-optimized pixel operations
+  *
- - * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+  *
- - * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public
+  * License as published by the Free Software Foundation; either
+  * version 2.1 of the License, or (at your option) any later version.
+  *
- - * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/x86/cpu.h"
+ #include "libavcodec/pixblockdsp.h"
+ 
+ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
+ void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
+ void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+                         int stride);
++void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
++                         int stride);
+ 
+ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
+                                      AVCodecContext *avctx,
+                                      unsigned high_bit_depth)
+ {
+     int cpu_flags = av_get_cpu_flags();
+ 
+     if (EXTERNAL_MMX(cpu_flags)) {
+         if (!high_bit_depth)
+             c->get_pixels = ff_get_pixels_mmx;
+         c->diff_pixels = ff_diff_pixels_mmx;
+     }
+ 
+     if (EXTERNAL_SSE2(cpu_flags)) {
+         if (!high_bit_depth)
+             c->get_pixels = ff_get_pixels_sse2;
++        c->diff_pixels = ff_diff_pixels_sse2;
+     }
+ }
diff --cc libavfilter/vf_mpdecimate.c

index 099622a029542347538317d4831f96321b52efb5,0000000000000000000000000000000000000000..c667a9f4ccdd979a212d572541300ded65a7742f

mode 100644,000000..100644
--- 1/libavfilter/vf_mpdecimate.c
--- /dev/null
+++ b/libavfilter/vf_mpdecimate.c
@@@ -1,257 -1,0 +1,261 @@@
-             dspctx->diff_pixels(block,
+ +/*
+ + * Copyright (c) 2003 Rich Felker
+ + * Copyright (c) 2012 Stefano Sabatini
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or modify
+ + * it under the terms of the GNU General Public License as published by
+ + * the Free Software Foundation; either version 2 of the License, or
+ + * (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ + * GNU General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU General Public License along
+ + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ + */
+ +
+ +/**
+ + * @file
+ + * mpdecimate filter, ported from libmpcodecs/vf_decimate.c by
+ + * Rich Felker.
+ + */
+ +
+ +#include "libavutil/opt.h"
+ +#include "libavutil/pixdesc.h"
+ +#include "libavutil/timestamp.h"
+ +#include "libavcodec/dsputil.h"
++#include "libavcodec/pixblockdsp.h"
+ +#include "avfilter.h"
+ +#include "internal.h"
+ +#include "formats.h"
+ +#include "video.h"
+ +
+ +typedef struct {
+ +    const AVClass *class;
+ +    int lo, hi;                    ///< lower and higher threshold number of differences
+ +                                   ///< values for 8x8 blocks
+ +
+ +    float frac;                    ///< threshold of changed pixels over the total fraction
+ +
+ +    int max_drop_count;            ///< if positive: maximum number of sequential frames to drop
+ +                                   ///< if negative: minimum number of frames between two drops
+ +
+ +    int drop_count;                ///< if positive: number of frames sequentially dropped
+ +                                   ///< if negative: number of sequential frames which were not dropped
+ +
+ +    int hsub, vsub;                ///< chroma subsampling values
+ +    AVFrame *ref;                  ///< reference picture
+ +    DSPContext dspctx;             ///< context providing optimized diff routines
++    PixblockDSPContext pdsp;
+ +    AVCodecContext *avctx;         ///< codec context required for the DSPContext
+ +} DecimateContext;
+ +
+ +#define OFFSET(x) offsetof(DecimateContext, x)
+ +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+ +
+ +static const AVOption mpdecimate_options[] = {
+ +    { "max",  "set the maximum number of consecutive dropped frames (positive), or the minimum interval between dropped frames (negative)",
+ +      OFFSET(max_drop_count), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX, FLAGS },
+ +    { "hi",   "set high dropping threshold", OFFSET(hi), AV_OPT_TYPE_INT, {.i64=64*12}, INT_MIN, INT_MAX, FLAGS },
+ +    { "lo",   "set low dropping threshold", OFFSET(lo), AV_OPT_TYPE_INT, {.i64=64*5}, INT_MIN, INT_MAX, FLAGS },
+ +    { "frac", "set fraction dropping threshold",  OFFSET(frac), AV_OPT_TYPE_FLOAT, {.dbl=0.33}, 0, 1, FLAGS },
+ +    { NULL }
+ +};
+ +
+ +AVFILTER_DEFINE_CLASS(mpdecimate);
+ +
+ +/**
+ + * Return 1 if the two planes are different, 0 otherwise.
+ + */
+ +static int diff_planes(AVFilterContext *ctx,
+ +                       uint8_t *cur, uint8_t *ref, int linesize,
+ +                       int w, int h)
+ +{
+ +    DecimateContext *decimate = ctx->priv;
+ +    DSPContext *dspctx = &decimate->dspctx;
++    PixblockDSPContext *pdsp = &decimate->pdsp;
+ +
+ +    int x, y;
+ +    int d, c = 0;
+ +    int t = (w/16)*(h/16)*decimate->frac;
+ +    int16_t block[8*8];
+ +
+ +    /* compute difference for blocks of 8x8 bytes */
+ +    for (y = 0; y < h-7; y += 4) {
+ +        for (x = 8; x < w-7; x += 4) {
++            pdsp->diff_pixels(block,
+ +                                cur+x+y*linesize,
+ +                                ref+x+y*linesize, linesize);
+ +            d = dspctx->sum_abs_dctelem(block);
+ +            if (d > decimate->hi)
+ +                return 1;
+ +            if (d > decimate->lo) {
+ +                c++;
+ +                if (c > t)
+ +                    return 1;
+ +            }
+ +        }
+ +    }
+ +    return 0;
+ +}
+ +
+ +/**
+ + * Tell if the frame should be decimated, for example if it is not much
+ + * different from the reference frame ref.
+ + */
+ +static int decimate_frame(AVFilterContext *ctx,
+ +                          AVFrame *cur, AVFrame *ref)
+ +{
+ +    DecimateContext *decimate = ctx->priv;
+ +    int plane;
+ +
+ +    if (decimate->max_drop_count > 0 &&
+ +        decimate->drop_count >= decimate->max_drop_count)
+ +        return 0;
+ +    if (decimate->max_drop_count < 0 &&
+ +        (decimate->drop_count-1) > decimate->max_drop_count)
+ +        return 0;
+ +
+ +    for (plane = 0; ref->data[plane] && ref->linesize[plane]; plane++) {
+ +        int vsub = plane == 1 || plane == 2 ? decimate->vsub : 0;
+ +        int hsub = plane == 1 || plane == 2 ? decimate->hsub : 0;
+ +        if (diff_planes(ctx,
+ +                        cur->data[plane], ref->data[plane], ref->linesize[plane],
+ +                        FF_CEIL_RSHIFT(ref->width,  hsub),
+ +                        FF_CEIL_RSHIFT(ref->height, vsub)))
+ +            return 0;
+ +    }
+ +
+ +    return 1;
+ +}
+ +
+ +static av_cold int init(AVFilterContext *ctx)
+ +{
+ +    DecimateContext *decimate = ctx->priv;
+ +
+ +    av_log(ctx, AV_LOG_VERBOSE, "max_drop_count:%d hi:%d lo:%d frac:%f\n",
+ +           decimate->max_drop_count, decimate->hi, decimate->lo, decimate->frac);
+ +
+ +    decimate->avctx = avcodec_alloc_context3(NULL);
+ +    if (!decimate->avctx)
+ +        return AVERROR(ENOMEM);
+ +    avpriv_dsputil_init(&decimate->dspctx, decimate->avctx);
++    ff_pixblockdsp_init(&decimate->pdsp, decimate->avctx);
+ +
+ +    return 0;
+ +}
+ +
+ +static av_cold void uninit(AVFilterContext *ctx)
+ +{
+ +    DecimateContext *decimate = ctx->priv;
+ +    av_frame_free(&decimate->ref);
+ +    if (decimate->avctx) {
+ +        avcodec_close(decimate->avctx);
+ +        av_freep(&decimate->avctx);
+ +    }
+ +}
+ +
+ +static int query_formats(AVFilterContext *ctx)
+ +{
+ +    static const enum AVPixelFormat pix_fmts[] = {
+ +        AV_PIX_FMT_YUV444P,      AV_PIX_FMT_YUV422P,
+ +        AV_PIX_FMT_YUV420P,      AV_PIX_FMT_YUV411P,
+ +        AV_PIX_FMT_YUV410P,      AV_PIX_FMT_YUV440P,
+ +        AV_PIX_FMT_YUVJ444P,     AV_PIX_FMT_YUVJ422P,
+ +        AV_PIX_FMT_YUVJ420P,     AV_PIX_FMT_YUVJ440P,
+ +        AV_PIX_FMT_YUVA420P,
+ +        AV_PIX_FMT_NONE
+ +    };
+ +
+ +    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+ +
+ +    return 0;
+ +}
+ +
+ +static int config_input(AVFilterLink *inlink)
+ +{
+ +    AVFilterContext *ctx = inlink->dst;
+ +    DecimateContext *decimate = ctx->priv;
+ +    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
+ +    decimate->hsub = pix_desc->log2_chroma_w;
+ +    decimate->vsub = pix_desc->log2_chroma_h;
+ +
+ +    return 0;
+ +}
+ +
+ +/**
+ + * Receive one input frame and either drop it or forward a clone of it.
+ + *
+ + * When a reference frame exists and decimate_frame() reports the new frame
+ + * as droppable, the frame is freed and drop_count is made positive.
+ + * Otherwise the frame replaces the stored reference, drop_count is made
+ + * negative, and a clone of the frame is sent to the output link.
+ + *
+ + * @param inlink input link the frame arrived on
+ + * @param cur    incoming frame; kept as the new reference or freed here
+ + * @return 0 on success, a negative AVERROR code on failure
+ + */
+ +static int filter_frame(AVFilterLink *inlink, AVFrame *cur)
+ +{
+ +    DecimateContext *decimate = inlink->dst->priv;
+ +    AVFilterLink *outlink = inlink->dst->outputs[0];
+ +    int ret;
+ +
+ +    if (decimate->ref && decimate_frame(inlink->dst, cur, decimate->ref)) {
+ +        decimate->drop_count = FFMAX(1, decimate->drop_count+1);
+ +    } else {
+ +        AVFrame *out;
+ +
+ +        av_frame_free(&decimate->ref);
+ +        decimate->ref = cur;
+ +        decimate->drop_count = FFMIN(-1, decimate->drop_count-1);
+ +
+ +        /* cur is now held as decimate->ref, so it must not be freed on
+ +         * the error paths below */
+ +        out = av_frame_clone(cur);
+ +        if (!out)
+ +            return AVERROR(ENOMEM);
+ +
+ +        /* the assignment is parenthesized so that ret receives the error
+ +         * code itself rather than the result of the "< 0" comparison */
+ +        if ((ret = ff_filter_frame(outlink, out)) < 0)
+ +            return ret;
+ +    }
+ +
+ +    av_log(inlink->dst, AV_LOG_DEBUG,
+ +           "%s pts:%s pts_time:%s drop_count:%d\n",
+ +           decimate->drop_count > 0 ? "drop" : "keep",
+ +           av_ts2str(cur->pts), av_ts2timestr(cur->pts, &inlink->time_base),
+ +           decimate->drop_count);
+ +
+ +    /* a positive drop_count means the frame was not stored as reference */
+ +    if (decimate->drop_count > 0)
+ +        av_frame_free(&cur);
+ +
+ +    return 0;
+ +}
+ +
+ +static int request_frame(AVFilterLink *outlink)
+ +{
+ +    DecimateContext *decimate = outlink->src->priv;
+ +    AVFilterLink *inlink = outlink->src->inputs[0];
+ +    int ret;
+ +
+ +    do {
+ +        ret = ff_request_frame(inlink);
+ +    } while (decimate->drop_count > 0 && ret >= 0);
+ +
+ +    return ret;
+ +}
+ +
+ +static const AVFilterPad mpdecimate_inputs[] = {
+ +    {
+ +        .name         = "default",
+ +        .type         = AVMEDIA_TYPE_VIDEO,
+ +        .config_props = config_input,
+ +        .filter_frame = filter_frame,
+ +    },
+ +    { NULL }
+ +};
+ +
+ +static const AVFilterPad mpdecimate_outputs[] = {
+ +    {
+ +        .name          = "default",
+ +        .type          = AVMEDIA_TYPE_VIDEO,
+ +        .request_frame = request_frame,
+ +    },
+ +    { NULL }
+ +};
+ +
+ +AVFilter ff_vf_mpdecimate = {
+ +    .name          = "mpdecimate",
+ +    .description   = NULL_IF_CONFIG_SMALL("Remove near-duplicate frames."),
+ +    .init          = init,
+ +    .uninit        = uninit,
+ +    .priv_size     = sizeof(DecimateContext),
+ +    .priv_class    = &mpdecimate_class,
+ +    .query_formats = query_formats,
+ +    .inputs        = mpdecimate_inputs,
+ +    .outputs       = mpdecimate_outputs,
+ +};
diff --cc libavfilter/vf_spp.c

index 9df87ff3f26e1622261d1a4f08ce25f79afc548f,0000000000000000000000000000000000000000..4e4a5795f472f5158e132294df354a831dae5ba7

mode 100644,000000..100644
--- 1/libavfilter/vf_spp.c
--- /dev/null
+++ b/libavfilter/vf_spp.c
@@@ -1,439 -1,0 +1,439 @@@
-                 p->dsp.get_pixels(block, p->src + index, linesize);
+ +/*
+ + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ + * Copyright (c) 2013 Clément Bœsch <u pkh me>
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or modify
+ + * it under the terms of the GNU General Public License as published by
+ + * the Free Software Foundation; either version 2 of the License, or
+ + * (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ + * GNU General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU General Public License along
+ + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ + */
+ +
+ +/**
+ + * @file
+ + * Simple post processing filter
+ + *
+ + * This implementation is based on an algorithm described in
+ + * "Aria Nosratinia Embedded Post-Processing for
+ + * Enhancement of Compressed Images (1999)"
+ + *
+ + * Originally written by Michael Niedermayer for the MPlayer project, and
+ + * ported by Clément Bœsch for FFmpeg.
+ + */
+ +
+ +#include "libavcodec/dsputil.h"
+ +#include "libavutil/avassert.h"
+ +#include "libavutil/imgutils.h"
+ +#include "libavutil/opt.h"
+ +#include "libavutil/pixdesc.h"
+ +#include "internal.h"
+ +#include "vf_spp.h"
+ +
+ +enum mode {
+ +    MODE_HARD,
+ +    MODE_SOFT,
+ +    NB_MODES
+ +};
+ +
+ +#define OFFSET(x) offsetof(SPPContext, x)
+ +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+ +static const AVOption spp_options[] = {
+ +    { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 3}, 0, MAX_LEVEL, FLAGS },
+ +    { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63, FLAGS },
+ +    { "mode", "set thresholding mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_HARD}, 0, NB_MODES - 1, FLAGS, "mode" },
+ +        { "hard", "hard thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_HARD}, INT_MIN, INT_MAX, FLAGS, "mode" },
+ +        { "soft", "soft thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_SOFT}, INT_MIN, INT_MAX, FLAGS, "mode" },
+ +    { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
+ +    { NULL }
+ +};
+ +
+ +AVFILTER_DEFINE_CLASS(spp);
+ +
+ +// XXX: share between filters?
+ +DECLARE_ALIGNED(8, static const uint8_t, ldither)[8][8] = {
+ +    {  0,  48,  12,  60,   3,  51,  15,  63 },
+ +    { 32,  16,  44,  28,  35,  19,  47,  31 },
+ +    {  8,  56,   4,  52,  11,  59,   7,  55 },
+ +    { 40,  24,  36,  20,  43,  27,  39,  23 },
+ +    {  2,  50,  14,  62,   1,  49,  13,  61 },
+ +    { 34,  18,  46,  30,  33,  17,  45,  29 },
+ +    { 10,  58,   6,  54,   9,  57,   5,  53 },
+ +    { 42,  26,  38,  22,  41,  25,  37,  21 },
+ +};
+ +
+ +static const uint8_t offset[127][2] = {
+ +    {0,0},
+ +    {0,0}, {4,4},                                           // quality = 1
+ +    {0,0}, {2,2}, {6,4}, {4,6},                             // quality = 2
+ +    {0,0}, {5,1}, {2,2}, {7,3}, {4,4}, {1,5}, {6,6}, {3,7}, // quality = 3
+ +
+ +    {0,0}, {4,0}, {1,1}, {5,1}, {3,2}, {7,2}, {2,3}, {6,3}, // quality = 4
+ +    {0,4}, {4,4}, {1,5}, {5,5}, {3,6}, {7,6}, {2,7}, {6,7},
+ +
+ +    {0,0}, {0,2}, {0,4}, {0,6}, {1,1}, {1,3}, {1,5}, {1,7}, // quality = 5
+ +    {2,0}, {2,2}, {2,4}, {2,6}, {3,1}, {3,3}, {3,5}, {3,7},
+ +    {4,0}, {4,2}, {4,4}, {4,6}, {5,1}, {5,3}, {5,5}, {5,7},
+ +    {6,0}, {6,2}, {6,4}, {6,6}, {7,1}, {7,3}, {7,5}, {7,7},
+ +
+ +    {0,0}, {4,4}, {0,4}, {4,0}, {2,2}, {6,6}, {2,6}, {6,2}, // quality = 6
+ +    {0,2}, {4,6}, {0,6}, {4,2}, {2,0}, {6,4}, {2,4}, {6,0},
+ +    {1,1}, {5,5}, {1,5}, {5,1}, {3,3}, {7,7}, {3,7}, {7,3},
+ +    {1,3}, {5,7}, {1,7}, {5,3}, {3,1}, {7,5}, {3,5}, {7,1},
+ +    {0,1}, {4,5}, {0,5}, {4,1}, {2,3}, {6,7}, {2,7}, {6,3},
+ +    {0,3}, {4,7}, {0,7}, {4,3}, {2,1}, {6,5}, {2,5}, {6,1},
+ +    {1,0}, {5,4}, {1,4}, {5,0}, {3,2}, {7,6}, {3,6}, {7,2},
+ +    {1,2}, {5,6}, {1,6}, {5,2}, {3,0}, {7,4}, {3,4}, {7,0},
+ +};
+ +
+ +static void hardthresh_c(int16_t dst[64], const int16_t src[64],
+ +                         int qp, const uint8_t *permutation)
+ +{
+ +    int i;
+ +    int bias = 0; // FIXME
+ +
+ +    unsigned threshold1 = qp * ((1<<4) - bias) - 1;
+ +    unsigned threshold2 = threshold1 << 1;
+ +
+ +    memset(dst, 0, 64 * sizeof(dst[0]));
+ +    dst[0] = (src[0] + 4) >> 3;
+ +
+ +    for (i = 1; i < 64; i++) {
+ +        int level = src[i];
+ +        if (((unsigned)(level + threshold1)) > threshold2) {
+ +            const int j = permutation[i];
+ +            dst[j] = (level + 4) >> 3;
+ +        }
+ +    }
+ +}
+ +
+ +static void softthresh_c(int16_t dst[64], const int16_t src[64],
+ +                         int qp, const uint8_t *permutation)
+ +{
+ +    int i;
+ +    int bias = 0; //FIXME
+ +
+ +    unsigned threshold1 = qp * ((1<<4) - bias) - 1;
+ +    unsigned threshold2 = threshold1 << 1;
+ +
+ +    memset(dst, 0, 64 * sizeof(dst[0]));
+ +    dst[0] = (src[0] + 4) >> 3;
+ +
+ +    for (i = 1; i < 64; i++) {
+ +        int level = src[i];
+ +        if (((unsigned)(level + threshold1)) > threshold2) {
+ +            const int j = permutation[i];
+ +            if (level > 0) dst[j] = (level - threshold1 + 4) >> 3;
+ +            else           dst[j] = (level + threshold1 + 4) >> 3;
+ +        }
+ +    }
+ +}
+ +
+ +static void store_slice_c(uint8_t *dst, const int16_t *src,
+ +                          int dst_linesize, int src_linesize,
+ +                          int width, int height, int log2_scale,
+ +                          const uint8_t dither[8][8])
+ +{
+ +    int y, x;
+ +
+ +#define STORE(pos) do {                                                     \
+ +    temp = ((src[x + y*src_linesize + pos] << log2_scale) + d[pos]) >> 6;   \
+ +    if (temp & 0x100)                                                       \
+ +        temp = ~(temp >> 31);                                               \
+ +    dst[x + y*dst_linesize + pos] = temp;                                   \
+ +} while (0)
+ +
+ +    for (y = 0; y < height; y++) {
+ +        const uint8_t *d = dither[y];
+ +        for (x = 0; x < width; x += 8) {
+ +            int temp;
+ +            STORE(0);
+ +            STORE(1);
+ +            STORE(2);
+ +            STORE(3);
+ +            STORE(4);
+ +            STORE(5);
+ +            STORE(6);
+ +            STORE(7);
+ +        }
+ +    }
+ +}
+ +
+ +static inline void add_block(int16_t *dst, int linesize, const int16_t block[64])
+ +{
+ +    int y;
+ +
+ +    for (y = 0; y < 8; y++) {
+ +        *(uint32_t *)&dst[0 + y*linesize] += *(uint32_t *)&block[0 + y*8];
+ +        *(uint32_t *)&dst[2 + y*linesize] += *(uint32_t *)&block[2 + y*8];
+ +        *(uint32_t *)&dst[4 + y*linesize] += *(uint32_t *)&block[4 + y*8];
+ +        *(uint32_t *)&dst[6 + y*linesize] += *(uint32_t *)&block[6 + y*8];
+ +    }
+ +}
+ +
+ +// XXX: export the function?
+ +static inline int norm_qscale(int qscale, int type)
+ +{
+ +    switch (type) {
+ +    case FF_QSCALE_TYPE_MPEG1: return qscale;
+ +    case FF_QSCALE_TYPE_MPEG2: return qscale >> 1;
+ +    case FF_QSCALE_TYPE_H264:  return qscale >> 2;
+ +    case FF_QSCALE_TYPE_VP56:  return (63 - qscale + 2) >> 2;
+ +    }
+ +    return qscale;
+ +}
+ +
+ +static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
+ +                   int dst_linesize, int src_linesize, int width, int height,
+ +                   const uint8_t *qp_table, int qp_stride, int is_luma)
+ +{
+ +    int x, y, i;
+ +    const int count = 1 << p->log2_count;
+ +    const int linesize = is_luma ? p->temp_linesize : FFALIGN(width+16, 16);
+ +    DECLARE_ALIGNED(16, uint64_t, block_align)[32];
+ +    int16_t *block  = (int16_t *)block_align;
+ +    int16_t *block2 = (int16_t *)(block_align + 16);
+ +
+ +    for (y = 0; y < height; y++) {
+ +        int index = 8 + 8*linesize + y*linesize;
+ +        memcpy(p->src + index, src + y*src_linesize, width);
+ +        for (x = 0; x < 8; x++) {
+ +            p->src[index         - x - 1] = p->src[index +         x    ];
+ +            p->src[index + width + x    ] = p->src[index + width - x - 1];
+ +        }
+ +    }
+ +    for (y = 0; y < 8; y++) {
+ +        memcpy(p->src + (       7-y)*linesize, p->src + (       y+8)*linesize, linesize);
+ +        memcpy(p->src + (height+8+y)*linesize, p->src + (height-y+7)*linesize, linesize);
+ +    }
+ +
+ +    for (y = 0; y < height + 8; y += 8) {
+ +        memset(p->temp + (8 + y) * linesize, 0, 8 * linesize * sizeof(*p->temp));
+ +        for (x = 0; x < width + 8; x += 8) {
+ +            int qp;
+ +
+ +            if (p->qp) {
+ +                qp = p->qp;
+ +            } else{
+ +                const int qps = 3 + is_luma;
+ +                qp = qp_table[(FFMIN(x, width - 1) >> qps) + (FFMIN(y, height - 1) >> qps) * qp_stride];
+ +                qp = FFMAX(1, norm_qscale(qp, p->qscale_type));
+ +            }
+ +            for (i = 0; i < count; i++) {
+ +                const int x1 = x + offset[i + count - 1][0];
+ +                const int y1 = y + offset[i + count - 1][1];
+ +                const int index = x1 + y1*linesize;
-     avpriv_dsputil_init(&spp->dsp, spp->avctx);
++                p->pdsp.get_pixels(block, p->src + index, linesize);
+ +                p->fdsp.fdct(block);
+ +                p->requantize(block2, block, qp, p->idsp.idct_permutation);
+ +                p->idsp.idct(block2);
+ +                add_block(p->temp + index, linesize, block2);
+ +            }
+ +        }
+ +        if (y)
+ +            p->store_slice(dst + (y - 8) * dst_linesize, p->temp + 8 + y*linesize,
+ +                           dst_linesize, linesize, width,
+ +                           FFMIN(8, height + 8 - y), MAX_LEVEL - p->log2_count,
+ +                           ldither);
+ +    }
+ +}
+ +
+ +/**
+ + * Register the pixel formats listed in pix_fmts as the common formats
+ + * for the filter.
+ + *
+ + * @param ctx filter context whose links receive the format list
+ + * @return 0 on success, AVERROR(ENOMEM) if the format list cannot be
+ + *         allocated
+ + */
+ +static int query_formats(AVFilterContext *ctx)
+ +{
+ +    static const enum AVPixelFormat pix_fmts[] = {
+ +        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,
+ +        AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV411P,
+ +        AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV440P,
+ +        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
+ +        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
+ +        AV_PIX_FMT_NONE
+ +    };
+ +    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+ +
+ +    /* do not hand a failed allocation to ff_set_common_formats() */
+ +    if (!fmts_list)
+ +        return AVERROR(ENOMEM);
+ +    ff_set_common_formats(ctx, fmts_list);
+ +    return 0;
+ +}
+ +
+ +static int config_input(AVFilterLink *inlink)
+ +{
+ +    SPPContext *spp = inlink->dst->priv;
+ +    const int h = FFALIGN(inlink->h + 16, 16);
+ +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+ +
+ +    spp->hsub = desc->log2_chroma_w;
+ +    spp->vsub = desc->log2_chroma_h;
+ +    spp->temp_linesize = FFALIGN(inlink->w + 16, 16);
+ +    spp->temp = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->temp));
+ +    spp->src  = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->src));
+ +    if (!spp->use_bframe_qp) {
+ +        /* we are assuming here the qp blocks will not be smaller than 16x16 */
+ +        spp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4);
+ +        spp->non_b_qp_table = av_calloc(spp->non_b_qp_alloc_size, sizeof(*spp->non_b_qp_table));
+ +        if (!spp->non_b_qp_table)
+ +            return AVERROR(ENOMEM);
+ +    }
+ +    if (!spp->temp || !spp->src)
+ +        return AVERROR(ENOMEM);
+ +    return 0;
+ +}
+ +
+ +static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+ +{
+ +    AVFilterContext *ctx = inlink->dst;
+ +    SPPContext *spp = ctx->priv;
+ +    AVFilterLink *outlink = ctx->outputs[0];
+ +    AVFrame *out = in;
+ +    int qp_stride = 0;
+ +    const int8_t *qp_table = NULL;
+ +
+ +    /* if we are not in a constant user quantizer mode and we don't want to use
+ +     * the quantizers from the B-frames (B-frames often have a higher QP), we
+ +     * need to save the qp table from the last non B-frame; this is what the
+ +     * following code block does */
+ +    if (!spp->qp) {
+ +        qp_table = av_frame_get_qp_table(in, &qp_stride, &spp->qscale_type);
+ +
+ +        if (qp_table && !spp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
+ +            int w, h;
+ +
+ +            /* if the qp stride is not set, it means the QP are only defined on
+ +             * a line basis */
+ +            if (!qp_stride) {
+ +                w = FF_CEIL_RSHIFT(inlink->w, 4);
+ +                h = 1;
+ +            } else {
+ +                w = FF_CEIL_RSHIFT(qp_stride, 4);
+ +                h = FF_CEIL_RSHIFT(inlink->h, 4);
+ +            }
+ +            av_assert0(w * h <= spp->non_b_qp_alloc_size);
+ +            memcpy(spp->non_b_qp_table, qp_table, w * h);
+ +        }
+ +    }
+ +
+ +    if (spp->log2_count && !ctx->is_disabled) {
+ +        if (!spp->use_bframe_qp && spp->non_b_qp_table)
+ +            qp_table = spp->non_b_qp_table;
+ +
+ +        if (qp_table || spp->qp) {
+ +            const int cw = FF_CEIL_RSHIFT(inlink->w, spp->hsub);
+ +            const int ch = FF_CEIL_RSHIFT(inlink->h, spp->vsub);
+ +
+ +            /* get a new frame if in-place is not possible or if the dimensions
+ +             * are not multiple of 8 */
+ +            if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
+ +                const int aligned_w = FFALIGN(inlink->w, 8);
+ +                const int aligned_h = FFALIGN(inlink->h, 8);
+ +
+ +                out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
+ +                if (!out) {
+ +                    av_frame_free(&in);
+ +                    return AVERROR(ENOMEM);
+ +                }
+ +                av_frame_copy_props(out, in);
+ +                out->width  = in->width;
+ +                out->height = in->height;
+ +            }
+ +
+ +            filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1);
+ +            filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw,        ch,        qp_table, qp_stride, 0);
+ +            filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw,        ch,        qp_table, qp_stride, 0);
+ +            emms_c();
+ +        }
+ +    }
+ +
+ +    if (in != out) {
+ +        if (in->data[3])
+ +            av_image_copy_plane(out->data[3], out->linesize[3],
+ +                                in ->data[3], in ->linesize[3],
+ +                                inlink->w, inlink->h);
+ +        av_frame_free(&in);
+ +    }
+ +    return ff_filter_frame(outlink, out);
+ +}
+ +
+ +static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+ +                           char *res, int res_len, int flags)
+ +{
+ +    SPPContext *spp = ctx->priv;
+ +
+ +    if (!strcmp(cmd, "level")) {
+ +        if (!strcmp(args, "max"))
+ +            spp->log2_count = MAX_LEVEL;
+ +        else
+ +            spp->log2_count = av_clip(strtol(args, NULL, 10), 0, MAX_LEVEL);
+ +        return 0;
+ +    }
+ +    return AVERROR(ENOSYS);
+ +}
+ +
+ +static av_cold int init(AVFilterContext *ctx)
+ +{
+ +    SPPContext *spp = ctx->priv;
+ +
+ +    spp->avctx = avcodec_alloc_context3(NULL);
+ +    if (!spp->avctx)
+ +        return AVERROR(ENOMEM);
+ +    ff_idctdsp_init(&spp->idsp, spp->avctx);
+ +    ff_fdctdsp_init(&spp->fdsp, spp->avctx);
++    ff_pixblockdsp_init(&spp->pdsp, spp->avctx);
+ +    spp->store_slice = store_slice_c;
+ +    switch (spp->mode) {
+ +    case MODE_HARD: spp->requantize = hardthresh_c; break;
+ +    case MODE_SOFT: spp->requantize = softthresh_c; break;
+ +    }
+ +    if (ARCH_X86)
+ +        ff_spp_init_x86(spp);
+ +    return 0;
+ +}
+ +
+ +static av_cold void uninit(AVFilterContext *ctx)
+ +{
+ +    SPPContext *spp = ctx->priv;
+ +
+ +    av_freep(&spp->temp);
+ +    av_freep(&spp->src);
+ +    if (spp->avctx) {
+ +        avcodec_close(spp->avctx);
+ +        av_freep(&spp->avctx);
+ +    }
+ +    av_freep(&spp->non_b_qp_table);
+ +}
+ +
+ +static const AVFilterPad spp_inputs[] = {
+ +    {
+ +        .name         = "default",
+ +        .type         = AVMEDIA_TYPE_VIDEO,
+ +        .config_props = config_input,
+ +        .filter_frame = filter_frame,
+ +    },
+ +    { NULL }
+ +};
+ +
+ +static const AVFilterPad spp_outputs[] = {
+ +    {
+ +        .name = "default",
+ +        .type = AVMEDIA_TYPE_VIDEO,
+ +    },
+ +    { NULL }
+ +};
+ +
+ +AVFilter ff_vf_spp = {
+ +    .name            = "spp",
+ +    .description     = NULL_IF_CONFIG_SMALL("Apply a simple post processing filter."),
+ +    .priv_size       = sizeof(SPPContext),
+ +    .init            = init,
+ +    .uninit          = uninit,
+ +    .query_formats   = query_formats,
+ +    .inputs          = spp_inputs,
+ +    .outputs         = spp_outputs,
+ +    .process_command = process_command,
+ +    .priv_class      = &spp_class,
+ +    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+ +};
diff --cc libavfilter/vf_spp.h

index 909d4de81226d824288fe9a8bf702d3b8a371645,0000000000000000000000000000000000000000..c8eac3caf2951a01f29b4effdbb507f049d3d336

mode 100644,000000..100644
--- 1/libavfilter/vf_spp.h
--- /dev/null
+++ b/libavfilter/vf_spp.h
@@@ -1,63 -1,0 +1,63 @@@
- #include "libavcodec/dsputil.h"
+ +/*
+ + * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ + * Copyright (c) 2013 Clément Bœsch
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or modify
+ + * it under the terms of the GNU General Public License as published by
+ + * the Free Software Foundation; either version 2 of the License, or
+ + * (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ + * GNU General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU General Public License along
+ + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ + */
+ +
+ +#ifndef AVFILTER_SPP_H
+ +#define AVFILTER_SPP_H
+ +
+ +#include "libavcodec/avcodec.h"
-     DSPContext dsp;
++#include "libavcodec/pixblockdsp.h"
+ +#include "libavcodec/idctdsp.h"
+ +#include "libavcodec/fdctdsp.h"
+ +#include "avfilter.h"
+ +
+ +#define MAX_LEVEL 6 /* quality levels */
+ +
+ +typedef struct {
+ +    const AVClass *av_class;
+ +
+ +    int log2_count;
+ +    int qp;
+ +    int mode;
+ +    int qscale_type;
+ +    int temp_linesize;
+ +    uint8_t *src;
+ +    int16_t *temp;
+ +    AVCodecContext *avctx;
+ +    IDCTDSPContext idsp;
+ +    FDCTDSPContext fdsp;
++    PixblockDSPContext pdsp;
+ +    int8_t *non_b_qp_table;
+ +    int non_b_qp_alloc_size;
+ +    int use_bframe_qp;
+ +    int hsub, vsub;
+ +
+ +    void (*store_slice)(uint8_t *dst, const int16_t *src,
+ +                        int dst_stride, int src_stride,
+ +                        int width, int height, int log2_scale,
+ +                        const uint8_t dither[8][8]);
+ +
+ +    void (*requantize)(int16_t dst[64], const int16_t src[64],
+ +                       int qp, const uint8_t *permutation);
+ +} SPPContext;
+ +
+ +void ff_spp_init_x86(SPPContext *s);
+ +
+ +#endif /* AVFILTER_SPP_H */
author	Michael Niedermayer <michaelni@gmx.at>
	Wed, 9 Jul 2014 22:56:05 +0000 (00:56 +0200)
committer	Michael Niedermayer <michaelni@gmx.at>
	Wed, 9 Jul 2014 23:22:14 +0000 (01:22 +0200)
		1	2
configure	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/arm/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/arm/dsputil_armv6.S	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/arm/dsputil_init_armv6.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/arm/pixblockdsp_armv6.S	patch \|	\|	diff2 \|	blob \| history
libavcodec/arm/pixblockdsp_init_arm.c	patch \|	\|	diff2 \|	blob \| history
libavcodec/asv.h	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/asvenc.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/dnxhdenc.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/dsputil.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/dsputil.h	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/dvenc.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/libavcodec.v	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/mpegvideo.h	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/mpegvideo_enc.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/pixblockdsp.c	patch \|	\|	diff2 \|	blob \| history
libavcodec/pixblockdsp.h	patch \|	\|	diff2 \|	blob \| history
libavcodec/pixblockdsp_template.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/ppc/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/ppc/dsputil_altivec.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/ppc/pixblockdsp.c	patch \|	\|	diff2 \|	blob \| history
libavcodec/x86/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/x86/dsputilenc.asm	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/x86/dsputilenc_mmx.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/x86/pixblockdsp.asm	patch \|	\|	diff2 \|	blob \| history
libavcodec/x86/pixblockdsp_init.c	patch \|	\|	diff2 \|	blob \| history
libavfilter/vf_mpdecimate.c	patch \|	diff1 \|	\|	blob \| history
libavfilter/vf_spp.c	patch \|	diff1 \|	\|	blob \| history
libavfilter/vf_spp.h	patch \|	diff1 \|	\|	blob \| history