git.sesse.net Git - ffmpeg/commitdiff
Merge commit '5801f9ed245ca5ebb57b0b5183de7a24aaece133'
author    Clément Bœsch <u@pkh.me>
          Thu, 23 Mar 2017 10:58:01 +0000 (11:58 +0100)
committer Clément Bœsch <u@pkh.me>
          Thu, 23 Mar 2017 10:58:01 +0000 (11:58 +0100)
* commit '5801f9ed245ca5ebb57b0b5183de7a24aaece133':
  h264_intrapred: x86: Update comments left behind in 95c89da36ebeeb96b7146c0d70f46c582397da7f

Merged-by: Clément Bœsch <u@pkh.me>
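
For context: commit 95c89da3 (referenced in the commit message above) appears to have switched the stride argument of these intra-prediction functions from int to ptrdiff_t while leaving the prototype comments in the assembly untouched; this merge brings those comments in line. A rough C sketch of the declarations the updated comments describe (the pixel typedef for the 10-bit case is an assumption of this sketch, not taken from the diff; function and parameter names are copied from the comments below):

    #include <stddef.h>   /* ptrdiff_t */
    #include <stdint.h>

    typedef uint16_t pixel;   /* assumed here for the 10-bit functions */

    /* before: (..., int stride)   after: (..., ptrdiff_t stride) */
    void ff_pred4x4_dc_10(pixel *src, const pixel *topright, ptrdiff_t stride);
    void ff_pred8x8l_dc_10(pixel *src, int has_topleft, int has_topright,
                           ptrdiff_t stride);

A pointer-sized signed stride avoids implicit conversions when it is added to or subtracted from pixel pointers on 64-bit targets; the code itself was presumably already converted in 95c89da3, so only the stale comments need updating here.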
libavcodec/x86/h264_intrapred.asm
libavcodec/x86/h264_intrapred_10bit.asm

diff --cc libavcodec/x86/h264_intrapred.asm
index 0f3b46287edb9f88fd06b0e772c7017fffe0074e,1ea97fa1cac2fd6f47f2f40f40704fa95df4ae28..f3aa3172f05f274649186988593ab0f5a6724a92
@@@ -268,45 -268,8 +268,45 @@@ cglobal pred16x16_tm_vp8_8, 2,6,
      jg .loop
      REP_RET
  
 +%if HAVE_AVX2_EXTERNAL
 +INIT_YMM avx2
 +cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
 +    sub                       dstq, strideq
 +    pmovzxbw                    m0, [dstq]
 +    vpbroadcastb               xm1, [r0-1]
 +    pmovzxbw                    m1, xm1
 +    psubw                       m0, m1
 +    mov                 iterationd, 4
 +    lea                   stride3q, [strideq*3]
 +.loop:
 +    vpbroadcastb               xm1, [dstq+strideq*1-1]
 +    vpbroadcastb               xm2, [dstq+strideq*2-1]
 +    vpbroadcastb               xm3, [dstq+stride3q-1]
 +    vpbroadcastb               xm4, [dstq+strideq*4-1]
 +    pmovzxbw                    m1, xm1
 +    pmovzxbw                    m2, xm2
 +    pmovzxbw                    m3, xm3
 +    pmovzxbw                    m4, xm4
 +    paddw                       m1, m0
 +    paddw                       m2, m0
 +    paddw                       m3, m0
 +    paddw                       m4, m0
 +    vpackuswb                   m1, m1, m2
 +    vpackuswb                   m3, m3, m4
 +    vpermq                      m1, m1, q3120
 +    vpermq                      m3, m3, q3120
 +    movdqa        [dstq+strideq*1], xm1
 +    vextracti128  [dstq+strideq*2], m1, 1
 +    movdqa       [dstq+stride3q*1], xm3
 +    vextracti128  [dstq+strideq*4], m3, 1
 +    lea                       dstq, [dstq+strideq*4]
 +    dec                 iterationd
 +    jg .loop
 +    REP_RET
 +%endif
 +
  ;-----------------------------------------------------------------------------
- ; void ff_pred16x16_plane_*_8(uint8_t *src, int stride)
+ ; void ff_pred16x16_plane_*_8(uint8_t *src, ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  
  %macro H264_PRED16x16_PLANE 1
diff --cc libavcodec/x86/h264_intrapred_10bit.asm
index 9e40cfe24b9b6629ee3c54f755a35c12673623e1,7ba9828e17ea9e78b0940386e0f7fc82b4843f05..629e0a72e3ad132ceb4489d66789bb7f9cfd6cd9
@@@ -83,13 -83,12 +84,14 @@@ INIT_XMM sse
  PRED4x4_DR
  INIT_XMM ssse3
  PRED4x4_DR
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED4x4_DR
 +%endif
  
  ;------------------------------------------------------------------------------
- ; void ff_pred4x4_vertical_right(pixel *src, const pixel *topright, int stride)
+ ; void ff_pred4x4_vertical_right_10(pixel *src, const pixel *topright,
+ ;                                   ptrdiff_t stride)
  ;------------------------------------------------------------------------------
  %macro PRED4x4_VR 0
  cglobal pred4x4_vertical_right_10, 3, 3, 6
@@@ -122,13 -121,12 +124,14 @@@ INIT_XMM sse
  PRED4x4_VR
  INIT_XMM ssse3
  PRED4x4_VR
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED4x4_VR
 +%endif
  
  ;-------------------------------------------------------------------------------
- ; void ff_pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride)
+ ; void ff_pred4x4_horizontal_down_10(pixel *src, const pixel *topright,
+ ;                                    ptrdiff_t stride)
  ;-------------------------------------------------------------------------------
  %macro PRED4x4_HD 0
  cglobal pred4x4_horizontal_down_10, 3, 3
@@@ -164,14 -162,28 +167,14 @@@ INIT_XMM sse
  PRED4x4_HD
  INIT_XMM ssse3
  PRED4x4_HD
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED4x4_HD
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
+ ; void ff_pred4x4_dc_10(pixel *src, const pixel *topright, ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
 -%macro HADDD 2 ; sum junk
 -%if mmsize == 16
 -    movhlps %2, %1
 -    paddd   %1, %2
 -    pshuflw %2, %1, 0xE
 -    paddd   %1, %2
 -%else
 -    pshufw  %2, %1, 0xE
 -    paddd   %1, %2
 -%endif
 -%endmacro
 -
 -%macro HADDW 2
 -    pmaddwd %1, [pw_1]
 -    HADDD   %1, %2
 -%endmacro
  
  INIT_MMX mmxext
  cglobal pred4x4_dc_10, 3, 3
@@@ -219,13 -232,12 +223,14 @@@ cglobal pred4x4_down_left_10, 3, 
  
  INIT_XMM sse2
  PRED4x4_DL
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED4x4_DL
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred4x4_vertical_left(pixel *src, const pixel *topright, int stride)
+ ; void ff_pred4x4_vertical_left_10(pixel *src, const pixel *topright,
+ ;                                  ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED4x4_VL 0
  cglobal pred4x4_vertical_left_10, 3, 3
  
  INIT_XMM sse2
  PRED4x4_VL
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED4x4_VL
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
+ ; void ff_pred4x4_horizontal_up_10(pixel *src, const pixel *topright,
+ ;                                  ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  INIT_MMX mmxext
  cglobal pred4x4_horizontal_up_10, 3, 3
@@@ -560,13 -571,12 +566,14 @@@ cglobal pred8x8l_top_dc_10, 4, 4, 
  
  INIT_XMM sse2
  PRED8x8L_TOP_DC
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_TOP_DC
 +%endif
  
  ;-------------------------------------------------------------------------------
- ; void ff_pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride)
+ ; void ff_pred8x8l_dc_10(pixel *src, int has_topleft, int has_topright,
+ ;                        ptrdiff_t stride)
  ;-------------------------------------------------------------------------------
  ;TODO: see if scalar is faster
  %macro PRED8x8L_DC 0
@@@ -619,14 -629,12 +626,14 @@@ cglobal pred8x8l_dc_10, 4, 6, 
  
  INIT_XMM sse2
  PRED8x8L_DC
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_DC
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred8x8l_vertical(pixel *src, int has_topleft, int has_topright,
- ;                           int stride)
+ ; void ff_pred8x8l_vertical_10(pixel *src, int has_topleft, int has_topright,
+ ;                              ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED8x8L_VERTICAL 0
  cglobal pred8x8l_vertical_10, 4, 4, 6
  
  INIT_XMM sse2
  PRED8x8L_VERTICAL
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_VERTICAL
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright,
- ;                             int stride)
+ ; void ff_pred8x8l_horizontal_10(uint8_t *src, int has_topleft,
+ ;                                int has_topright, ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED8x8L_HORIZONTAL 0
  cglobal pred8x8l_horizontal_10, 4, 4, 5
@@@ -712,14 -718,12 +719,14 @@@ INIT_XMM sse
  PRED8x8L_HORIZONTAL
  INIT_XMM ssse3
  PRED8x8L_HORIZONTAL
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_HORIZONTAL
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred8x8l_down_left(pixel *src, int has_topleft, int has_topright,
- ;                            int stride)
+ ; void ff_pred8x8l_down_left_10(pixel *src, int has_topleft, int has_topright,
+ ;                               ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED8x8L_DOWN_LEFT 0
  cglobal pred8x8l_down_left_10, 4, 4, 7
@@@ -781,14 -785,12 +788,14 @@@ INIT_XMM sse
  PRED8x8L_DOWN_LEFT
  INIT_XMM ssse3
  PRED8x8L_DOWN_LEFT
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_DOWN_LEFT
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred8x8l_down_right(pixel *src, int has_topleft, int has_topright,
- ;                             int stride)
+ ; void ff_pred8x8l_down_right_10(pixel *src, int has_topleft,
+ ;                                int has_topright, ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED8x8L_DOWN_RIGHT 0
  ; standard forbids this when has_topleft is false
@@@ -856,14 -858,12 +863,14 @@@ INIT_XMM sse
  PRED8x8L_DOWN_RIGHT
  INIT_XMM ssse3
  PRED8x8L_DOWN_RIGHT
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_DOWN_RIGHT
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred8x8l_vertical_right(pixel *src, int has_topleft,
- ;                                 int has_topright, int stride)
+ ; void ff_pred8x8l_vertical_right_10(pixel *src, int has_topleft,
+ ;                                    int has_topright, ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED8x8L_VERTICAL_RIGHT 0
  ; likewise with 8x8l_down_right
@@@ -927,14 -927,12 +934,14 @@@ INIT_XMM sse
  PRED8x8L_VERTICAL_RIGHT
  INIT_XMM ssse3
  PRED8x8L_VERTICAL_RIGHT
 +%if HAVE_AVX_EXTERNAL
  INIT_XMM avx
  PRED8x8L_VERTICAL_RIGHT
 +%endif
  
  ;-----------------------------------------------------------------------------
- ; void ff_pred8x8l_horizontal_up(pixel *src, int has_topleft,
- ;                                int has_topright, int stride)
+ ; void ff_pred8x8l_horizontal_up_10(pixel *src, int has_topleft,
+ ;                                   int has_topright, ptrdiff_t stride)
  ;-----------------------------------------------------------------------------
  %macro PRED8x8L_HORIZONTAL_UP 0
  cglobal pred8x8l_horizontal_up_10, 4, 4, 6