;* Copyright (c) 2010 Loren Merritt
;* Copyright (c) 2010 Ronald S. Bultje
;*
-;* This file is part of FFmpeg.
+;* This file is part of Libav.
;*
-;* FFmpeg is free software; you can redistribute it and/or
+;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* FFmpeg is distributed in the hope that it will be useful,
+;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%endif
paddw m0, m1 ; sum of H coefficients
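; (here H is the plane predictor's weighted gradient over the top edge;
;  for the 16x16 case: H = sum(i * (top[7+i] - top[7-i])), i = 1..8)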
-%ifidn %3, h264
- pmullw m0, [pw_5]
- paddw m0, [pw_32]
- psraw m0, 6
-%elifidn %3, rv40
- pmullw m0, [pw_5]
- psraw m0, 6
-%elifidn %3, svq3
- movd r3d, m0
- movsx r3, r3w
- test r3, r3
- lea r4, [r3+3]
- cmovs r3, r4
- sar r3, 2 ; H/4
- lea r3, [r3*5] ; 5*(H/4)
- test r3, r3
- lea r4, [r3+15]
- cmovs r3, r4
- sar r3, 4 ; (5*(H/4))/16
- movd m0, r3d
-%endif
-
lea r4, [r0+r2*8-1]
lea r3, [r0+r2*4-1]
add r4, r2
movzx r3, byte [r3+r2*2 ]
lea r3, [r3+r4+1]
shl r3, 4
+
movd r1d, m0
movsx r1d, r1w
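+; scale the H sum in scalar code (replacing the old pmullw/paddw/psraw):
+; b = (5*H+32)>>6 for h264, (5*H)>>6 for rv40, 5*(H/4)/16 truncated for svq3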
+%ifnidn %3, svq3
+%ifidn %3, h264
+ lea r1d, [r1d*5+32]
+%else ; rv40
+ lea r1d, [r1d*5]
+%endif
+ sar r1d, 6
+%else ; svq3
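+ ; sar alone would round toward -infinity; adding divisor-1 to negative
+ ; values first (picked branchlessly with cmovs) truncates like C division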
+ test r1d, r1d
+ lea r4d, [r1d+3]
+ cmovs r1d, r4d
+ sar r1d, 2 ; H/4
+ lea r1d, [r1d*5] ; 5*(H/4)
+ test r1d, r1d
+ lea r4d, [r1d+15]
+ cmovs r1d, r4d
+ sar r1d, 4 ; (5*(H/4))/16
+%endif
+ movd m0, r1d
+
add r1d, r5d
add r3d, r1d
shl r1d, 3
%endif
paddw m0, m1 ; sum of H coefficients
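; (8x8 plane case: H = sum(i * (top[3+i] - top[3-i])), i = 1..4)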
- pmullw m0, [pw_17]
- paddw m0, [pw_16]
- psraw m0, 5
-
lea r4, [r0+r2*4-1]
lea r3, [r0 -1]
add r4, r2
shl r3, 4
movd r1d, m0
movsx r1d, r1w
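+; scalar replacement for the pw_17/pw_16 vector scaling: b = (17*H+16)>>5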
+ imul r1d, 17
+ add r1d, 16
+ sar r1d, 5
+ movd m0, r1d
add r1d, r5d
sub r3d, r1d
add r1d, r1d
;-----------------------------------------------------------------------------
; void pred8x8_top_dc_mmxext(uint8_t *src, int stride)
;-----------------------------------------------------------------------------
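; fills the 8x8 block from the top edge only; the left and right 4-pixel
; halves each get their own rounded DC average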
-%ifdef CONFIG_GPL
cglobal pred8x8_top_dc_mmxext, 2,5
sub r0, r1
movq mm0, [r0]
movq [r4+r1*1], m1
movq [r4+r1*2], m1
RET
-%endif
;-----------------------------------------------------------------------------
; void pred8x8_dc_rv40(uint8_t *src, int stride)
;-----------------------------------------------------------------------------
; void pred8x8l_top_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
;-----------------------------------------------------------------------------
-%ifdef CONFIG_GPL
%macro PRED8x8L_TOP_DC 1
cglobal pred8x8l_top_dc_%1, 4,4
sub r0, r3
lea r2, [r0+r3*2]
movq mm0, [r0+r3*1-8]
- punpckhbw mm0, [r0+r3*0-8]
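+; branchless top-left handling: keep row -1 in r1 when has_topleft is set,
+; otherwise reuse row 0 so the edge pixel is duplicated instead of patched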
+ test r1, r1
+ lea r1, [r0+r3]
+ cmovnz r1, r0
+ punpckhbw mm0, [r1+r3*0-8]
movq mm1, [r2+r3*1-8]
punpckhbw mm1, [r0+r3*2-8]
mov r2, r0
punpckhdq mm3, mm1
lea r0, [r0+r3*2]
movq mm0, [r0+r3*0-8]
- movq mm1, [r2]
+ movq mm1, [r1+r3*0-8]
mov r0, r2
movq mm4, mm3
movq mm2, mm3
PALIGNR mm4, mm0, 7, mm0
PALIGNR mm1, mm2, 1, mm2
- test r1, r1 ; top_left
- jnz .do_left
-.fix_lt_1:
- movq mm5, mm3
- pxor mm5, mm4
- psrlq mm5, 56
- psllq mm5, 48
- pxor mm1, mm5
-.do_left:
movq mm0, mm4
PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
movq mm4, mm0
sub r0, r3
lea r2, [r0+r3*2]
movq mm0, [r0+r3*1-8]
- punpckhbw mm0, [r0+r3*0-8]
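+; same branchless top-left selection as above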
+ test r1, r1
+ lea r1, [r0+r3]
+ cmovnz r1, r0
+ punpckhbw mm0, [r1+r3*0-8]
movq mm1, [r2+r3*1-8]
punpckhbw mm1, [r0+r3*2-8]
mov r2, r0
punpckhdq mm3, mm1
lea r0, [r0+r3*2]
movq mm0, [r0+r3*0-8]
- movq mm1, [r2]
+ movq mm1, [r1+r3*0-8]
mov r0, r2
movq mm4, mm3
movq mm2, mm3
PALIGNR mm4, mm0, 7, mm0
PALIGNR mm1, mm2, 1, mm2
- test r1, r1
- jnz .do_left
-.fix_lt_1:
- movq mm5, mm3
- pxor mm5, mm4
- psrlq mm5, 56
- psllq mm5, 48
- pxor mm1, mm5
-.do_left:
movq mm0, mm4
PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
movq mm4, mm0
INIT_MMX
%define PALIGNR PALIGNR_SSSE3
PRED8x8L_HORIZONTAL_DOWN ssse3
-%endif
;-----------------------------------------------------------------------------
; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
;-----------------------------------------------------------------------------
; void pred4x4_down_left_mmxext(uint8_t *src, const uint8_t *topright, int stride)
;-----------------------------------------------------------------------------
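; diagonal down-left: each pixel is a 1-2-1 lowpass over the top and
; top-right neighbours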
-%ifdef CONFIG_GPL
INIT_MMX
cglobal pred4x4_down_left_mmxext, 3,3
sub r0, r2
psrlq m0, 8
movh [r0+r2*1], m0
RET
-%endif