%define GLOBAL
%endif
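+; FDEC_STRIDE is the fixed row stride of the per-macroblock decoded-pixel
+; buffer (p_fdec), so predictors no longer need a runtime i_stride argument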
+%assign FDEC_STRIDE 32
%include "amd64inc.asm"
-%macro SAVE_0_1 1
- movq [%1] , mm0
- movq [%1 + 8] , mm1
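+; store an 8x8 block: %1 in rows 0-3, %2 in rows 4-7
+; (parm1q points one row above the block, hence offsets 1..8)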
+%macro STORE8x8 2
+ movq [parm1q + 1*FDEC_STRIDE], %1
+ movq [parm1q + 2*FDEC_STRIDE], %1
+ movq [parm1q + 3*FDEC_STRIDE], %1
+ movq [parm1q + 4*FDEC_STRIDE], %1
+ movq [parm1q + 5*FDEC_STRIDE], %2
+ movq [parm1q + 6*FDEC_STRIDE], %2
+ movq [parm1q + 7*FDEC_STRIDE], %2
+ movq [parm1q + 8*FDEC_STRIDE], %2
%endmacro
-%macro SAVE_0_0 1
- movq [%1] , mm0
- movq [%1 + 8] , mm0
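+; store a 16x16 block, 4 rows per iteration: %1 in the left 8 columns,
+; %2 in the right 8; clobbers eax and advances parm1q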
+%macro STORE16x16 2
+ mov eax, 4
+ALIGN 4
+%%loop:
+ movq [parm1q + 1*FDEC_STRIDE], %1
+ movq [parm1q + 2*FDEC_STRIDE], %1
+ movq [parm1q + 3*FDEC_STRIDE], %1
+ movq [parm1q + 4*FDEC_STRIDE], %1
+ movq [parm1q + 1*FDEC_STRIDE + 8], %2
+ movq [parm1q + 2*FDEC_STRIDE + 8], %2
+ movq [parm1q + 3*FDEC_STRIDE + 8], %2
+ movq [parm1q + 4*FDEC_STRIDE + 8], %2
+ dec eax
+ lea parm1q, [parm1q + 4*FDEC_STRIDE]
+ jnz %%loop
+ nop
%endmacro
%endmacro
%macro PRED8x8_LOAD_TOP 0
- sub parm1q, parm2q
+ sub parm1q, FDEC_STRIDE
- and parm3d, 12
+ and parm2d, 12
movq mm1, [parm1q-1]
movq mm2, [parm1q+1]
- cmp parm3d, byte 8
+ cmp parm2d, byte 8
jge .have_topleft
mov al, [parm1q]
mov ah, [parm1q]
pinsrw mm1, eax, 0
.have_topleft:
- and parm3d, byte 4
+ and parm2d, byte 4
jne .have_topright
mov al, [parm1q+7]
mov ah, [parm1q+7]
;-----------------------------------------------------------------------------
;
-; void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors )
+; void predict_8x8_v_mmxext( uint8_t *src, int i_neighbors )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_v_mmxext:
PRED8x8_LOAD_TOP
-
- lea rax, [parm2q + 2*parm2q]
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2*parm2q], mm0 ; 1
- movq [parm1q + 4*parm2q], mm0 ; 3
- movq [parm1q + 8*parm2q], mm0 ; 7
- add parm1q, rax
- movq [parm1q], mm0 ; 2
- movq [parm1q + 2*parm2q], mm0 ; 4
- movq [parm1q + rax ], mm0 ; 5
- movq [parm1q + 4*parm2q], mm0 ; 6
-
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left );
+; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_neighbors, uint8_t *pix_left );
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_dc_core_mmxext:
- movq mm1, [parm4q-1]
- movq mm2, [parm4q+1]
- PRED8x8_LOWPASS mm4, [parm4q]
+ movq mm1, [parm3q-1]
+ movq mm2, [parm3q+1]
+ PRED8x8_LOWPASS mm4, [parm3q]
PRED8x8_LOAD_TOP
pshufw mm0, mm0, 0
packuswb mm0, mm0
- lea rax, [parm2q + 2*parm2q]
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2*parm2q], mm0 ; 1
- movq [parm1q + 4*parm2q], mm0 ; 3
- movq [parm1q + 8*parm2q], mm0 ; 7
- add parm1q, rax
- movq [parm1q], mm0 ; 2
- movq [parm1q + 2*parm2q], mm0 ; 4
- movq [parm1q + rax ], mm0 ; 5
- movq [parm1q + 4*parm2q], mm0 ; 6
-
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
+; void predict_8x8c_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_v_mmx :
- sub parm1q, parm2q ; esi <-- line -1
-
- movq mm0, [parm1q]
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2 * parm2q], mm0 ; 1
- movq [parm1q + 4 * parm2q], mm0 ; 3
- movq [parm1q + 8 * parm2q], mm0 ; 7
- add parm1q, parm2q ; <-- line 0
- movq [parm1q + 2 * parm2q], mm0 ; 2
- movq [parm1q + 4 * parm2q], mm0 ; 4
- lea parm1q, [parm1q + 4 * parm2q] ; <-- line 4
- movq [parm1q + parm2q], mm0 ; 5
- movq [parm1q + 2 * parm2q], mm0 ; 6
-
+ sub parm1q, FDEC_STRIDE
+ movq mm0, [parm1q]
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 )
+; void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_dc_core_mmxext:
- sub parm1q, parm2q
- lea rax, [parm2q + 2*parm2q]
+ sub parm1q, FDEC_STRIDE
movq mm0, [parm1q]
pxor mm1, mm1
psadbw mm1, mm2 ; s1
psadbw mm0, mm2 ; s0
- movd mm4, parm3d
- movd mm5, parm4d
+ movd mm4, parm2d
+ movd mm5, parm3d
paddw mm0, mm4
pshufw mm2, mm5, 0
psrlw mm0, 3
packuswb mm0, mm1 ; dc0,dc1 (b)
packuswb mm2, mm3 ; dc2,dc3 (b)
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2*parm2q], mm0 ; 1
- movq [parm1q + rax ], mm0 ; 2
- movq [parm1q + 4*parm2q], mm0 ; 3
- lea parm1q, [parm1q + 4*parm2q]
- movq [parm1q + parm2q], mm2 ; 4
- movq [parm1q + 2*parm2q], mm2 ; 5
- movq [parm1q + rax ], mm2 ; 6
- movq [parm1q + 4*parm2q], mm2 ; 7
-
+ STORE8x8 mm0, mm2
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_8x8c_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_p_core_mmx:
- movd mm0, parm3d
- movd mm2, parm4d
- movd mm4, parm5d
+ movd mm0, parm2d
+ movd mm2, parm3d
+ movd mm4, parm4d
pshufw mm0, mm0, 0
pshufw mm2, mm2, 0
pshufw mm4, mm4, 0
paddsw mm0, mm4
paddsw mm1, mm4
- add parm1q, parm2q
+ add parm1q, FDEC_STRIDE
dec eax
jg .loop
;-----------------------------------------------------------------------------
;
-; void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_16x16_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_16x16_p_core_mmx:
- movd mm0, parm3d
- movd mm2, parm4d
- movd mm4, parm5d
+ movd mm0, parm2d
+ movd mm2, parm3d
+ movd mm4, parm4d
pshufw mm0, mm0, 0
pshufw mm2, mm2, 0
pshufw mm4, mm4, 0
paddsw mm1, mm4
paddsw mm2, mm4
paddsw mm3, mm4
- add parm1q, parm2q
+ add parm1q, FDEC_STRIDE
dec eax
jg .loop
;-----------------------------------------------------------------------------
;
-; void predict_16x16_v_mmx( uint8_t *src, int i_stride )
+; void predict_16x16_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_16x16_v_mmx :
- sub parm1q, parm2q ; line -1
-
+ sub parm1q, FDEC_STRIDE
movq mm0, [parm1q]
movq mm1, [parm1q + 8]
- lea rax, [parm2q + 2 * parm2q] ; 3 * stride
-
- SAVE_0_1 (parm1q + parm2q) ; 0
- SAVE_0_1 (parm1q + 2 * parm2q) ; 1
- SAVE_0_1 (parm1q + rax) ; 2
- SAVE_0_1 (parm1q + 4 * parm2q) ; 3
- SAVE_0_1 (parm1q + 2 * rax) ; 5
- SAVE_0_1 (parm1q + 8 * parm2q) ; 7
- SAVE_0_1 (parm1q + 4 * rax) ; 11
- add parm1q, parm2q ; <-- line 0
- SAVE_0_1 (parm1q + 4 * parm2q) ; 4
- SAVE_0_1 (parm1q + 2 * rax) ; 6
- SAVE_0_1 (parm1q + 8 * parm2q) ; 8
- SAVE_0_1 (parm1q + 4 * rax) ; 12
- lea parm1q, [parm1q + 8 * parm2q] ; <-- line 8
- SAVE_0_1 (parm1q + parm2q) ; 9
- SAVE_0_1 (parm1q + 2 * parm2q) ; 10
- lea parm1q, [parm1q + 4 * parm2q] ; <-- line 12
- SAVE_0_1 (parm1q + parm2q) ; 13
- SAVE_0_1 (parm1q + 2 * parm2q) ; 14
- SAVE_0_1 (parm1q + rax) ; 15
-
+ STORE16x16 mm0, mm1
ret
;-----------------------------------------------------------------------------
;
-; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left )
+; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left )
;
;-----------------------------------------------------------------------------
%macro PRED16x16_DC 2
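+; %1: term added to the top-row sum (here the precomputed left dc), %2: shift for the average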
- sub parm1q, parm2q ; parm1q <-- line -1
+ sub parm1q, FDEC_STRIDE
pxor mm0, mm0
pxor mm1, mm1
paddusw mm0, %1
psrlw mm0, %2 ; dc
pshufw mm0, mm0, 0
- lea r8, [parm2q + 2*parm2q] ; eax <-- 3* stride
packuswb mm0, mm0 ; dc in bytes
- mov eax, 4
-ALIGN 4
-.loop:
- SAVE_0_0 (parm1q + parm2q) ; 0
- SAVE_0_0 (parm1q + 2 * parm2q) ; 1
- SAVE_0_0 (parm1q + r8 ) ; 2
- SAVE_0_0 (parm1q + 4 * parm2q) ; 3
- dec eax
- lea parm1q, [parm1q + 4 * parm2q]
- jg .loop
- nop
+ STORE16x16 mm0, mm0
%endmacro
ALIGN 16
predict_16x16_dc_core_mmxext:
- movd mm2, parm3d
+ movd mm2, parm2d
PRED16x16_DC mm2, 5
ret
%define picesp esp
%endif
+%assign FDEC_STRIDE 32
%include "i386inc.asm"
+; using registers for the row offsets here is faster than constant
+; [edx + Y*FDEC_STRIDE] displacements
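+; expects edx = src - FDEC_STRIDE, ecx = FDEC_STRIDE, eax = 3*FDEC_STRIDE;
+; stores %1 in rows 0-3 and %2 in rows 4-7, advancing edx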
+%macro STORE8x8 2
+ movq [edx + ecx], %1 ; 0
+ movq [edx + 2*ecx], %1 ; 1
+ movq [edx + 4*ecx], %1 ; 3
+ movq [edx + 8*ecx], %2 ; 7
+ add edx, eax
+ movq [edx], %1 ; 2
+ movq [edx + 2*ecx], %2 ; 4
+ movq [edx + eax], %2 ; 5
+ movq [edx + 4*ecx], %2 ; 6
+%endmacro
+
%macro SAVE_0_1 1
movq [%1] , mm0
movq [%1 + 8] , mm1
%macro PRED8x8_LOAD_TOP 0
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
- mov eax, [picesp +12]
+ mov ecx, FDEC_STRIDE
+ mov eax, [picesp + 8]
sub edx, ecx
and eax, 12
mov al, [edx]
mov ah, [edx]
pinsrw mm1, eax, 0
- mov eax, [picesp + 12]
+ mov eax, [picesp + 8]
.have_topleft:
and eax, byte 4
;-----------------------------------------------------------------------------
;
-; void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors )
+; void predict_8x8_v_mmxext( uint8_t *src, int i_neighbors )
;
;-----------------------------------------------------------------------------
PRED8x8_LOAD_TOP
lea eax, [ecx + 2*ecx]
- movq [edx + ecx], mm0 ; 0
- movq [edx + 2*ecx], mm0 ; 1
- movq [edx + 4*ecx], mm0 ; 3
- movq [edx + 8*ecx], mm0 ; 7
- add edx, eax
- movq [edx], mm0 ; 2
- movq [edx + 2*ecx], mm0 ; 4
- movq [edx + eax], mm0 ; 5
- movq [edx + 4*ecx], mm0 ; 6
+ STORE8x8 mm0, mm0
picpop ebx
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left );
+; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_neighbors, uint8_t *pix_left );
;
;-----------------------------------------------------------------------------
picpush ebx
picgetgot ebx
- mov eax, [picesp + 16]
+ mov eax, [picesp + 12]
movq mm1, [eax-1]
movq mm2, [eax+1]
PRED8x8_LOWPASS mm4, [eax]
packuswb mm0, mm0
lea eax, [ecx + 2*ecx]
- movq [edx + ecx], mm0 ; 0
- movq [edx + 2*ecx], mm0 ; 1
- movq [edx + 4*ecx], mm0 ; 3
- movq [edx + 8*ecx], mm0 ; 7
- add edx, eax
- movq [edx], mm0 ; 2
- movq [edx + 2*ecx], mm0 ; 4
- movq [edx + eax], mm0 ; 5
- movq [edx + 4*ecx], mm0 ; 6
+ STORE8x8 mm0, mm0
picpop ebx
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
+; void predict_8x8c_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_v_mmx :
-
- mov edx , [esp + 4]
- mov ecx , [esp + 8]
- sub edx , ecx ; edx <-- line -1
-
- movq mm0 , [edx]
- movq [edx + ecx] , mm0 ; 0
- movq [edx + 2 * ecx] , mm0 ; 1
- movq [edx + 4 * ecx] , mm0 ; 3
- movq [edx + 8 * ecx] , mm0 ; 7
- add edx , ecx ; edx <-- line 0
- movq [edx + 2 * ecx] , mm0 ; 2
- movq [edx + 4 * ecx] , mm0 ; 4
- lea edx , [edx + 4 * ecx] ; edx <-- line 4
- movq [edx + ecx] , mm0 ; 5
- movq [edx + 2 * ecx] , mm0 ; 6
-
+ mov edx, [esp + 4]
+ mov ecx, FDEC_STRIDE
+ sub edx, ecx
+ movq mm0, [edx]
+ lea eax, [ecx + 2*ecx]
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 )
+; void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 )
;
;-----------------------------------------------------------------------------
picgetgot ebx
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
+ mov ecx, FDEC_STRIDE
sub edx, ecx
lea eax, [ecx + 2*ecx]
psadbw mm1, mm2 ; s1
psadbw mm0, mm2 ; s0
- paddw mm0, [picesp + 12]
- pshufw mm2, [picesp + 16], 0
+ paddw mm0, [picesp + 8]
+ pshufw mm2, [picesp + 12], 0
psrlw mm0, 3
paddw mm1, [pw_2 GLOBAL]
movq mm3, mm2
packuswb mm0, mm1 ; dc0,dc1 (b)
packuswb mm2, mm3 ; dc2,dc3 (b)
- movq [edx + ecx], mm0 ; 0
- movq [edx + 2*ecx], mm0 ; 1
- movq [edx + eax], mm0 ; 2
- movq [edx + 4*ecx], mm0 ; 3
- lea edx, [edx + 4*ecx]
- movq [edx + ecx], mm2 ; 4
- movq [edx + 2*ecx], mm2 ; 5
- movq [edx + eax], mm2 ; 6
- movq [edx + 4*ecx], mm2 ; 7
+ STORE8x8 mm0, mm2
picpop ebx
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_8x8c_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
picgetgot ebx
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
- pshufw mm0, [picesp +12], 0
- pshufw mm2, [picesp +16], 0
- pshufw mm4, [picesp +20], 0
+ mov ecx, FDEC_STRIDE
+ pshufw mm0, [picesp + 8], 0
+ pshufw mm2, [picesp +12], 0
+ pshufw mm4, [picesp +16], 0
movq mm1, mm2
pmullw mm2, [pw_3210 GLOBAL]
psllw mm1, 2
;-----------------------------------------------------------------------------
;
-; void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_16x16_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
picgetgot ebx
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
- pshufw mm0, [picesp +12], 0
- pshufw mm2, [picesp +16], 0
- pshufw mm4, [picesp +20], 0
+ mov ecx, FDEC_STRIDE
+ pshufw mm0, [picesp + 8], 0
+ pshufw mm2, [picesp +12], 0
+ pshufw mm4, [picesp +16], 0
movq mm5, mm2
movq mm1, mm2
pmullw mm5, [pw_3210 GLOBAL]
;-----------------------------------------------------------------------------
;
-; void predict_16x16_v_mmx( uint8_t *src, int i_stride )
+; void predict_16x16_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
predict_16x16_v_mmx :
mov edx, [esp + 4]
- mov ecx, [esp + 8]
+ mov ecx, FDEC_STRIDE
sub edx, ecx ; edx <-- line -1
movq mm0, [edx]
;-----------------------------------------------------------------------------
;
-; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left )
+; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left )
;
;-----------------------------------------------------------------------------
%macro PRED16x16_DC 3
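+; as in the x86_64 version: %1 is added to the dc sum, %2 is the averaging
+; shift; %3 names the stack pointer (esp or picesp) used to locate the args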
mov edx, [%3 + 4]
- mov ecx, [%3 + 8]
+ mov ecx, FDEC_STRIDE
sub edx, ecx ; edx <-- line -1
pxor mm0, mm0
ALIGN 16
predict_16x16_dc_core_mmxext:
- PRED16x16_DC [esp+12], 5, esp
+ PRED16x16_DC [esp+8], 5, esp
ret
ALIGN 16
#include "common/clip1.h"
#include "predict.h"
-extern void predict_16x16_v_mmx( uint8_t *src, int i_stride );
-extern void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left );
-extern void predict_16x16_dc_top_mmxext( uint8_t *src, int i_stride );
-extern void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c );
-extern void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c );
-extern void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 );
-extern void predict_8x8c_v_mmx( uint8_t *src, int i_stride );
-extern void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors );
-extern void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left );
-
-static void predict_16x16_p( uint8_t *src, int i_stride )
+extern void predict_16x16_v_mmx( uint8_t *src );
+extern void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left );
+extern void predict_16x16_dc_top_mmxext( uint8_t *src );
+extern void predict_16x16_p_core_mmx( uint8_t *src, int i00, int b, int c );
+extern void predict_8x8c_p_core_mmx( uint8_t *src, int i00, int b, int c );
+extern void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 );
+extern void predict_8x8c_v_mmx( uint8_t *src );
+extern void predict_8x8_v_mmxext( uint8_t *src, int i_neighbors );
+extern void predict_8x8_dc_core_mmxext( uint8_t *src, int i_neighbors, uint8_t *pix_left );
+
+static void predict_16x16_p( uint8_t *src )
{
int a, b, c, i;
int H = 0;
c = ( 5 * V + 32 ) >> 6;
i00 = a - b * 7 - c * 7 + 16;
- predict_16x16_p_core_mmx( src, FDEC_STRIDE, i00, b, c );
+ predict_16x16_p_core_mmx( src, i00, b, c );
}
-static void predict_8x8c_p( uint8_t *src, int i_stride )
+static void predict_8x8c_p( uint8_t *src )
{
int a, b, c, i;
int H = 0;
c = ( 17 * V + 16 ) >> 5;
i00 = a -3*b -3*c + 16;
- predict_8x8c_p_core_mmx( src, FDEC_STRIDE, i00, b, c );
+ predict_8x8c_p_core_mmx( src, i00, b, c );
}
-static void predict_16x16_dc( uint8_t *src, int i_stride )
+static void predict_16x16_dc( uint8_t *src )
{
uint32_t dc=16;
int i;
dc += src[-1 + (i+1) * FDEC_STRIDE];
}
- predict_16x16_dc_core_mmxext( src, FDEC_STRIDE, dc );
+ predict_16x16_dc_core_mmxext( src, dc );
}
-static void predict_8x8c_dc( uint8_t *src, int i_stride )
+static void predict_8x8c_dc( uint8_t *src )
{
int s2 = 4
+ src[-1 + 0*FDEC_STRIDE]
+ src[-1 + 6*FDEC_STRIDE]
+ src[-1 + 7*FDEC_STRIDE];
- predict_8x8c_dc_core_mmxext( src, FDEC_STRIDE, s2, s3 );
+ predict_8x8c_dc_core_mmxext( src, s2, s3 );
}
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
-static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc( uint8_t *src, int i_neighbor )
{
uint8_t l[10];
l[0] = i_neighbor&MB_TOPLEFT ? SRC(-1,-1) : SRC(-1,0);
l[8] =
l[9] = SRC(-1,7);
- predict_8x8_dc_core_mmxext( src, FDEC_STRIDE, i_neighbor, l+1 );
+ predict_8x8_dc_core_mmxext( src, i_neighbor, l+1 );
}
#ifdef ARCH_X86_64
-static void predict_16x16_h( uint8_t *src, int i_stride )
+static void predict_16x16_h( uint8_t *src )
{
int y;
for( y = 0; y < 16; y++ )
}
}
-static void predict_8x8c_h( uint8_t *src, int i_stride )
+static void predict_8x8c_h( uint8_t *src )
{
int y;
for( y = 0; y < 8; y++ )
}
}
-static void predict_16x16_dc_left( uint8_t *src, int i_stride )
+static void predict_16x16_dc_left( uint8_t *src )
{
uint32_t s = 0;
uint64_t dc;
}
}
-static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_left( uint8_t *src )
{
int y;
uint32_t s0 = 0, s1 = 0;
}
-static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_top( uint8_t *src )
{
int y, x;
uint32_t s0 = 0, s1 = 0;
src += FDEC_STRIDE;\
}
-static void predict_16x16_dc( uint8_t *src, int i_stride )
+static void predict_16x16_dc( uint8_t *src )
{
uint32_t dc = 0;
int i;
PREDICT_16x16_DC(dc);
}
-static void predict_16x16_dc_left( uint8_t *src, int i_stride )
+static void predict_16x16_dc_left( uint8_t *src )
{
uint32_t dc = 0;
int i;
PREDICT_16x16_DC(dc);
}
-static void predict_16x16_dc_top( uint8_t *src, int i_stride )
+static void predict_16x16_dc_top( uint8_t *src )
{
uint32_t dc = 0;
int i;
PREDICT_16x16_DC(dc);
}
-static void predict_16x16_dc_128( uint8_t *src, int i_stride )
+static void predict_16x16_dc_128( uint8_t *src )
{
int i;
PREDICT_16x16_DC(0x80808080);
}
-static void predict_16x16_h( uint8_t *src, int i_stride )
+static void predict_16x16_h( uint8_t *src )
{
int i;
}
}
-static void predict_16x16_v( uint8_t *src, int i_stride )
+static void predict_16x16_v( uint8_t *src )
{
uint32_t v0 = *(uint32_t*)&src[ 0-FDEC_STRIDE];
uint32_t v1 = *(uint32_t*)&src[ 4-FDEC_STRIDE];
src += FDEC_STRIDE;
}
}
-static void predict_16x16_p( uint8_t *src, int i_stride )
+static void predict_16x16_p( uint8_t *src )
{
int x, y, i;
int a, b, c;
* 8x8 prediction for intra chroma block
****************************************************************************/
-static void predict_8x8c_dc_128( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_128( uint8_t *src )
{
int y;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_left( uint8_t *src )
{
int y;
uint32_t dc0 = 0, dc1 = 0;
}
}
-static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_top( uint8_t *src )
{
int y, x;
uint32_t dc0 = 0, dc1 = 0;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_dc( uint8_t *src, int i_stride )
+static void predict_8x8c_dc( uint8_t *src )
{
int y;
int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_h( uint8_t *src, int i_stride )
+static void predict_8x8c_h( uint8_t *src )
{
int i;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_v( uint8_t *src, int i_stride )
+static void predict_8x8c_v( uint8_t *src )
{
uint32_t v0 = *(uint32_t*)&src[0-FDEC_STRIDE];
uint32_t v1 = *(uint32_t*)&src[4-FDEC_STRIDE];
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_p( uint8_t *src, int i_stride )
+static void predict_8x8c_p( uint8_t *src )
{
int i;
int x,y;
}\
}
-static void predict_4x4_dc_128( uint8_t *src, int i_stride )
+static void predict_4x4_dc_128( uint8_t *src )
{
PREDICT_4x4_DC(0x80808080);
}
-static void predict_4x4_dc_left( uint8_t *src, int i_stride )
+static void predict_4x4_dc_left( uint8_t *src )
{
uint32_t dc = (( src[-1+0*FDEC_STRIDE] + src[-1+FDEC_STRIDE]+
src[-1+2*FDEC_STRIDE] + src[-1+3*FDEC_STRIDE] + 2 ) >> 2)*0x01010101;
PREDICT_4x4_DC(dc);
}
-static void predict_4x4_dc_top( uint8_t *src, int i_stride )
+static void predict_4x4_dc_top( uint8_t *src )
{
uint32_t dc = (( src[0 - FDEC_STRIDE] + src[1 - FDEC_STRIDE] +
src[2 - FDEC_STRIDE] + src[3 - FDEC_STRIDE] + 2 ) >> 2)*0x01010101;
PREDICT_4x4_DC(dc);
}
-static void predict_4x4_dc( uint8_t *src, int i_stride )
+static void predict_4x4_dc( uint8_t *src )
{
uint32_t dc = (( src[-1+0*FDEC_STRIDE] + src[-1+FDEC_STRIDE] +
src[-1+2*FDEC_STRIDE] + src[-1+3*FDEC_STRIDE] +
src[2 - FDEC_STRIDE] + src[3 - FDEC_STRIDE] + 4 ) >> 3)*0x01010101;
PREDICT_4x4_DC(dc);
}
-static void predict_4x4_h( uint8_t *src, int i_stride )
+static void predict_4x4_h( uint8_t *src )
{
int i;
src += FDEC_STRIDE;
}
}
-static void predict_4x4_v( uint8_t *src, int i_stride )
+static void predict_4x4_v( uint8_t *src )
{
uint32_t top = *((uint32_t*)&src[-FDEC_STRIDE]);
int i;
const int t6 = src[6-1*FDEC_STRIDE]; \
UNUSED const int t7 = src[7-1*FDEC_STRIDE];
-static void predict_4x4_ddl( uint8_t *src, int i_stride )
+static void predict_4x4_ddl( uint8_t *src )
{
PREDICT_4x4_LOAD_TOP
PREDICT_4x4_LOAD_TOP_RIGHT
src[3*FDEC_STRIDE+3] = ( t6 + 3*t7 + 2 ) >> 2;
}
-static void predict_4x4_ddr( uint8_t *src, int i_stride )
+static void predict_4x4_ddr( uint8_t *src )
{
const int lt = src[-1-FDEC_STRIDE];
PREDICT_4x4_LOAD_LEFT
src[3*FDEC_STRIDE+0] = ( l1 + 2 * l2 + l3 + 2 ) >> 2;
}
-static void predict_4x4_vr( uint8_t *src, int i_stride )
+static void predict_4x4_vr( uint8_t *src )
{
const int lt = src[-1-FDEC_STRIDE];
PREDICT_4x4_LOAD_LEFT
src[3*FDEC_STRIDE+0]= ( l0 + 2 * l1 + l2 + 2 ) >> 2;
}
-static void predict_4x4_hd( uint8_t *src, int i_stride )
+static void predict_4x4_hd( uint8_t *src )
{
const int lt= src[-1-1*FDEC_STRIDE];
PREDICT_4x4_LOAD_LEFT
src[3*FDEC_STRIDE+1]= ( l1 + 2 * l2 + l3 + 2 ) >> 2;
}
-static void predict_4x4_vl( uint8_t *src, int i_stride )
+static void predict_4x4_vl( uint8_t *src )
{
PREDICT_4x4_LOAD_TOP
PREDICT_4x4_LOAD_TOP_RIGHT
src[3*FDEC_STRIDE+3]= ( t4 + 2 * t5 + t6 + 2 ) >> 2;
}
-static void predict_4x4_hu( uint8_t *src, int i_stride )
+static void predict_4x4_hu( uint8_t *src )
{
PREDICT_4x4_LOAD_LEFT
src += FDEC_STRIDE; \
}
-static void predict_8x8_dc_128( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc_128( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_DC(0x80808080);
}
-static void predict_8x8_dc_left( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc_left( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
PREDICT_8x8_DC(dc);
}
-static void predict_8x8_dc_top( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc_top( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
PREDICT_8x8_DC(dc);
}
-static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOP
+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
PREDICT_8x8_DC(dc);
}
-static void predict_8x8_h( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_h( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
#define ROW(y) ((uint32_t*)(src+y*FDEC_STRIDE))[0] =\
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
-static void predict_8x8_v( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_v( uint8_t *src, int i_neighbor )
{
int y;
PREDICT_8x8_LOAD_TOP;
for( y = 1; y < 8; y++ )
*(uint64_t*)(src+y*FDEC_STRIDE) = *(uint64_t*)src;
}
-static void predict_8x8_ddl( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_ddl( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
}
-static void predict_8x8_ddr( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_ddr( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}
-static void predict_8x8_vr( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_vr( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
SRC(7,0)= (t6 + t7 + 1) >> 1;
}
-static void predict_8x8_hd( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_hd( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
-static void predict_8x8_vl( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_vl( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(7,6)= (t10 + t11 + 1) >> 1;
SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
-static void predict_8x8_hu( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_hu( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
SRC(0,0)= (l0 + l1 + 1) >> 1;
#ifndef _PREDICT_H
#define _PREDICT_H 1
-typedef void (*x264_predict_t)( uint8_t *src, int i_stride );
-typedef void (*x264_predict8x8_t)( uint8_t *src, int i_stride, int i_neighbor );
+typedef void (*x264_predict_t)( uint8_t *src );
+typedef void (*x264_predict8x8_t)( uint8_t *src, int i_neighbor );
enum intra_chroma_pred_e
{
i_mode = predict_mode[i];
/* we do the prediction */
- h->predict_8x8c[i_mode]( p_dstc[0], FDEC_STRIDE );
- h->predict_8x8c[i_mode]( p_dstc[1], FDEC_STRIDE );
+ h->predict_8x8c[i_mode]( p_dstc[0] );
+ h->predict_8x8c[i_mode]( p_dstc[1] );
/* we calculate the cost */
i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE,
int i_mode;
i_mode = predict_mode[i];
- h->predict_16x16[i_mode]( p_dst, FDEC_STRIDE );
+ h->predict_16x16[i_mode]( p_dst );
i_sad = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
int i_mode;
i_mode = predict_mode[i];
- h->predict_4x4[i_mode]( p_dst_by, FDEC_STRIDE );
+ h->predict_4x4[i_mode]( p_dst_by );
i_sad = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE,
p_src_by, FENC_STRIDE )
a->i_sad_i4x4 += i_best;
/* we need to encode this block now (for next ones) */
- h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by, FDEC_STRIDE );
+ h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by );
x264_mb_encode_i4x4( h, idx, a->i_qp );
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
int i_mode;
i_mode = predict_mode[i];
- h->predict_8x8[i_mode]( p_dst_by, FDEC_STRIDE, h->mb.i_neighbour8[idx] );
+ h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] );
/* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */
i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dst_by, FDEC_STRIDE,
a->i_sad_i8x8 += i_best;
/* we need to encode this block now (for next ones) */
- h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, FDEC_STRIDE, h->mb.i_neighbour );
+ h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, h->mb.i_neighbour );
x264_mb_encode_i8x8( h, idx, a->i_qp );
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
const int i_mode = h->mb.i_intra16x16_pred_mode;
h->mb.b_transform_8x8 = 0;
/* do the right prediction */
- h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], FDEC_STRIDE );
+ h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
/* encode the 16x16 macroblock */
x264_mb_encode_i16x16( h, i_qp );
h->mb.b_transform_8x8 = 1;
for( i = 0; i < 4; i++ )
{
- const int i_dst = FDEC_STRIDE;
- uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
+ uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
- h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
+ h->predict_8x8[i_mode]( p_dst, h->mb.i_neighbour8[i] );
x264_mb_encode_i8x8( h, i, i_qp );
}
}
h->mb.b_transform_8x8 = 0;
for( i = 0; i < 16; i++ )
{
- const int i_dst = FDEC_STRIDE;
- uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
+ uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
- *(uint32_t*) &p_dst[4 - i_dst] = p_dst[3 - i_dst] * 0x01010101U;
+ *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
- h->predict_4x4[i_mode]( p_dst, i_dst );
+ h->predict_4x4[i_mode]( p_dst );
x264_mb_encode_i4x4( h, i, i_qp );
}
}
if( IS_INTRA( h->mb.i_type ) )
{
const int i_mode = h->mb.i_chroma_pred_mode;
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], FDEC_STRIDE );
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], FDEC_STRIDE );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
}
/* encode the 8x8 blocks */
for( i = I_PRED_CHROMA_DC; i <= I_PRED_CHROMA_P; i++ )
{
int i_cost;
- h->predict_8x8c[i]( pix, FDEC_STRIDE );
+ h->predict_8x8c[i]( pix );
i_cost = h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE ) + intra_penalty;
i_bcost = X264_MIN( i_bcost, i_cost );
}
used_asm = 1; \
memcpy( buf3, buf1, 32*20 );\
memcpy( buf4, buf1, 32*20 );\
- ip_c.name[dir]( buf3+48, FDEC_STRIDE, ##__VA_ARGS__ );\
- ip_a.name[dir]( buf4+48, FDEC_STRIDE, ##__VA_ARGS__ );\
+ ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
+ ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
if( memcmp( buf3, buf4, 32*20 ) )\
{\
fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\