void x264_predict_8x8c_v_neon( uint8_t *src );
void x264_predict_8x8c_p_neon( uint8_t *src );
-void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[33] );
-void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[33] );
+void x264_predict_8x8_dc_neon( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_h_neon( uint8_t *src, uint8_t edge[36] );
void x264_predict_16x16_dc_neon( uint8_t *src );
void x264_predict_16x16_dc_top_neon( uint8_t *src );
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
{
- /* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
- CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
+ CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
h->intra_border_backup[i][j] += 16;
if( !PARAM_INTERLACED )
h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
#endif // !HIGH_BIT_DEPTH
#define INTRA_MBCMP_8x8( mbcmp, cpu )\
-void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[33], int res[3] )\
+void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\
{\
ALIGNED_ARRAY_16( pixel, pix, [8*FDEC_STRIDE] );\
x264_predict_8x8_v_c( pix, edge );\
void (*intra_mbcmp_x3_4x4) ( pixel *fenc, pixel *fdec , int res[3] );
void (*intra_satd_x3_4x4) ( pixel *fenc, pixel *fdec , int res[3] );
void (*intra_sad_x3_4x4) ( pixel *fenc, pixel *fdec , int res[3] );
- void (*intra_mbcmp_x3_8x8) ( pixel *fenc, pixel edge[33], int res[3] );
- void (*intra_sa8d_x3_8x8) ( pixel *fenc, pixel edge[33], int res[3] );
- void (*intra_sad_x3_8x8) ( pixel *fenc, pixel edge[33], int res[3] );
+ void (*intra_mbcmp_x3_8x8) ( pixel *fenc, pixel edge[36], int res[3] );
+ void (*intra_sa8d_x3_8x8) ( pixel *fenc, pixel edge[36], int res[3] );
+ void (*intra_sad_x3_8x8) ( pixel *fenc, pixel edge[36], int res[3] );
} x264_pixel_function_t;
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
#define INTRA_MBCMP_8x8( mbcmp )\
-void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[33], int res[3] )\
+void intra_##mbcmp##_x3_8x8_altivec( uint8_t *fenc, uint8_t edge[36], int res[3] )\
{\
ALIGNED_8( uint8_t pix[8*FDEC_STRIDE] );\
x264_predict_8x8_v_c( pix, edge );\
#define PT(x) \
edge[16+x] = F2(SRC(x-1,-1), SRC(x,-1), SRC(x+1,-1));
-static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbor, int i_filters )
+static void x264_predict_8x8_filter_c( pixel *src, pixel edge[36], int i_neighbor, int i_filters )
{
/* edge[7..14] = l7..l0
* edge[15] = lt
src += FDEC_STRIDE; \
}
-static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[36] )
{
/* Constant DC prediction: hands PREDICT_8x8_DC the value 1 << (BIT_DEPTH-1),
 * replicated by PIXEL_SPLAT_X4. edge is not referenced by this body; it is
 * presumably kept only so the function matches the x264_predict8x8_t signature. */
PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
}
-static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[36] )
{
/* DC prediction from the left neighbours only. PREDICT_8x8_LOAD_LEFT is defined
 * elsewhere and supplies l0..l7 (presumably read from edge[7..14] -- confirm
 * against the macro). */
PREDICT_8x8_LOAD_LEFT
/* Rounded mean of the 8 left samples: (sum + 4) >> 3, replicated by PIXEL_SPLAT_X4. */
pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3 );
PREDICT_8x8_DC( dc );
}
-static void x264_predict_8x8_dc_top_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_dc_top_c( pixel *src, pixel edge[36] )
{
/* DC prediction from the top neighbours only. PREDICT_8x8_LOAD_TOP is defined
 * elsewhere and supplies t0..t7 (presumably read from edge[16..23] -- confirm
 * against the macro). */
PREDICT_8x8_LOAD_TOP
/* Rounded mean of the 8 top samples: (sum + 4) >> 3, replicated by PIXEL_SPLAT_X4. */
pixel4 dc = PIXEL_SPLAT_X4( (t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3 );
PREDICT_8x8_DC( dc );
}
-void x264_predict_8x8_dc_c( pixel *src, pixel edge[33] )
+void x264_predict_8x8_dc_c( pixel *src, pixel edge[36] )
{
/* DC prediction from both left and top neighbours. The two LOAD macros are
 * defined elsewhere and supply l0..l7 and t0..t7 (presumably read from edge). */
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOP
/* Rounded mean of all 16 neighbour samples: (sum + 8) >> 4, replicated by PIXEL_SPLAT_X4. */
pixel4 dc = PIXEL_SPLAT_X4( (l0+l1+l2+l3+l4+l5+l6+l7+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4 );
PREDICT_8x8_DC( dc );
}
-void x264_predict_8x8_h_c( pixel *src, pixel edge[33] )
+void x264_predict_8x8_h_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_LEFT
#define ROW(y) MPIXEL_X4( src+y*FDEC_STRIDE+0 ) =\
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
-void x264_predict_8x8_v_c( pixel *src, pixel edge[33] )
+void x264_predict_8x8_v_c( pixel *src, pixel edge[36] )
{
pixel4 top[2] = { MPIXEL_X4( edge+16 ),
MPIXEL_X4( edge+20 ) };
MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
}
}
-static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(6,7)=SRC(7,6)= F2(t13,t14,t15);
SRC(7,7)= F2(t14,t15,t15);
}
-static void x264_predict_8x8_ddr_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_ddr_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(7,0)= F2(t5,t6,t7);
}
-static void x264_predict_8x8_vr_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_vr_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(7,1)= F2(t5,t6,t7);
SRC(7,0)= F1(t6,t7);
}
-static void x264_predict_8x8_hd_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_hd_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC_X4(4,1)= pack_pixel_2to4(p9,p10);
SRC_X4(4,0)= pack_pixel_2to4(p10,p11);
}
-static void x264_predict_8x8_vl_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_vl_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(7,6)= F1(t10,t11);
SRC(7,7)= F2(t10,t11,t12);
}
-static void x264_predict_8x8_hu_c( pixel *src, pixel edge[33] )
+static void x264_predict_8x8_hu_c( pixel *src, pixel edge[36] )
{
PREDICT_8x8_LOAD_LEFT
int p1 = pack_pixel_1to2(F1(l0,l1), F2(l0,l1,l2));
#define X264_PREDICT_H
typedef void (*x264_predict_t)( pixel *src );
-typedef void (*x264_predict8x8_t)( pixel *src, pixel edge[33] );
-typedef void (*x264_predict_8x8_filter_t) ( pixel *src, pixel edge[33], int i_neighbor, int i_filters );
+typedef void (*x264_predict8x8_t)( pixel *src, pixel edge[36] );
+typedef void (*x264_predict_8x8_filter_t) ( pixel *src, pixel edge[36], int i_neighbor, int i_filters );
enum intra_chroma_pred_e
{
I_PRED_8x8_DC_128 = 11,
};
-void x264_predict_8x8_dc_c ( pixel *src, pixel edge[33] );
-void x264_predict_8x8_h_c ( pixel *src, pixel edge[33] );
-void x264_predict_8x8_v_c ( pixel *src, pixel edge[33] );
+void x264_predict_8x8_dc_c ( pixel *src, pixel edge[36] );
+void x264_predict_8x8_h_c ( pixel *src, pixel edge[36] );
+void x264_predict_8x8_v_c ( pixel *src, pixel edge[36] );
void x264_predict_4x4_dc_c ( pixel *src );
void x264_predict_4x4_h_c ( pixel *src );
void x264_predict_4x4_v_c ( pixel *src );
pb_00s_ff: times 8 db 0
pb_0s_ff: times 7 db 0
db 0xff
+shuf_fixtr: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
+shuf_nop: db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
SECTION .text
%macro PREDICT_FILTER 5
;-----------------------------------------------------------------------------
-;void predict_8x8_filter( pixel *src, pixel edge[33], int i_neighbor, int i_filters )
+;void predict_8x8_filter( pixel *src, pixel edge[36], int i_neighbor, int i_filters )
;-----------------------------------------------------------------------------
-cglobal predict_8x8_filter, 4,5,7
+cglobal predict_8x8_filter, 4,6,6
add r0, 0x58*SIZEOF_PIXEL
%define src r0-0x58*SIZEOF_PIXEL
%ifndef ARCH_X86_64
%define t1 r1
%define t4 r4
%endif
- test r3b, 0x01
+ test r3b, 1
je .check_top
+ mov t4d, r2d
+ and t4d, 8
+ neg t4
mova m0, [src+0*FDEC_STRIDEB-8*SIZEOF_PIXEL]
- punpckh%1%2 m0, [src-1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
+ punpckh%1%2 m0, [src+0*FDEC_STRIDEB-8*SIZEOF_PIXEL+t4*(FDEC_STRIDEB/8)]
mova m1, [src+2*FDEC_STRIDEB-8*SIZEOF_PIXEL]
punpckh%1%2 m1, [src+1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
punpckh%2%3 m1, m0
mova m2, m3
PALIGNR m4, m0, 7*SIZEOF_PIXEL, m0
PALIGNR m1, m2, 1*SIZEOF_PIXEL, m2
- test r2b, 0x08
- je .fix_lt_1
-.do_left:
- mova m0, m4
PRED8x8_LOWPASS %1, m2, m1, m4, m3, m5
mova [t1+8*SIZEOF_PIXEL], m2
- mova m4, m0
- PRED8x8_LOWPASS %1, m1, m3, m0, m4, m5
- movd t4, m1
+ movzx t4d, pixel [src+7*FDEC_STRIDEB-1*SIZEOF_PIXEL]
+ movzx r5d, pixel [src+6*FDEC_STRIDEB-1*SIZEOF_PIXEL]
+ lea t4d, [t4*3+2]
+ add t4d, r5d
+ shr t4d, 2
mov [t1+7*SIZEOF_PIXEL], t4%1
-.check_top:
- test r3b, 0x02
+ test r3b, 2
je .done
+.check_top:
+%if SIZEOF_PIXEL==1 && cpuflag(ssse3)
+INIT_XMM cpuname
+ movu m3, [src-1*FDEC_STRIDEB]
+ movhps m0, [src-1*FDEC_STRIDEB-8]
+ test r2b, 8
+ je .fix_lt_2
+.do_top:
+ and r2d, 4
+%ifdef PIC
+ lea r3, [shuf_fixtr]
+ pshufb m3, [r3+r2*4]
+%else
+ pshufb m3, [shuf_fixtr+r2*4] ; neighbor&MB_TOPRIGHT ? shuf_nop : shuf_fixtr
+%endif
+ psrldq m1, m3, 15
+ PALIGNR m2, m3, m0, 15, m0
+ PALIGNR m1, m3, 1, m5
+ PRED8x8_LOWPASS %1, m0, m2, m1, m3, m5
+ mova [t1+16*SIZEOF_PIXEL], m0
+ psrldq m0, 15
+ movd [t1+32*SIZEOF_PIXEL], m0
+.done:
+ REP_RET
+.fix_lt_2:
+ pslldq m0, m3, 15
+ jmp .do_top
+
+%else
mova m0, [src-1*FDEC_STRIDEB-8*SIZEOF_PIXEL]
mova m3, [src-1*FDEC_STRIDEB]
mova m1, [src-1*FDEC_STRIDEB+8*SIZEOF_PIXEL]
- mova m2, m3
- mova m4, m3
- PALIGNR m2, m0, 7*SIZEOF_PIXEL, m0
- PALIGNR m1, m4, 1*SIZEOF_PIXEL, m4
- test r2b, 0x08
+ test r2b, 8
je .fix_lt_2
- test r2b, 0x04
+ test r2b, 4
je .fix_tr_1
.do_top:
- PRED8x8_LOWPASS %1, m4, m2, m1, m3, m5
+ PALIGNR m2, m3, m0, 7*SIZEOF_PIXEL, m0
+ PALIGNR m0, m1, m3, 1*SIZEOF_PIXEL, m5
+ PRED8x8_LOWPASS %1, m4, m2, m0, m3, m5
mova [t1+16*SIZEOF_PIXEL], m4
- test r3b, 0x04
+ test r3b, 4
je .done
- test r2b, 0x04
- je .fix_tr_2
- mova m0, [src-1*FDEC_STRIDEB+8*SIZEOF_PIXEL]
- mova m2, m0
- mova m4, m0
- psrl%4 m5, m0, 7*%5
+ mova m2, m1
+ mova m4, m1
+ psrl%4 m5, m1, 7*%5
PALIGNR m2, m3, 7*SIZEOF_PIXEL, m3
PALIGNR m5, m4, 1*SIZEOF_PIXEL, m4
- PRED8x8_LOWPASS %1, m1, m2, m5, m0, m4
- jmp .do_topright
-.fix_tr_2:
- punpckh%1%2 m3, m3
- pshuf%2 m1, m3, q3333
-.do_topright:
- mova [t1+24*SIZEOF_PIXEL], m1
- psrl%4 m1, 7*%5
- movd t4, m1
- mov [t1+32*SIZEOF_PIXEL], t4%1
+ PRED8x8_LOWPASS %1, m0, m2, m5, m1, m4
+ mova [t1+24*SIZEOF_PIXEL], m0
+ psrl%4 m0, 7*%5
+ movd [t1+32*SIZEOF_PIXEL], m0
.done:
REP_RET
-.fix_lt_1:
- pxor m5, m3, m4
- psrl%4 m5, 7*%5
- psll%4 m5, 6*%5
- pxor m1, m5
- jmp .do_left
.fix_lt_2:
- pxor m5, m3, m2
- psll%4 m5, 7*%5
- psrl%4 m5, 7*%5
- pxor m2, m5
- test r2b, 0x04
+ psll%4 m0, m3, 7*%5
+ test r2b, 4
jne .do_top
.fix_tr_1:
- pxor m5, m3, m1
- psrl%4 m5, 7*%5
- psll%4 m5, 7*%5
- pxor m1, m5
+ punpckh%1%2 m1, m3, m3
+ pshuf%2 m1, m1, q3333
jmp .do_top
+%endif
%endmacro
%ifdef HIGH_BIT_DEPTH
%endif
;-----------------------------------------------------------------------------
-; void predict_8x8_h( pixel *src, pixel edge[33] )
+; void predict_8x8_h( pixel *src, pixel edge[36] )
;-----------------------------------------------------------------------------
%macro PREDICT_8x8_H 2
cglobal predict_8x8_h, 2,2
void x264_predict_8x8c_h_mmx2( uint8_t *src );
void x264_predict_8x8c_h_sse2( pixel *src );
void x264_predict_8x8c_h_ssse3( uint8_t *src );
- void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[33] );
- void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[33] );
- void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[33] );
- void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[33] );
- void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[33] );
- void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[33] );
- void x264_predict_8x8_vr_ssse3( uint16_t *src, uint16_t edge[33] );
- void x264_predict_8x8_vr_avx( pixel *src, pixel edge[33] );
- void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[33] );
- void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[33] );
- void x264_predict_8x8_hu_avx( pixel *src, pixel edge[33] );
- void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[33] );
- void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[33] );
- void x264_predict_8x8_hd_avx( pixel *src, pixel edge[33] );
- void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[33], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[33], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[33], int i_neighbor, int i_filters );
+ void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[36] );
+ void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] );
+ void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] );
+ void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] );
+ void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] );
+ void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] );
+ void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_vr_ssse3( uint16_t *src, uint16_t edge[36] );
+ void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_hu_avx( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] );
+ void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters );
+ void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
+ void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters );
+ void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
void x264_predict_4x4_ddl_mmx2( pixel *src );
void x264_predict_4x4_ddl_sse2( uint16_t *src );
void x264_predict_4x4_ddl_avx( uint16_t *src );
t=g; g+=h; h-=t;
#define INTRA_SA8D_X3(cpu)\
-void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
+void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[36], int res[3] )\
{\
PREDICT_8x8_LOAD_TOP\
PREDICT_8x8_LOAD_LEFT\
INTRA_SADx3_4x4
;-----------------------------------------------------------------------------
-; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
+; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[36], int res[3]);
;-----------------------------------------------------------------------------
;m0 = DC
/* 8x8 prediction selection */
if( flags & X264_ANALYSE_I8x8 )
{
- ALIGNED_ARRAY_16( pixel, edge,[33] );
+ ALIGNED_ARRAY_16( pixel, edge,[36] );
x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
pixel *p_src = &h->mb.pic.p_fenc[p][8*x + 8*y*FENC_STRIDE];
pixel *p_dst = &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE];
ALIGNED_ARRAY_16( dctcoef, dct8x8,[64] );
- ALIGNED_ARRAY_16( pixel, edge_buf,[33] );
+ ALIGNED_ARRAY_16( pixel, edge_buf,[36] );
if( !edge )
{
h->predict_4x4[i_mode]( p_dst );
}
-void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[33] )
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] )
{
int stride = h->fenc->i_stride[p] << MB_INTERLACED;
pixel *p_src = h->mb.pic.p_fenc_plane[p] + (idx&1)*8 + (idx>>1)*8*stride;
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode );
void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode );
-void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[33] );
+void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] );
void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode );
void x264_macroblock_encode ( x264_t *h );
lowres_intra_mb:
if( !fenc->b_intra_calculated )
{
- ALIGNED_ARRAY_16( pixel, edge,[33] );
+ ALIGNED_ARRAY_16( pixel, edge,[36] );
pixel *pix = &pix1[8+FDEC_STRIDE - 1];
pixel *src = &fenc->lowres[0][i_pel_offset - 1];
const int intra_penalty = 5 * a->i_lambda;
x264_pixel_function_t pixel_asm;
x264_predict8x8_t predict_8x8[9+3];
x264_predict_8x8_filter_t predict_8x8_filter;
- ALIGNED_16( pixel edge[33] );
+ ALIGNED_16( pixel edge[36] );
uint16_t cost_mv[32];
int ret = 0, ok, used_asm;
static int check_intra( int cpu_ref, int cpu_new )
{
int ret = 0, ok = 1, used_asm = 0;
- ALIGNED_16( pixel edge[33] );
- ALIGNED_16( pixel edge2[33] );
+ ALIGNED_16( pixel edge[36] );
+ ALIGNED_16( pixel edge2[36] );
ALIGNED_16( pixel fdec[FDEC_STRIDE*20] );
struct
{
used_asm = 1;
for( int i = 0; i < 32; i++ )
{
- memcpy( edge2, edge, 33 * sizeof(pixel) );
- call_c(ip_c.predict_8x8_filter, pbuf1+48, edge, (i&24)>>1, i&7);
- call_a(ip_a.predict_8x8_filter, pbuf1+48, edge2, (i&24)>>1, i&7);
- if( memcmp( edge, edge2, 33 * sizeof(pixel) ) )
+ if( !(i&7) || ((i&MB_TOPRIGHT) && !(i&MB_TOP)) )
+ continue;
+ int neighbor = (i&24)>>1;
+ memset( edge, 0, sizeof(edge) );
+ memset( edge2, 0, sizeof(edge2) );
+ call_c( ip_c.predict_8x8_filter, pbuf1+48, edge, neighbor, i&7 );
+ call_a( ip_a.predict_8x8_filter, pbuf1+48, edge2, neighbor, i&7 );
+ if( !(neighbor&MB_TOPLEFT) )
+ edge[15] = edge2[15] = 0;
+ if( memcmp( edge+7, edge2+7, (i&MB_TOPRIGHT ? 26 : i&MB_TOP ? 17 : 8) * sizeof(pixel) ) )
{
fprintf( stderr, "predict_8x8_filter : [FAILED] %d %d\n", (i&24)>>1, i&7);
ok = 0;