}
}
-void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
+static void dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
{
const int i_qbits = i_qp/6 - 6;
int y;
pf->quant_2x2_dc = quant_2x2_dc;
pf->dequant_4x4 = dequant_4x4;
+ pf->dequant_4x4_dc = dequant_4x4_dc;
pf->dequant_8x8 = dequant_8x8;
pf->denoise_dct = x264_denoise_dct;
pf->quant_4x4 = x264_quant_4x4_mmx;
pf->quant_8x8 = x264_quant_8x8_mmx;
pf->dequant_4x4 = x264_dequant_4x4_mmx;
+ pf->dequant_4x4_dc = x264_dequant_4x4dc_mmxext;
pf->dequant_8x8 = x264_dequant_8x8_mmx;
if( h->param.i_cqm_preset == X264_CQM_FLAT )
{
pf->quant_4x4 = x264_quant_4x4_sse2;
pf->quant_8x8 = x264_quant_8x8_sse2;
pf->dequant_4x4 = x264_dequant_4x4_sse2;
+ pf->dequant_4x4_dc = x264_dequant_4x4dc_sse2;
pf->dequant_8x8 = x264_dequant_8x8_sse2;
if( h->param.i_cqm_preset == X264_CQM_FLAT )
{
void (*quant_4x4_dc)( int16_t dct[4][4], int mf, int bias );
void (*quant_2x2_dc)( int16_t dct[2][2], int mf, int bias );
- void (*dequant_4x4)( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp );
void (*dequant_8x8)( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp );
+ void (*dequant_4x4)( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp );
+ void (*dequant_4x4_dc)( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp );
void (*denoise_dct)( int16_t *dct, uint32_t *sum, uint16_t *offset, int size );
void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf );
-void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qscale );
-
#endif
%define t2d r1d
%endif
-;-----------------------------------------------------------------------------
-; void x264_dequant_4x4_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
-;-----------------------------------------------------------------------------
-%macro DEQUANT 4
-cglobal x264_dequant_%2x%2_%1, 0,3
+%macro DEQUANT_START 2 ; %1 = left shift turning i_mf into an offset into dequant_mf, %2 = amount subtracted from i_qbits
movifnidn t2d, r2m
imul t0d, t2d, 0x2b ; 0x2b/256 ~= 1/6; the shift below completes the division
shr t0d, 8 ; i_qbits = i_qp / 6
lea t1, [t0*3] ; 3*(i_qp/6), subtracted twice below to form i_qp - 6*(i_qp/6)
sub t2d, t1d
sub t2d, t1d ; i_mf = i_qp % 6
- shl t2d, %3+2
+ shl t2d, %1 ; scale i_mf so it can be added to the dequant_mf base pointer
%ifdef ARCH_X86_64
add r1, t2 ; dequant_mf[i_mf]
%else
add r1, r1m ; dequant_mf[i_mf]
mov r0, r0m ; dct
%endif
- sub t0d, %3
+ sub t0d, %2 ; flags from this subtraction drive the jl below
jl .rshift32 ; negative qbits => rightshift
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void x264_dequant_4x4_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
+;-----------------------------------------------------------------------------
+%macro DEQUANT 4
+cglobal x264_dequant_%2x%2_%1, 0,3
+ DEQUANT_START %3+2, %3
.lshift:
movd m5, t0d
DEQUANT sse2, 4, 4, 2
DEQUANT sse2, 8, 6, 2
+%macro DEQUANT_DC 1 ; %1 = cpu suffix of the emitted function (instantiated below as mmxext and sse2)
+cglobal x264_dequant_4x4dc_%1, 0,3
+ DEQUANT_START 6, 6 ; r1 -> dequant_mf[i_qp%6], t0d = i_qp/6 - 6; branches to .rshift32 when that is negative
+
+.lshift: ; non-negative shift: dct[i] *= dequant_mf[i_mf][0][0] << t0d
+ movd m6, [r1] ; only the first 32-bit entry of the selected table is read
+ movd m5, t0d ; shift count
+ pslld m6, m5
+%if mmsize==16
+ pshuflw m6, m6, 0 ; broadcast the low word over the low four words...
+ punpcklqdq m6, m6 ; ...then duplicate the low qword into the high half
+%else
+ pshufw m6, m6, 0 ; broadcast the low word to all four words
+%endif
+%assign x 0
+%rep 16/mmsize
+ mova m0, [r0+mmsize*0+x] ; two registers of coefficients per iteration, 32 bytes in total
+ mova m1, [r0+mmsize*1+x]
+ pmullw m0, m6 ; keeps the low 16 bits of each product
+ pmullw m1, m6
+ mova [r0+mmsize*0+x], m0
+ mova [r0+mmsize*1+x], m1
+%assign x x+mmsize*2
+%endrep
+ RET
+.rshift32: ; negative shift: dct[i] = (dct[i]*mf + rounding) >> -t0d, computed in 32 bits
+ neg t0d ; make the shift count positive
+ movd m5, t0d
+ mova m6, [pw_1 GLOBAL] ; NOTE(review): pw_1 is defined outside this view; presumably packed words of 1 - confirm
+ mova m7, m6 ; unshifted copy, paired with every coefficient below
+ pslld m6, m5
+ psrld m6, 1 ; (pw_1 << shift) >> 1, used as the rounding term
+ movd m4, [r1] ; first 32-bit entry of the selected table
+%if mmsize==8
+ punpcklwd m4, m4 ; duplicate the low word of mf
+%else
+ pshuflw m4, m4, 0 ; broadcast the low word of mf over the low four words
+%endif
+ punpcklwd m4, m6 ; interleave into {mf, rounding} word pairs for pmaddwd
+%assign x 0
+%rep 32/mmsize
+ mova m0, [r0+x] ; one register of coefficients per iteration, 32 bytes in total
+ mova m1, m0
+ punpcklwd m0, m7 ; {coef, pw_1 word} pairs, low half
+ punpckhwd m1, m7 ; {coef, pw_1 word} pairs, high half
+ pmaddwd m0, m4 ; per dword: coef*mf + (pw_1 word)*rounding
+ pmaddwd m1, m4
+ psrad m0, m5 ; arithmetic shift keeps the sign
+ psrad m1, m5
+ packssdw m0, m1 ; back to 16-bit words with signed saturation
+ mova [r0+x], m0
+%assign x x+mmsize
+%endrep
+ RET
+%endmacro
+
+INIT_MMX
+DEQUANT_DC mmxext
+INIT_XMM
+DEQUANT_DC sse2
;-----------------------------------------------------------------------------
; void x264_denoise_dct_mmx( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
void x264_quant_4x4_ssse3( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] );
void x264_quant_8x8_ssse3( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64] );
void x264_dequant_4x4_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp );
+void x264_dequant_4x4dc_mmxext( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp ); /* DC variant: the asm reads only entry [0][0] of dequant_mf[i_qp%6] */
void x264_dequant_8x8_mmx( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp );
void x264_dequant_4x4_sse2( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp );
+void x264_dequant_4x4dc_sse2( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp ); /* DC variant: the asm reads only entry [0][0] of dequant_mf[i_qp%6] */
void x264_dequant_8x8_sse2( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp );
void x264_dequant_4x4_flat16_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp );
void x264_dequant_8x8_flat16_mmx( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp );
/* output samples to fdec */
h->dctf.idct4x4dc( dct_dc4x4 );
- x264_mb_dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */
+ h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */
/* calculate dct coeffs */
for( i = 0; i < 16; i++ )
for( qp = 51; qp > 0; qp-- ) \
{ \
INIT_QUANT##w() \
- call_c( qf_c.qname, (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
+ call_c1( qf_c.qname, (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
memcpy( dct2, dct1, w*w*2 ); \
call_c1( qf_c.dqname, (void*)dct1, h->dequant##w##_mf[block], qp ); \
call_a1( qf_a.dqname, (void*)dct2, h->dequant##w##_mf[block], qp ); \
TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 );
TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 );
+#define TEST_DEQUANT_DC( qname, dqname, block, w ) /* compare qf_c.dqname with qf_a.dqname on data quantized by qf_c.qname */ \
+ if( qf_a.dqname != qf_ref.dqname ) /* only run when qf_a's function pointer differs from qf_ref's */ \
+ { \
+ set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
+ used_asms[1] = 1; \
+ for( qp = 51; qp > 0; qp-- ) /* qp 51 down to 1; qp 0 is not exercised */ \
+ { \
+ for( i = 0; i < 16; i++ ) \
+ dct1[i] = rand(); /* random coefficients as quantizer input */ \
+ call_c1( qf_c.qname, (void*)dct1, h->quant##w##_mf[block][qp][0]>>1, h->quant##w##_bias[block][qp][0]>>1 ); /* scalar mf/bias: entry [0] of each table, halved */ \
+ memcpy( dct2, dct1, w*w*2 ); /* both implementations start from identical input */ \
+ call_c1( qf_c.dqname, (void*)dct1, h->dequant##w##_mf[block], qp ); \
+ call_a1( qf_a.dqname, (void*)dct2, h->dequant##w##_mf[block], qp ); \
+ if( memcmp( dct1, dct2, w*w*2 ) ) /* outputs must match byte for byte */ \
+ { \
+ oks[1] = 0; \
+ fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
+ } \
+ call_c2( qf_c.dqname, (void*)dct1, h->dequant##w##_mf[block], qp ); /* NOTE(review): call_c2/call_a2 are defined elsewhere; presumably the timing runs - confirm */ \
+ call_a2( qf_a.dqname, (void*)dct2, h->dequant##w##_mf[block], qp ); \
+ } \
+ }
+
+ TEST_DEQUANT_DC( quant_4x4_dc, dequant_4x4_dc, CQM_4IY, 4 );
+
x264_cqm_delete( h );
}