#define X264_SCAN8_SIZE (6*8)
#define X264_SCAN8_0 (4+1*8)
-static const int x264_scan8[16+2*4] =
+static const int x264_scan8[16+2*4+3] =
{
/* Luma */
4+1*8, 5+1*8, 4+2*8, 5+2*8,
/* Cr */
1+4*8, 2+4*8,
1+5*8, 2+5*8,
+
+ /* Luma DC */
+ 4+5*8,
+
+ /* Chroma DC */
+ 5+5*8, 6+5*8
};
/*
0 1 2 3 4 5 6 7
2 B B L L L L
3 L L L L
4 R R L L L L
- 5 R R
+ 5 R R DyDuDv
*/
typedef struct x264_ratecontrol_t x264_ratecontrol_t;
return x264_decimate_score_internal( dct, 64 );
}
+/* Return the index of the last (highest-index) nonzero coefficient in
+ * l[0..i_count-1], or -1 if every coefficient is zero.
+ * Used with i_count = 4, 15, 16 or 64 via the wrappers below. */
+static int ALWAYS_INLINE x264_coeff_last_internal( int16_t *l, int i_count )
+{
+    int i_last;
+    /* coarse pass: skip 4 zero coefficients at a time with one 64-bit compare.
+     * NOTE(review): the uint64_t load assumes l+i_last-3 is readable and
+     * suitably aligned for all callers -- true for x264's dct arrays, but
+     * confirm for the ac case (l = dct+1). */
+    for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
+        if( *(uint64_t*)(l+i_last-3) )
+            break;
+    /* fine pass: walk back over the remaining zeros inside the group */
+    while( i_last >= 0 && l[i_last] == 0 )
+        i_last--;
+    return i_last;
+}
+
+/* Fixed-size entry points for x264_quant_function_t.coeff_last[]: each one
+ * lets the compiler specialize the inlined scan for a single block size
+ * (4 = chroma DC, 15 = AC without DC, 16 = 4x4, 64 = 8x8). */
+static int x264_coeff_last4( int16_t *l )
+{
+    return x264_coeff_last_internal( l, 4 );
+}
+static int x264_coeff_last15( int16_t *l )
+{
+    return x264_coeff_last_internal( l, 15 );
+}
+static int x264_coeff_last16( int16_t *l )
+{
+    return x264_coeff_last_internal( l, 16 );
+}
+static int x264_coeff_last64( int16_t *l )
+{
+    return x264_coeff_last_internal( l, 64 );
+}
+
void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
{
pf->quant_8x8 = quant_8x8;
pf->decimate_score16 = x264_decimate_score16;
pf->decimate_score64 = x264_decimate_score64;
+ pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4;
+ pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15;
+ pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16;
+ pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64;
+
#ifdef HAVE_MMX
if( cpu&X264_CPU_MMX )
{
pf->decimate_score15 = x264_decimate_score15_mmxext;
pf->decimate_score16 = x264_decimate_score16_mmxext;
pf->decimate_score64 = x264_decimate_score64_mmxext;
+ pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_mmxext;
+ pf->coeff_last[ DCT_LUMA_4x4] = x264_coeff_last16_mmxext;
+ pf->coeff_last[ DCT_LUMA_8x8] = x264_coeff_last64_mmxext;
#endif
+ pf->coeff_last[DCT_CHROMA_DC] = x264_coeff_last4_mmxext;
}
if( cpu&X264_CPU_SSE2 )
pf->decimate_score15 = x264_decimate_score15_sse2;
pf->decimate_score16 = x264_decimate_score16_sse2;
pf->decimate_score64 = x264_decimate_score64_sse2;
+ pf->coeff_last[ DCT_LUMA_AC] = x264_coeff_last15_sse2;
+ pf->coeff_last[DCT_LUMA_4x4] = x264_coeff_last16_sse2;
+ pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_sse2;
}
if( cpu&X264_CPU_SSSE3 )
pf->dequant_8x8 = x264_dequant_8x8_altivec;
}
#endif
+ pf->coeff_last[ DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
+ pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
}
int (*decimate_score15)( int16_t *dct );
int (*decimate_score16)( int16_t *dct );
int (*decimate_score64)( int16_t *dct );
+ int (*coeff_last[6])( int16_t *dct );
} x264_quant_function_t;
void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf );
DECIMATE8x8 sse2
DECIMATE8x8 ssse3
+; LAST_MASK_SSE2 dst, src[, unused]
+; Build a 16-bit mask in gpr %1: bit i is SET iff the int16 coefficient at
+; [%2 + 2*i] is zero (packsswb saturates each word to a byte, so a word maps
+; to byte 0 only if it was 0; pcmpeqb against zero then flags those bytes).
+; Callers xor the result with 0xffff to get a nonzero-coefficient mask.
+; The optional third argument is ignored; it exists so one LAST_MASK name can
+; cover both this and the 3-operand MMX variant below.
+%macro LAST_MASK_SSE2 2-3
+    movdqa xmm0, [%2+ 0]
+    pxor xmm2, xmm2
+    packsswb xmm0, [%2+16]
+    pcmpeqb xmm0, xmm2
+    pmovmskb %1, xmm0
+%endmacro
+
+; LAST_MASK_MMX dst, src, tmp
+; Same contract as LAST_MASK_SSE2 (bit i of %1 set iff word [%2+2*i] is zero),
+; built from two 8-byte halves: mm0 covers words 0-7, mm1 covers words 8-15.
+; %3 is a scratch gpr used to merge the high byte of the mask.
+%macro LAST_MASK_MMX 3
+    movq mm0, [%2+ 0]
+    movq mm1, [%2+16]
+    pxor mm2, mm2
+    packsswb mm0, [%2+ 8]
+    packsswb mm1, [%2+24]
+    pcmpeqb mm0, mm2
+    pcmpeqb mm1, mm2
+    pmovmskb %1, mm0
+    pmovmskb %3, mm1
+    shl %3, 8
+    or %1, %3
+%endmacro
+
+; coeff_last for the 4-coefficient chroma-DC block.
+; NOTE(review): bsr leaves the destination undefined when the source is zero,
+; so this presumably relies on the caller only invoking it when at least one
+; coefficient is nonzero (coded-block flag checked first) -- confirm.
+%ifdef ARCH_X86_64
+cglobal x264_coeff_last4_mmxext, 1,1
+    bsr rax, [r0]            ; highest set bit of all 4 words in one 64-bit load
+    shr eax, 4               ; bit index -> word index (0..3)
+    RET
+%else
+cglobal x264_coeff_last4_mmxext, 0,3
+    mov edx, r0m
+    mov eax, [edx+4]         ; high pair: coefficients 2,3
+    xor ecx, ecx
+    test eax, eax
+    cmovz eax, [edx]         ; high pair all zero -> fall back to coefficients 0,1
+    setnz cl                 ; cl=1 iff the answer lies in the high pair
+    bsr eax, eax
+    shr eax, 4               ; index within the pair (0 or 1)
+    lea eax, [eax+ecx*2]     ; +2 if it was the high pair
+    RET
+%endif
+
+; Instantiate coeff_last15/16 (and, on x86-32, coeff_last64) for one ISA.
+; LAST_MASK produces a 16-bit "coefficient is zero" mask; xor 0xffff inverts
+; it to a nonzero mask and bsr finds the last nonzero index.
+%macro COEFF_LAST 1
+cglobal x264_coeff_last15_%1, 1,3
+    LAST_MASK r1d, r0-2, r2d ; start one word early so the 15 coeffs fill bits 1..15
+    xor r1d, 0xffff
+    bsr eax, r1d
+    dec eax                  ; undo the one-word offset
+    RET
+
+cglobal x264_coeff_last16_%1, 1,3
+    LAST_MASK r1d, r0, r2d
+    xor r1d, 0xffff
+    bsr eax, r1d
+    RET
+
+%ifndef ARCH_X86_64
+; 64-coefficient version, 32-bit only (x86-64 uses the dedicated sse2 version
+; below).  The mmxext instantiation needs one extra register for LAST_MASK's
+; scratch operand.
+%ifidn %1, mmxext
+    cglobal x264_coeff_last64_%1, 1,5
+%else
+    cglobal x264_coeff_last64_%1, 1,4
+%endif
+    LAST_MASK r1d, r0, r4d
+    LAST_MASK r2d, r0+32, r4d
+    shl r2d, 16
+    or r1d, r2d              ; r1d = zero-mask of coefficients 0..31
+    LAST_MASK r2d, r0+64, r4d
+    LAST_MASK r3d, r0+96, r4d
+    shl r3d, 16
+    or r2d, r3d              ; r2d = zero-mask of coefficients 32..63
+    not r1d                  ; NOT does not touch flags...
+    xor r2d, -1              ; ...so jne below tests this xor (r2d nonzero mask)
+    jne .secondhalf
+    bsr eax, r1d
+    RET
+.secondhalf:
+    bsr eax, r2d
+    add eax, 32
+    RET
+%endif
+%endmacro
+
+%ifdef ARCH_X86_64
+; 64-coefficient coeff_last, x86-64 only: merge four 16-bit zero-masks into
+; one 64-bit register, invert, and bsr once.
+; NOTE(review): if all 64 coefficients were zero, bsr's destination would be
+; undefined -- presumably callers guarantee at least one nonzero coefficient.
+    cglobal x264_coeff_last64_sse2, 1,4
+    LAST_MASK_SSE2 r1d, r0
+    LAST_MASK_SSE2 r2d, r0+32
+    LAST_MASK_SSE2 r3d, r0+64
+    LAST_MASK_SSE2 r0d, r0+96  ; safe: r0 is only clobbered after its last load
+    shl r2d, 16
+    shl r0d, 16
+    or r1d, r2d              ; bits 0..31: coefficients 0..31
+    or r3d, r0d              ; bits 0..31: coefficients 32..63
+    shl r3, 32
+    or r1, r3                ; full 64-bit zero-mask
+    not r1                   ; -> nonzero-mask
+    bsr rax, r1
+    RET
+%endif
+
+%ifndef ARCH_X86_64
+%define LAST_MASK LAST_MASK_MMX
+COEFF_LAST mmxext
+%endif
+%define LAST_MASK LAST_MASK_SSE2
+COEFF_LAST sse2
int x264_decimate_score64_mmxext( int16_t *dct );
int x264_decimate_score64_sse2 ( int16_t *dct );
int x264_decimate_score64_ssse3 ( int16_t *dct );
+int x264_coeff_last4_mmxext( int16_t *dct );
+int x264_coeff_last15_mmxext( int16_t *dct );
+int x264_coeff_last16_mmxext( int16_t *dct );
+int x264_coeff_last64_mmxext( int16_t *dct );
+int x264_coeff_last15_sse2( int16_t *dct );
+int x264_coeff_last16_sse2( int16_t *dct );
+int x264_coeff_last64_sse2( int16_t *dct );
#endif
break;
case DCT_CHROMA_DC:
/* no need to test skip/pcm */
+ i_idx -= 25;
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
int i_coeff_abs_m1[64];
- int UNUSED i_coeff_sign[64];
+ int i_coeff_sign[64];
int i_coeff = 0;
- int i_last = 0;
+ int i_last;
int i_sigmap_size;
int node_ctx = 0;
- int i, j;
-
- /* yes this is always aligned, and l[-1] exists in the cases where it's used (ac) */
- for( j = i_count - 4; j >= -1; j -= 4 )
- if( *(uint64_t*)(l+j) )
- break;
+ int i;
if( i_count != 64 )
{
/* coded block flag */
int ctx = 85 + x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx );
- if( j >= -1 )
+ if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )
x264_cabac_encode_decision( cb, ctx, 1 );
else
{
}
}
- for( i = j; i < j+4; i++)
- if( l[i] )
- i_last = i;
+ i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
i_sigmap_size = X264_MIN( i_last+1, i_count-1 );
{\
i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
if( !RDO_SKIP_BS )\
- i_coeff_sign[i_coeff] = l[i] < 0;\
+ i_coeff_sign[i_coeff] = l[i] < 0;\
i_coeff++;\
x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 1 );\
x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), i == i_last );\
cb->f8_bits_encoded += cabac_size_unary[i_prefix][cb->state[ctx]];
cb->state[ctx] = cabac_transition_unary[i_prefix][cb->state[ctx]];
#else
- for( j = 0; j < i_prefix - 1; j++ )
+ for( i = 0; i < i_prefix - 1; i++ )
x264_cabac_encode_decision( cb, ctx, 1 );
if( i_prefix < 14 )
x264_cabac_encode_decision( cb, ctx, 0 );
if( i_mb_type == I_16x16 )
{
/* DC Luma */
- block_residual_write_cabac( h, cb, DCT_LUMA_DC, 0, h->dct.luma16x16_dc, 16 );
+ block_residual_write_cabac( h, cb, DCT_LUMA_DC, 24, h->dct.luma16x16_dc, 16 );
/* AC Luma */
if( h->mb.i_cbp_luma != 0 )
if( h->mb.i_cbp_chroma &0x03 ) /* Chroma DC residual present */
{
- block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 0, h->dct.chroma_dc[0], 4 );
- block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 1, h->dct.chroma_dc[1], 4 );
+ block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 4 );
+ block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 4 );
}
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
{
if( h->mb.i_cbp_luma & (1 << i8) )
{
if( h->mb.b_transform_8x8 )
+ {
+ *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[i8*4+0]] = 0x0101;
+ *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[i8*4+2]] = 0x0101;
block_residual_write_cabac( h, cb, DCT_LUMA_8x8, i8, h->dct.luma8x8[i8], 64 );
+ }
else
{
int i4;
for( i4 = 0; i4 < 4; i4++ )
+ {
+ h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero( h->dct.luma4x4[i4+i8*4] );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
+ }
}
}
+ else
+ {
+ *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[i8*4+0]] = 0;
+ *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[i8*4+2]] = 0;
+ }
+ h->mb.cache.non_zero_count[x264_scan8[16+i8]] = array_non_zero( h->dct.luma4x4[16+i8] );
+ h->mb.cache.non_zero_count[x264_scan8[20+i8]] = array_non_zero( h->dct.luma4x4[20+i8] );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
block_residual_write_cabac( h, cb, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
static void x264_subpartition_size_cabac( x264_t *h, x264_cabac_t *cb, int i4, int i_pixel )
{
int b_8x4 = i_pixel == PIXEL_8x4;
+ h->mb.cache.non_zero_count[x264_scan8[i4]] = array_non_zero( h->dct.luma4x4[i4+2-b_8x4] );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
if( i_pixel == PIXEL_4x4 )
x264_cabac_mb_mvd( h, cb, 0, i4, 1, 1 );
else
{
x264_cabac_mb_mvd( h, cb, 0, i4, 1+b_8x4, 2-b_8x4 );
+ h->mb.cache.non_zero_count[x264_scan8[i4+2-b_8x4]] = array_non_zero( h->dct.luma4x4[i4+2-b_8x4] );
block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4+2-b_8x4, h->dct.luma4x4[i4+2-b_8x4], 16 );
}
}
x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
if( nnz )
{
- block_residual_write_cabac( h, cb, DCT_LUMA_8x8, 4*i8, h->dct.luma8x8[i8], 64 );
*(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[i8*4]] = 0x0101;
*(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[i8*4+2]] = 0x0101;
+ block_residual_write_cabac( h, cb, DCT_LUMA_8x8, 4*i8, h->dct.luma8x8[i8], 64 );
}
else
{
const int i_pred = x264_mb_predict_intra4x4_mode( h, i4 );
i_mode = x264_mb_pred_mode4x4_fix( i_mode );
x264_cabac_mb_intra4x4_pred_mode( cb, i_pred, i_mode );
- block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
h->mb.cache.non_zero_count[x264_scan8[i4]] = array_non_zero( h->dct.luma4x4[i4] );
+ block_residual_write_cabac( h, cb, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
}
static void x264_i8x8_chroma_size_cabac( x264_t *h, x264_cabac_t *cb )
x264_cabac_mb_cbp_chroma( h, cb );
if( h->mb.i_cbp_chroma > 0 )
{
- block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 0, h->dct.chroma_dc[0], 4 );
- block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 1, h->dct.chroma_dc[1], 4 );
+ block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 4 );
+ block_residual_write_cabac( h, cb, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 4 );
if( h->mb.i_cbp_chroma == 2 )
{
10, 4, 5, 1, 11, 6, 7, 2, 12, 8, 9, 3, 0
};
-#define BLOCK_INDEX_CHROMA_DC (-1)
-#define BLOCK_INDEX_LUMA_DC (-2)
-
static inline void bs_write_vlc( bs_t *s, vlc_t v )
{
bs_write( s, v.i_size, v.i_bits );
/****************************************************************************
* block_residual_write_cavlc:
****************************************************************************/
-static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *l, int i_count )
+static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, int i_idx, int16_t *l, int i_count )
{
+ static const int ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
int level[16], run[16];
int i_total, i_trailing;
int i_total_zero;
int i_last;
unsigned int i_sign;
int i;
+ int idx = 0;
int i_suffix_length;
+    /* x264_mb_predict_non_zero_code returns 0 <-> (16+16+1)>>1 = 16 */
+ int nC = i_idx >= 25 ? 4 : ct_index[x264_mb_predict_non_zero_code( h, i_idx == 24 ? 0 : i_idx )];
- /* first find i_last */
- for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
- if( *(uint64_t*)(l+i_last-3) )
- break;
- while( i_last >= 0 && l[i_last] == 0 )
- i_last--;
+ if( !h->mb.cache.non_zero_count[x264_scan8[i_idx]] )
+ {
+ bs_write_vlc( s, x264_coeff_token[nC][0] );
+ return;
+ }
+ i_last = h->quantf.coeff_last[i_ctxBlockCat](l);
i_sign = 0;
i_total = 0;
i_trailing = 0;
i_total_zero = i_last + 1;
- if( i_last >= 0 )
+ /* level and run and total */
+ while( i_last >= 0 )
{
- int idx = 0;
-
- /* level and run and total */
- while( i_last >= 0 )
- {
- int r = 0;
- level[idx] = l[i_last];
- while( --i_last >= 0 && l[i_last] == 0 )
- r++;
- run[idx++] = r;
- }
+ int r = 0;
+ level[idx] = l[i_last];
+ while( --i_last >= 0 && l[i_last] == 0 )
+ r++;
+ run[idx++] = r;
+ }
- i_total = idx;
- i_total_zero -= idx;
+ i_total = idx;
+ i_total_zero -= idx;
- i_trailing = X264_MIN(3, idx);
- for( idx = 0; idx < i_trailing; idx++ )
+ i_trailing = X264_MIN(3, idx);
+ for( idx = 0; idx < i_trailing; idx++ )
+ {
+ if( (unsigned)(level[idx]+1) > 2 )
{
- if( (unsigned)(level[idx]+1) > 2 )
- {
- i_trailing = idx;
- break;
- }
- i_sign <<= 1;
- i_sign |= level[idx] < 0;
+ i_trailing = idx;
+ break;
}
+ i_sign <<= 1;
+ i_sign |= level[idx] < 0;
}
/* total/trailing */
- if( i_idx == BLOCK_INDEX_CHROMA_DC )
- bs_write_vlc( s, x264_coeff_token[4][i_total*4+i_trailing] );
- else
- {
- /* x264_mb_predict_non_zero_code return 0 <-> (16+16+1)>>1 = 16 */
- static const int ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3 };
- int nC = x264_mb_predict_non_zero_code( h, i_idx == BLOCK_INDEX_LUMA_DC ? 0 : i_idx );
- bs_write_vlc( s, x264_coeff_token[ct_index[nC]][i_total*4+i_trailing] );
- }
-
- if( i_total <= 0 )
- return;
+ bs_write_vlc( s, x264_coeff_token[nC][i_total*4+i_trailing] );
i_suffix_length = i_total > 10 && i_trailing < 3 ? 1 : 0;
if( i_trailing > 0 )
if( i_total < i_count )
{
- if( i_idx == BLOCK_INDEX_CHROMA_DC )
+ if( i_idx >= 25 )
bs_write_vlc( s, x264_total_zeros_dc[i_total-1][i_total_zero] );
else
bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] );
/* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
- && !array_non_zero(h->dct.luma16x16_dc) )
+ && !h->mb.cache.non_zero_count[x264_scan8[24]] )
{
#if !RDO_SKIP_BS
h->mb.i_qp = h->mb.i_last_qp;
for( i4 = 0; i4 < 4; i4++ )
{
h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero_count( h->dct.luma4x4[i4+i8*4] );
- block_residual_write_cavlc( h, s, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
+ block_residual_write_cavlc( h, s, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
}
}
cavlc_qp_delta( h, s );
/* DC Luma */
- block_residual_write_cavlc( h, s, BLOCK_INDEX_LUMA_DC , h->dct.luma16x16_dc, 16 );
+ block_residual_write_cavlc( h, s, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc, 16 );
/* AC Luma */
if( h->mb.i_cbp_luma )
for( i = 0; i < 16; i++ )
{
h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] );
- block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 );
+ block_residual_write_cavlc( h, s, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1, 15 );
}
}
else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
if( h->mb.i_cbp_chroma )
{
/* Chroma DC residual present */
- block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 );
- block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 );
+ block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 4 );
+ block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 4 );
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
for( i = 16; i < 24; i++ )
{
h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] );
- block_residual_write_cavlc( h, s, i, h->dct.luma4x4[i]+1, 15 );
+ block_residual_write_cavlc( h, s, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
}
}
{
x264_macroblock_luma_write_cavlc( h, &s, i8, i8 );
h->mb.cache.non_zero_count[x264_scan8[16+i8]] = array_non_zero_count( h->dct.luma4x4[16+i8] );
- block_residual_write_cavlc( h, &s, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
+ block_residual_write_cavlc( h, &s, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1, 15 );
h->mb.cache.non_zero_count[x264_scan8[20+i8]] = array_non_zero_count( h->dct.luma4x4[20+i8] );
- block_residual_write_cavlc( h, &s, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
+ block_residual_write_cavlc( h, &s, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1, 15 );
i8 += x264_pixel_size[i_pixel].h >> 3;
}
s.i_bits_encoded = 0;
cavlc_mb_mvd( h, &s, 0, i4, 1+b_8x4 );
h->mb.cache.non_zero_count[x264_scan8[i4]] = array_non_zero_count( h->dct.luma4x4[i4] );
- block_residual_write_cavlc( h, &s, i4, h->dct.luma4x4[i4], 16 );
+ block_residual_write_cavlc( h, &s, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
if( i_pixel != PIXEL_4x4 )
{
i4 += 2-b_8x4;
h->mb.cache.non_zero_count[x264_scan8[i4]] = array_non_zero_count( h->dct.luma4x4[i4] );
- block_residual_write_cavlc( h, &s, i4, h->dct.luma4x4[i4], 16 );
+ block_residual_write_cavlc( h, &s, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
}
return s.i_bits_encoded;
static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
{
- int i4, i;
+ int i4;
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
+ h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4], h->dct.luma8x8[i8] );
for( i4 = 0; i4 < 4; i4++ )
{
- for( i = 0; i < 16; i++ )
- h->dct.luma4x4[i4+i8*4][i] = h->dct.luma8x8[i8][i4+i*4];
h->mb.cache.non_zero_count[x264_scan8[i4+i8*4]] = array_non_zero_count( h->dct.luma4x4[i4+i8*4] );
- block_residual_write_cavlc( h, &h->out.bs, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
+ block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4], 16 );
}
return h->out.bs.i_bits_encoded;
}
{
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
h->mb.cache.non_zero_count[x264_scan8[i4]] = array_non_zero_count( h->dct.luma4x4[i4] );
- block_residual_write_cavlc( h, &h->out.bs, i4, h->dct.luma4x4[i4], 16 );
+ block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4], 16 );
return h->out.bs.i_bits_encoded;
}
h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
if( h->mb.i_cbp_chroma )
{
- block_residual_write_cavlc( h, &h->out.bs, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 );
- block_residual_write_cavlc( h, &h->out.bs, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 );
+ block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0], 4 );
+ block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1], 4 );
if( h->mb.i_cbp_chroma == 2 )
{
for( i = 16; i < 24; i++ )
{
h->mb.cache.non_zero_count[x264_scan8[i]] = array_non_zero_count( h->dct.luma4x4[i] );
- block_residual_write_cavlc( h, &h->out.bs, i, h->dct.luma4x4[i]+1, 15 );
+ block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1, 15 );
}
}
}
h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
h->mb.i_cbp_chroma |= nz;
}
+ h->mb.cache.non_zero_count[x264_scan8[25]] = array_non_zero( h->dct.chroma_dc[0] );
+ h->mb.cache.non_zero_count[x264_scan8[26]] = array_non_zero( h->dct.chroma_dc[1] );
if( h->mb.i_cbp_chroma )
h->mb.i_cbp_chroma = 2; /* dc+ac (we can't do only ac) */
- else if( array_non_zero( h->dct.chroma_dc ) )
+ else if( h->mb.cache.non_zero_count[x264_scan8[25]] |
+ h->mb.cache.non_zero_count[x264_scan8[26]] )
h->mb.i_cbp_chroma = 1; /* dc only */
}
h->mb.i_cbp_luma |= nz;
}
h->mb.i_cbp_luma *= 0xf;
+ h->mb.cache.non_zero_count[x264_scan8[24]] = array_non_zero( h->dct.luma16x16_dc );
}
else
{
h->mb.i_cbp_luma |= cbp << i;
}
}
+ h->mb.cache.non_zero_count[x264_scan8[24]] = 0;
}
if( h->param.b_cabac )
{
- i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc ) )
- | array_non_zero( h->dct.chroma_dc[0] ) << 1
- | array_non_zero( h->dct.chroma_dc[1] ) << 2;
+ i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
+ | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
+ | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
}
/* store cbp */
}
report( "denoise dct :" );
-#define TEST_DECIMATE( qname, decname, block, w, ac, thresh ) \
+#define TEST_DECIMATE( decname, w, ac, thresh ) \
if( qf_a.decname != qf_ref.decname ) \
{ \
set_func_name( #decname ); \
}
ok = 1;
- TEST_DECIMATE( quant_8x8, decimate_score64, CQM_8IY, 8, 0, 6 );
- TEST_DECIMATE( quant_4x4, decimate_score16, CQM_4IY, 4, 0, 6 );
- TEST_DECIMATE( quant_4x4, decimate_score15, CQM_4IY, 4, 1, 7 );
+ TEST_DECIMATE( decimate_score64, 8, 0, 6 );
+ TEST_DECIMATE( decimate_score16, 4, 0, 6 );
+ TEST_DECIMATE( decimate_score15, 4, 1, 7 );
report( "decimate_score :" );
+/* checkasm driver: fill a w*w dct block with sparse random coefficients
+ * (forcing at least one nonzero, since coeff_last is only called on coded
+ * blocks), then verify the optimized coeff_last against the C reference.
+ * `ac' = 1 leaves index 0 zero and passes l = dct+1, mimicking how the
+ * encoder calls the 15-coefficient (DC-less) variant. */
+#define TEST_LAST( last, lastname, w, ac ) \
+    if( qf_a.last != qf_ref.last ) \
+    { \
+        set_func_name( #lastname ); \
+        used_asm = 1; \
+        for( i = 0; i < 100; i++ ) \
+        { \
+            int result_c, result_a, idx, nnz=0; \
+            int max = rand() & (w*w-1); \
+            memset( dct1, 0, w*w*2 ); \
+            for( idx = ac; idx < max; idx++ ) \
+                nnz |= dct1[idx] = !(rand()&3) + (!(rand()&15))*rand(); \
+            if( !nnz ) \
+                dct1[ac] = 1; \
+            memcpy( dct2, dct1, w*w*2 ); \
+            result_c = call_c1( qf_c.last, (void*)(dct2+ac) ); \
+            result_a = call_a1( qf_a.last, (void*)(dct2+ac) ); \
+            if( result_c != result_a ) \
+            { \
+                ok = 0; \
+                fprintf( stderr, #lastname ": [FAILED]\n" ); \
+                break; \
+            } \
+            call_c2( qf_c.last, (void*)(dct2+ac) ); \
+            call_a2( qf_a.last, (void*)(dct2+ac) ); \
+        } \
+    }
+
+ ok = 1;
+ TEST_LAST( coeff_last[DCT_CHROMA_DC], coeff_last4, 2, 0 );
+ TEST_LAST( coeff_last[ DCT_LUMA_AC], coeff_last15, 4, 1 );
+ TEST_LAST( coeff_last[ DCT_LUMA_4x4], coeff_last16, 4, 0 );
+ TEST_LAST( coeff_last[ DCT_LUMA_8x8], coeff_last64, 8, 0 );
+ report( "coeff_last :" );
+
return ret;
}