X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=common%2Fset.c;h=810bf0e0908c7230d6b1acfeb971139a142cf7a7;hb=7688814a7ec994f8e5984d199b465ccc068b98af;hp=77713d4188735ace40b18b4f9958660ffb4d1d1e;hpb=08d39756a08e00ae39196b125e2cecdb08136e17;p=x264 diff --git a/common/set.c b/common/set.c index 77713d41..810bf0e0 100644 --- a/common/set.c +++ b/common/set.c @@ -1,7 +1,9 @@ /***************************************************************************** - * set.c: h264 encoder library + * set.c: quantization init ***************************************************************************** - * Copyright (C) 2005-2008 Loren Merritt + * Copyright (C) 2005-2015 x264 project + * + * Authors: Loren Merritt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,14 +18,18 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. *****************************************************************************/ +#define _ISOC99_SOURCE #include "common.h" -#define SHIFT(x,s) ((s)<0 ? (x)<<-(s) : (s)==0 ? (x) : ((x)+(1<<((s)-1)))>>(s)) +#define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s)) #define DIV(n,d) (((n) + ((d)>>1)) / (d)) -static const int dequant4_scale[6][3] = +static const uint8_t dequant4_scale[6][3] = { { 10, 13, 16 }, { 11, 14, 18 }, @@ -32,7 +38,7 @@ static const int dequant4_scale[6][3] = { 16, 20, 25 }, { 18, 23, 29 } }; -static const int quant4_scale[6][3] = +static const uint16_t quant4_scale[6][3] = { { 13107, 8066, 5243 }, { 11916, 7490, 4660 }, @@ -42,11 +48,11 @@ static const int quant4_scale[6][3] = { 7282, 4559, 2893 }, }; -static const int quant8_scan[16] = +static const uint8_t quant8_scan[16] = { 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 }; -static const int dequant8_scale[6][6] = +static const uint8_t dequant8_scale[6][6] = { { 20, 18, 32, 19, 25, 24 }, { 22, 19, 35, 21, 28, 26 }, @@ -55,7 +61,7 @@ static const int dequant8_scale[6][6] = { 32, 28, 51, 30, 40, 38 }, { 36, 32, 58, 34, 46, 43 }, }; -static const int quant8_scale[6][6] = +static const uint16_t quant8_scale[6][6] = { { 13107, 11428, 20972, 12222, 16777, 15481 }, { 11916, 10826, 19174, 11058, 14980, 14290 }, @@ -71,52 +77,66 @@ int x264_cqm_init( x264_t *h ) int def_quant8[6][64]; int def_dequant4[6][16]; int def_dequant8[6][64]; - int quant4_mf[4][6][4][4]; - int quant8_mf[2][6][8][8]; - int q, i, j, i_list; + int quant4_mf[4][6][16]; + int quant8_mf[4][6][64]; int deadzone[4] = { 32 - h->param.analyse.i_luma_deadzone[1], 32 - h->param.analyse.i_luma_deadzone[0], 32 - 11, 32 - 21 }; int max_qp_err = -1; + int max_chroma_qp_err = -1; + int min_qp_err = QP_MAX+1; + int num_8x8_lists = h->sps->i_chroma_format_idc == CHROMA_444 ? 4 + : h->param.analyse.b_transform_8x8 ? 2 : 0; /* Checkasm may segfault if optimized out by --chroma-format */ - for( i = 0; i < 6; i++ ) - { - int size = i<4 ? 16 : 64; - for( j = (i<4 ? 0 : 4); j < i; j++ ) - if( !memcmp( h->pps->scaling_list[i], h->pps->scaling_list[j], size*sizeof(uint8_t) ) ) - break; - if( j < i ) - { - h-> quant4_mf[i] = h-> quant4_mf[j]; - h->dequant4_mf[i] = h->dequant4_mf[j]; - h->unquant4_mf[i] = h->unquant4_mf[j]; - } - else - { - h-> quant4_mf[i] = x264_malloc(52*size*sizeof(uint16_t) ); - h->dequant4_mf[i] = x264_malloc( 6*size*sizeof(int) ); - h->unquant4_mf[i] = x264_malloc(52*size*sizeof(int) ); - } - - for( j = (i<4 ? 0 : 4); j < i; j++ ) - if( deadzone[j&3] == deadzone[i&3] && - !memcmp( h->pps->scaling_list[i], h->pps->scaling_list[j], size*sizeof(uint8_t) ) ) - break; - if( j < i ) - h->quant4_bias[i] = h->quant4_bias[j]; - else - h->quant4_bias[i] = x264_malloc(52*size*sizeof(uint16_t) ); +#define CQM_ALLOC( w, count )\ + for( int i = 0; i < count; i++ )\ + {\ + int size = w*w;\ + int start = w == 8 ? 4 : 0;\ + int j;\ + for( j = 0; j < i; j++ )\ + if( !memcmp( h->pps->scaling_list[i+start], h->pps->scaling_list[j+start], size*sizeof(uint8_t) ) )\ + break;\ + if( j < i )\ + {\ + h-> quant##w##_mf[i] = h-> quant##w##_mf[j];\ + h->dequant##w##_mf[i] = h->dequant##w##_mf[j];\ + h->unquant##w##_mf[i] = h->unquant##w##_mf[j];\ + }\ + else\ + {\ + CHECKED_MALLOC( h-> quant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\ + CHECKED_MALLOC( h->dequant##w##_mf[i], 6*size*sizeof(int) );\ + CHECKED_MALLOC( h->unquant##w##_mf[i], (QP_MAX_SPEC+1)*size*sizeof(int) );\ + }\ + for( j = 0; j < i; j++ )\ + if( deadzone[j] == deadzone[i] &&\ + !memcmp( h->pps->scaling_list[i+start], h->pps->scaling_list[j+start], size*sizeof(uint8_t) ) )\ + break;\ + if( j < i )\ + {\ + h->quant##w##_bias[i] = h->quant##w##_bias[j];\ + h->quant##w##_bias0[i] = h->quant##w##_bias0[j];\ + }\ + else\ + {\ + CHECKED_MALLOC( h->quant##w##_bias[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\ + CHECKED_MALLOC( h->quant##w##_bias0[i], (QP_MAX_SPEC+1)*size*sizeof(udctcoef) );\ + }\ } - for( q = 0; q < 6; q++ ) + CQM_ALLOC( 4, 4 ) + CQM_ALLOC( 8, num_8x8_lists ) + + for( int q = 0; q < 6; q++ ) { - for( i = 0; i < 16; i++ ) + for( int i = 0; i < 16; i++ ) { int j = (i&1) + ((i>>2)&1); def_dequant4[q][i] = dequant4_scale[q][j]; def_quant4[q][i] = quant4_scale[q][j]; } - for( i = 0; i < 64; i++ ) + for( int i = 0; i < 64; i++ ) { int j = quant8_scan[((i>>1)&12) | (i&3)]; def_dequant8[q][i] = dequant8_scale[q][j]; @@ -124,85 +144,169 @@ int x264_cqm_init( x264_t *h ) } } - for( q = 0; q < 6; q++ ) + for( int q = 0; q < 6; q++ ) { - for( i_list = 0; i_list < 4; i_list++ ) - for( i = 0; i < 16; i++ ) + for( int i_list = 0; i_list < 4; i_list++ ) + for( int i = 0; i < 16; i++ ) { - h->dequant4_mf[i_list][q][0][i] = def_dequant4[q][i] * h->pps->scaling_list[i_list][i]; - quant4_mf[i_list][q][0][i] = DIV(def_quant4[q][i] * 16, h->pps->scaling_list[i_list][i]); + h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->pps->scaling_list[i_list][i]; + quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->pps->scaling_list[i_list][i]); } - for( i_list = 0; i_list < 2; i_list++ ) - for( i = 0; i < 64; i++ ) + for( int i_list = 0; i_list < num_8x8_lists; i_list++ ) + for( int i = 0; i < 64; i++ ) { - h->dequant8_mf[i_list][q][0][i] = def_dequant8[q][i] * h->pps->scaling_list[4+i_list][i]; - quant8_mf[i_list][q][0][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]); + h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->pps->scaling_list[4+i_list][i]; + quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]); } } - for( q = 0; q < 52; q++ ) + for( int q = 0; q <= QP_MAX_SPEC; q++ ) { - for( i_list = 0; i_list < 4; i_list++ ) - for( i = 0; i < 16; i++ ) + int j; + for( int i_list = 0; i_list < 4; i_list++ ) + for( int i = 0; i < 16; i++ ) { - h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][0][i]; - h-> quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][0][i], q/6 - 1); + h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i]; + h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1); + if( !j ) + { + min_qp_err = X264_MIN( min_qp_err, q ); + continue; + } // round to nearest, unless that would cause the deadzone to be negative h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j ); - if( j > 0xffff && q > max_qp_err ) + h->quant4_bias0[i_list][q][i] = (1<<15)/j; + if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) ) max_qp_err = q; + if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_4IC || i_list == CQM_4PC) ) + max_chroma_qp_err = q; } if( h->param.analyse.b_transform_8x8 ) - for( i_list = 0; i_list < 2; i_list++ ) - for( i = 0; i < 64; i++ ) + for( int i_list = 0; i_list < num_8x8_lists; i_list++ ) + for( int i = 0; i < 64; i++ ) + { + h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i]; + j = SHIFT(quant8_mf[i_list][q%6][i], q/6); + h->quant8_mf[i_list][q][i] = (uint16_t)j; + + if( !j ) + { + min_qp_err = X264_MIN( min_qp_err, q ); + continue; + } + h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j ); + h->quant8_bias0[i_list][q][i] = (1<<15)/j; + if( j > 0xffff && q > max_qp_err && (i_list == CQM_8IY || i_list == CQM_8PY) ) + max_qp_err = q; + if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_8IC || i_list == CQM_8PC) ) + max_chroma_qp_err = q; + } + } + + /* Emergency mode denoising. */ + x264_emms(); + CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) ); + for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ ) + for( int cat = 0; cat < 3 + CHROMA444; cat++ ) + { + int dct8x8 = cat&1; + if( !h->param.analyse.b_transform_8x8 && dct8x8 ) + continue; + + int size = dct8x8 ? 64 : 16; + udctcoef *nr_offset = h->nr_offset_emergency[q][cat]; + /* Denoise chroma first (due to h264's chroma QP offset), then luma, then DC. */ + int dc_threshold = (QP_MAX-QP_MAX_SPEC)*2/3; + int luma_threshold = (QP_MAX-QP_MAX_SPEC)*2/3; + int chroma_threshold = 0; + + for( int i = 0; i < size; i++ ) { - h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][0][i]; - h-> quant8_mf[i_list][q][i] = j = SHIFT(quant8_mf[i_list][q%6][0][i], q/6); - h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j ); - if( j > 0xffff && q > max_qp_err ) - max_qp_err = q; + int max = (1 << (7 + BIT_DEPTH)) - 1; + /* True "emergency mode": remove all DCT coefficients */ + if( q == QP_MAX - QP_MAX_SPEC - 1 ) + { + nr_offset[i] = max; + continue; + } + + int thresh = i == 0 ? dc_threshold : cat >= 2 ? chroma_threshold : luma_threshold; + if( q < thresh ) + { + nr_offset[i] = 0; + continue; + } + double pos = (double)(q-thresh+1) / (QP_MAX - QP_MAX_SPEC - thresh); + + /* XXX: this math is largely tuned for /dev/random input. */ + double start = dct8x8 ? h->unquant8_mf[CQM_8PY][QP_MAX_SPEC][i] + : h->unquant4_mf[CQM_4PY][QP_MAX_SPEC][i]; + /* Formula chosen as an exponential scale to vaguely mimic the effects + * of a higher quantizer. */ + double bias = (pow( 2, pos*(QP_MAX - QP_MAX_SPEC)/10. )*0.003-0.003) * start; + nr_offset[i] = X264_MIN( bias + 0.5, max ); } - } + } - if( !h->mb.b_lossless && max_qp_err >= h->param.rc.i_qp_min ) + if( !h->mb.b_lossless ) { - x264_log( h, X264_LOG_ERROR, "Quantization overflow.\n" ); - x264_log( h, X264_LOG_ERROR, "Your CQM is incompatible with QP < %d, but min QP is set to %d\n", - max_qp_err+1, h->param.rc.i_qp_min ); - return -1; + while( h->chroma_qp_table[SPEC_QP(h->param.rc.i_qp_min)] <= max_chroma_qp_err ) + h->param.rc.i_qp_min++; + if( min_qp_err <= h->param.rc.i_qp_max ) + h->param.rc.i_qp_max = min_qp_err-1; + if( max_qp_err >= h->param.rc.i_qp_min ) + h->param.rc.i_qp_min = max_qp_err+1; + /* If long level-codes aren't allowed, we need to allow QP high enough to avoid them. */ + if( !h->param.b_cabac && h->sps->i_profile_idc < PROFILE_HIGH ) + while( h->chroma_qp_table[SPEC_QP(h->param.rc.i_qp_max)] <= 12 || h->param.rc.i_qp_max <= 12 ) + h->param.rc.i_qp_max++; + if( h->param.rc.i_qp_min > h->param.rc.i_qp_max ) + { + x264_log( h, X264_LOG_ERROR, "Impossible QP constraints for CQM (min=%d, max=%d)\n", h->param.rc.i_qp_min, h->param.rc.i_qp_max ); + return -1; + } } return 0; +fail: + x264_cqm_delete( h ); + return -1; } +#define CQM_DELETE( n, max )\ + for( int i = 0; i < (max); i++ )\ + {\ + int j;\ + for( j = 0; j < i; j++ )\ + if( h->quant##n##_mf[i] == h->quant##n##_mf[j] )\ + break;\ + if( j == i )\ + {\ + x264_free( h-> quant##n##_mf[i] );\ + x264_free( h->dequant##n##_mf[i] );\ + x264_free( h->unquant##n##_mf[i] );\ + }\ + for( j = 0; j < i; j++ )\ + if( h->quant##n##_bias[i] == h->quant##n##_bias[j] )\ + break;\ + if( j == i )\ + {\ + x264_free( h->quant##n##_bias[i] );\ + x264_free( h->quant##n##_bias0[i] );\ + }\ + } + void x264_cqm_delete( x264_t *h ) { - int i, j; - for( i = 0; i < 6; i++ ) - { - for( j = 0; j < i; j++ ) - if( h->quant4_mf[i] == h->quant4_mf[j] ) - break; - if( j == i ) - { - x264_free( h-> quant4_mf[i] ); - x264_free( h->dequant4_mf[i] ); - x264_free( h->unquant4_mf[i] ); - } - for( j = 0; j < i; j++ ) - if( h->quant4_bias[i] == h->quant4_bias[j] ) - break; - if( j == i ) - x264_free( h->quant4_bias[i] ); - } + CQM_DELETE( 4, 4 ); + CQM_DELETE( 8, CHROMA444 ? 4 : 2 ); + x264_free( h->nr_offset_emergency ); } -int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, - uint8_t *cqm, const uint8_t *jvt, int length ) +static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, + uint8_t *cqm, const uint8_t *jvt, int length ) { - char *p; - char *nextvar; int i; - p = strstr( buf, name ); + char *p = strstr( buf, name ); if( !p ) { memset( cqm, 16, length ); @@ -213,7 +317,7 @@ int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, if( *p == 'U' || *p == 'V' ) p++; - nextvar = strstr( p, "INT" ); + char *nextvar = strstr( p, "INT" ); for( i = 0; i < length && (p = strpbrk( p, " \t\n," )) && (p = strpbrk( p, "0123456789" )); i++ ) { @@ -243,12 +347,12 @@ int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, int x264_cqm_parse_file( x264_t *h, const char *filename ) { - char *buf, *p; + char *p; int b_error = 0; h->param.i_cqm_preset = X264_CQM_CUSTOM; - buf = x264_slurp_file( filename ); + char *buf = x264_slurp_file( filename ); if( !buf ) { x264_log( h, X264_LOG_ERROR, "can't open file '%s'\n", filename ); @@ -259,11 +363,16 @@ int x264_cqm_parse_file( x264_t *h, const char *filename ) memset( p, ' ', strcspn( p, "\n" ) ); b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_LUMA", h->param.cqm_4iy, x264_cqm_jvt4i, 16 ); - b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 ); b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_LUMA", h->param.cqm_4py, x264_cqm_jvt4p, 16 ); + b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 ); b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 ); b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA", h->param.cqm_8iy, x264_cqm_jvt8i, 64 ); b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_LUMA", h->param.cqm_8py, x264_cqm_jvt8p, 64 ); + if( CHROMA444 ) + { + b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_CHROMA", h->param.cqm_8ic, x264_cqm_jvt8i, 64 ); + b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_CHROMA", h->param.cqm_8pc, x264_cqm_jvt8p, 64 ); + } x264_free( buf ); return b_error;