git.sesse.net Git - x264/blob - common/set.c

   1 /*****************************************************************************
   2  * set.c: quantization init
   3  *****************************************************************************
   4  * Copyright (C) 2005-2011 x264 project
   5  *
   6  * Authors: Loren Merritt <lorenm@u.washington.edu>
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  21  *
  22  * This program is also available under a commercial proprietary license.
  23  * For more information, contact us at licensing@x264.com.
  24  *****************************************************************************/
  25
  26 #define _ISOC99_SOURCE
  27 #include "common.h"
  28
  /* SHIFT(x,s): for s <= 0, shift x left by -s bits; for s > 0, shift x right
   * by s bits after adding 1<<(s-1), i.e. rounding to nearest.
   * DIV(n,d): integer division of n by d with d>>1 added to the numerator
   * first, i.e. rounding to nearest for non-negative operands.
   * Both macros evaluate some of their arguments more than once, so the
   * arguments must be free of side effects. */
  29 #define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
  30 #define DIV(n,d) (((n) + ((d)>>1)) / (d))
  31
  /* Default 4x4 dequantization multipliers.
   * x264_cqm_init() expands this table to one value per coefficient: the row
   * is later selected with qp%6, and the column for coefficient i (0..15) is
   * (i&1) + ((i>>2)&1), i.e. how many of the two low coordinate bits tested
   * there are set. */
  32 static const uint8_t dequant4_scale[6][3] =
  33 {
  34     { 10, 13, 16 },
  35     { 11, 14, 18 },
  36     { 13, 16, 20 },
  37     { 14, 18, 23 },
  38     { 16, 20, 25 },
  39     { 18, 23, 29 }
  40 };
     /* Default 4x4 forward-quantization multipliers; rows and columns are
      * indexed exactly like dequant4_scale. */
  41 static const uint16_t quant4_scale[6][3] =
  42 {
  43     { 13107, 8066, 5243 },
  44     { 11916, 7490, 4660 },
  45     { 10082, 6554, 4194 },
  46     {  9362, 5825, 3647 },
  47     {  8192, 5243, 3355 },
  48     {  7282, 4559, 2893 },
  49 };
  50
  /* Maps a coefficient position to a column (0..5) of the 8x8 scale tables.
   * x264_cqm_init() indexes it with ((i>>1)&12) | (i&3) for coefficient
   * i = row*8 + col, which equals (row&3)*4 + (col&3). */
  51 static const uint8_t quant8_scan[16] =
  52 {
  53     0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1
  54 };
     /* Default 8x8 dequantization multipliers: one row per qp%6, one column per
      * class produced by quant8_scan. */
  55 static const uint8_t dequant8_scale[6][6] =
  56 {
  57     { 20, 18, 32, 19, 25, 24 },
  58     { 22, 19, 35, 21, 28, 26 },
  59     { 26, 23, 42, 24, 33, 31 },
  60     { 28, 25, 45, 26, 35, 33 },
  61     { 32, 28, 51, 30, 40, 38 },
  62     { 36, 32, 58, 34, 46, 43 },
  63 };
     /* Default 8x8 forward-quantization multipliers; same layout as
      * dequant8_scale. */
  64 static const uint16_t quant8_scale[6][6] =
  65 {
  66     { 13107, 11428, 20972, 12222, 16777, 15481 },
  67     { 11916, 10826, 19174, 11058, 14980, 14290 },
  68     { 10082,  8943, 15978,  9675, 12710, 11985 },
  69     {  9362,  8228, 14913,  8931, 11984, 11259 },
  70     {  8192,  7346, 13159,  7740, 10486,  9777 },
  71     {  7282,  6428, 11570,  6830,  9118,  8640 }
  72 };
  73
  /* Build the quantization tables stored in h from the scaling lists in
   * h->pps->scaling_list: per-list quant, dequant, "unquant" and bias tables
   * for the four 4x4 lists and the two 8x8 lists, plus the emergency
   * noise-reduction offsets for QPs above QP_MAX_SPEC.
   *
   * Unless h->mb.b_lossless is set, h->param.rc.i_qp_min / i_qp_max are
   * tightened to the QP range in which every quant multiplier is non-zero
   * and fits in 16 bits.
   *
   * Returns 0 on success. Returns -1 if the resulting QP range is empty
   * (after logging an error) or via the fail label, which first calls
   * x264_cqm_delete(); the fail label is presumably the target of
   * CHECKED_MALLOC on allocation failure -- the macro is not defined in
   * this file. */
  74 int x264_cqm_init( x264_t *h )
  75 {
         /* Default (flat-matrix) multipliers expanded to one entry per
          * coefficient, indexed [qp%6][coef]. */
  76     int def_quant4[6][16];
  77     int def_quant8[6][64];
  78     int def_dequant4[6][16];
  79     int def_dequant8[6][64];
         /* Local scratch: forward multipliers after applying the scaling
          * lists, indexed [list][qp%6][coef]. These are distinct from the
          * h->quant4_mf / h->quant8_mf tables filled further down. */
  80     int quant4_mf[4][6][16];
  81     int quant8_mf[2][6][64];
         /* Per-list rounding term: entries 0 and 1 derive from the
          * configured luma deadzones, entries 2 and 3 are fixed. The 8x8
          * lists reuse entries 0 and 1. */
  82     int deadzone[4] = { 32 - h->param.analyse.i_luma_deadzone[1],
  83                         32 - h->param.analyse.i_luma_deadzone[0],
  84                         32 - 11, 32 - 21 };
         /* Highest QP at which a luma (resp. chroma) multiplier exceeds
          * 0xffff, and lowest QP at which some multiplier rounds to zero. */
  85     int max_qp_err = -1;
  86     int max_chroma_qp_err = -1;
  87     int min_qp_err = QP_MAX+1;
  88
         /* Allocate tables for the six scaling lists (0-3: 4x4, 4-5: 8x8).
          * A list identical to an earlier list of the same size shares that
          * list's tables instead of getting its own.
          * NOTE(review): indices 4 and 5 are applied to the *4_mf / quant4_bias
          * arrays here; presumably those run into the corresponding 8x8
          * pointer arrays through the layout of x264_t -- confirm against the
          * struct definition. */
  89     for( int i = 0; i < 6; i++ )
  90     {
  91         int size = i<4 ? 16 : 64;
  92         int j;
  93         for( j = (i<4 ? 0 : 4); j < i; j++ )
  94             if( !memcmp( h->pps->scaling_list[i], h->pps->scaling_list[j], size*sizeof(uint8_t) ) )
  95                 break;
  96         if( j < i )
  97         {
  98             h->  quant4_mf[i] = h->  quant4_mf[j];
  99             h->dequant4_mf[i] = h->dequant4_mf[j];
 100             h->unquant4_mf[i] = h->unquant4_mf[j];
 101         }
 102         else
 103         {
 104             CHECKED_MALLOC( h->  quant4_mf[i], (QP_MAX+1)*size*sizeof(udctcoef) );
 105             CHECKED_MALLOC( h->dequant4_mf[i],  6*size*sizeof(int) );
 106             CHECKED_MALLOC( h->unquant4_mf[i], (QP_MAX+1)*size*sizeof(int) );
 107         }
 108
             /* The bias table also depends on the deadzone, so it is shared
              * only when both the scaling list and the deadzone match. */
 109         for( j = (i<4 ? 0 : 4); j < i; j++ )
 110             if( deadzone[j&3] == deadzone[i&3] &&
 111                 !memcmp( h->pps->scaling_list[i], h->pps->scaling_list[j], size*sizeof(uint8_t) ) )
 112                 break;
 113         if( j < i )
 114             h->quant4_bias[i] = h->quant4_bias[j];
 115         else
 116             CHECKED_MALLOC( h->quant4_bias[i], (QP_MAX+1)*size*sizeof(udctcoef) );
 117     }
 118
         /* Expand the compact default tables to one value per coefficient. */
 119     for( int q = 0; q < 6; q++ )
 120     {
 121         for( int i = 0; i < 16; i++ )
 122         {
 123             int j = (i&1) + ((i>>2)&1);
 124             def_dequant4[q][i] = dequant4_scale[q][j];
 125             def_quant4[q][i]   =   quant4_scale[q][j];
 126         }
 127         for( int i = 0; i < 64; i++ )
 128         {
 129             int j = quant8_scan[((i>>1)&12) | (i&3)];
 130             def_dequant8[q][i] = dequant8_scale[q][j];
 131             def_quant8[q][i]   =   quant8_scale[q][j];
 132         }
 133     }
 134
         /* Apply the scaling lists: dequant multipliers are scaled up by the
          * list entry, forward multipliers are 16*default divided (rounded)
          * by the list entry. */
 135     for( int q = 0; q < 6; q++ )
 136     {
 137         for( int i_list = 0; i_list < 4; i_list++ )
 138             for( int i = 0; i < 16; i++ )
 139             {
 140                 h->dequant4_mf[i_list][q][i] = def_dequant4[q][i] * h->pps->scaling_list[i_list][i];
 141                      quant4_mf[i_list][q][i] = DIV(def_quant4[q][i] * 16, h->pps->scaling_list[i_list][i]);
 142             }
 143         for( int i_list = 0; i_list < 2; i_list++ )
 144             for( int i = 0; i < 64; i++ )
 145             {
 146                 h->dequant8_mf[i_list][q][i] = def_dequant8[q][i] * h->pps->scaling_list[4+i_list][i];
 147                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
 148             }
 149     }
         /* Derive the per-QP tables from the qp%6 base values, and record the
          * QPs at which a multiplier becomes unusable (zero or wider than
          * 16 bits). */
 150     for( int q = 0; q < QP_MAX+1; q++ )
 151     {
 152         int j;
 153         for( int i_list = 0; i_list < 4; i_list++ )
 154             for( int i = 0; i < 16; i++ )
 155             {
 156                 h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
 157                 h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
 158                 if( !j )
 159                 {
                         /* Multiplier rounded to zero: note the QP and leave the
                          * bias entry for this coefficient unwritten. */
 160                     min_qp_err = X264_MIN( min_qp_err, q );
 161                     continue;
 162                 }
 163                 // round to nearest, unless that would cause the deadzone to be negative
 164                 h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
 165                 if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
 166                     max_qp_err = q;
 167                 if( j > 0xffff && q > max_chroma_qp_err && (i_list == CQM_4IC || i_list == CQM_4PC) )
 168                     max_chroma_qp_err = q;
 169             }
             /* The per-QP 8x8 tables are filled only when the 8x8 transform
              * is enabled. */
 170         if( h->param.analyse.b_transform_8x8 )
 171             for( int i_list = 0; i_list < 2; i_list++ )
 172                 for( int i = 0; i < 64; i++ )
 173                 {
 174                     h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
 175                     j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
                         /* Stored truncated to 16 bits; the untruncated j is
                          * checked against 0xffff below. */
 176                     h->quant8_mf[i_list][q][i] = (uint16_t)j;
 177
 178                     if( !j )
 179                     {
 180                         min_qp_err = X264_MIN( min_qp_err, q );
 181                         continue;
 182                     }
 183                     h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
 184                     if( j > 0xffff && q > max_qp_err )
 185                         max_qp_err = q;
 186                 }
 187     }
 188
 189     /* Emergency mode denoising. */
 190     x264_emms();
 191     CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) );
         /* One table per QP step above QP_MAX_SPEC and per category:
          * category 1 has 64 coefficients (8x8), categories 0 and 2 have 16. */
 192     for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ )
 193         for( int cat = 0; cat <= 2; cat++ )
 194         {
 195             int dct8x8 = cat == 1;
 196             int size = dct8x8 ? 64 : 16;
 197             udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
 198             /* Denoise chroma first (due to h264's chroma QP offset), then luma, then DC. */
 199             int dc_threshold =    (QP_MAX-QP_MAX_SPEC)*2/3;
 200             int luma_threshold =  (QP_MAX-QP_MAX_SPEC)*2/3;
 201             int chroma_threshold = 0;
 202
 203             for( int i = 0; i < size; i++ )
 204             {
 205                 int max = (1 << (7 + BIT_DEPTH)) - 1;
 206                 /* True "emergency mode": remove all DCT coefficients */
 207                 if( q == QP_MAX - QP_MAX_SPEC - 1 )
 208                 {
 209                     nr_offset[i] = max;
 210                     continue;
 211                 }
 212
                     /* Coefficient 0 uses the DC threshold; the others use the
                      * chroma threshold for category 2 and the luma threshold
                      * otherwise. Below the threshold no offset is applied. */
 213                 int thresh = i == 0 ? dc_threshold : cat == 2 ? chroma_threshold : luma_threshold;
 214                 if( q < thresh )
 215                 {
 216                     nr_offset[i] = 0;
 217                     continue;
 218                 }
                     /* Position of q within (thresh, QP_MAX - QP_MAX_SPEC], in (0,1]. */
 219                 double pos = (double)(q-thresh+1) / (QP_MAX - QP_MAX_SPEC - thresh);
 220
 221                 /* XXX: this math is largely tuned for /dev/random input. */
 222                 double start = dct8x8 ? h->unquant8_mf[CQM_8PY][QP_MAX_SPEC][i]
 223                                       : h->unquant4_mf[CQM_4PY][QP_MAX_SPEC][i];
 224                 /* Formula chosen as an exponential scale to vaguely mimic the effects
 225                  * of a higher quantizer. */
 226                 double bias = (pow( 2, pos*(QP_MAX - QP_MAX_SPEC)/10. )*0.003-0.003) * start;
 227                 nr_offset[i] = X264_MIN( bias + 0.5, max );
 228             }
 229         }
 230
         /* Restrict the allowed QP range to where the tables above are
          * valid; skipped in lossless mode. */
 231     if( !h->mb.b_lossless )
 232     {
 233         while( h->chroma_qp_table[h->param.rc.i_qp_min] <= max_chroma_qp_err )
 234             h->param.rc.i_qp_min++;
 235         if( min_qp_err <= h->param.rc.i_qp_max )
 236             h->param.rc.i_qp_max = min_qp_err-1;
 237         if( max_qp_err >= h->param.rc.i_qp_min )
 238             h->param.rc.i_qp_min = max_qp_err+1;
 239         if( h->param.rc.i_qp_min > h->param.rc.i_qp_max )
 240         {
 241             x264_log( h, X264_LOG_ERROR, "Impossible QP constraints for CQM (min=%d, max=%d)\n", h->param.rc.i_qp_min, h->param.rc.i_qp_max );
 242             return -1;
 243         }
 244     }
 245     return 0;
 246 fail:
 247     x264_cqm_delete( h );
 248     return -1;
 249 }
 250
 251 #define CQM_DELETE( n, max )\
 252     for( int i = 0; i < max; i++ )\
 253     {\
 254         int j;\
 255         for( j = 0; j < i; j++ )\
 256             if( h->quant##n##_mf[i] == h->quant##n##_mf[j] )\
 257                 break;\
 258         if( j == i )\
 259         {\
 260             x264_free( h->  quant##n##_mf[i] );\
 261             x264_free( h->dequant##n##_mf[i] );\
 262             x264_free( h->unquant##n##_mf[i] );\
 263         }\
 264         for( j = 0; j < i; j++ )\
 265             if( h->quant##n##_bias[i] == h->quant##n##_bias[j] )\
 266                 break;\
 267         if( j == i )\
 268             x264_free( h->quant##n##_bias[i] );\
 269     }
 270
 271 void x264_cqm_delete( x264_t *h )
 272 {
 273     CQM_DELETE( 4, 4 );
 274     CQM_DELETE( 8, 2 );
 275     x264_free( h->nr_offset_emergency );
 276 }
 277
 278 static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
 279                            uint8_t *cqm, const uint8_t *jvt, int length )
 280 {
 281     int i;
 282
 283     char *p = strstr( buf, name );
 284     if( !p )
 285     {
 286         memset( cqm, 16, length );
 287         return 0;
 288     }
 289
 290     p += strlen( name );
 291     if( *p == 'U' || *p == 'V' )
 292         p++;
 293
 294     char *nextvar = strstr( p, "INT" );
 295
 296     for( i = 0; i < length && (p = strpbrk( p, " \t\n," )) && (p = strpbrk( p, "0123456789" )); i++ )
 297     {
 298         int coef = -1;
 299         sscanf( p, "%d", &coef );
 300         if( i == 0 && coef == 0 )
 301         {
 302             memcpy( cqm, jvt, length );
 303             return 0;
 304         }
 305         if( coef < 1 || coef > 255 )
 306         {
 307             x264_log( h, X264_LOG_ERROR, "bad coefficient in list '%s'\n", name );
 308             return -1;
 309         }
 310         cqm[i] = coef;
 311     }
 312
 313     if( (nextvar && p > nextvar) || i != length )
 314     {
 315         x264_log( h, X264_LOG_ERROR, "not enough coefficients in list '%s'\n", name );
 316         return -1;
 317     }
 318
 319     return 0;
 320 }
 321
 322 int x264_cqm_parse_file( x264_t *h, const char *filename )
 323 {
 324     char *p;
 325     int b_error = 0;
 326
 327     h->param.i_cqm_preset = X264_CQM_CUSTOM;
 328
 329     char *buf = x264_slurp_file( filename );
 330     if( !buf )
 331     {
 332         x264_log( h, X264_LOG_ERROR, "can't open file '%s'\n", filename );
 333         return -1;
 334     }
 335
 336     while( (p = strchr( buf, '#' )) != NULL )
 337         memset( p, ' ', strcspn( p, "\n" ) );
 338
 339     b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_LUMA",   h->param.cqm_4iy, x264_cqm_jvt4i, 16 );
 340     b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA4X4_CHROMA", h->param.cqm_4ic, x264_cqm_jvt4i, 16 );
 341     b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_LUMA",   h->param.cqm_4py, x264_cqm_jvt4p, 16 );
 342     b_error |= x264_cqm_parse_jmlist( h, buf, "INTER4X4_CHROMA", h->param.cqm_4pc, x264_cqm_jvt4p, 16 );
 343     b_error |= x264_cqm_parse_jmlist( h, buf, "INTRA8X8_LUMA",   h->param.cqm_8iy, x264_cqm_jvt8i, 64 );
 344     b_error |= x264_cqm_parse_jmlist( h, buf, "INTER8X8_LUMA",   h->param.cqm_8py, x264_cqm_jvt8p, 64 );
 345
 346     x264_free( buf );
 347     return b_error;
 348 }
 349