git.sesse.net Git - x264/blob - common/quant.c

   1 /*****************************************************************************
   2  * quant.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2005 x264 project
   5  *
   6  * Authors: Christian Heine <sennindemokrit@gmx.net>
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  21  *****************************************************************************/
  22
  23 #include "common.h"
  24
  25 #ifdef HAVE_MMXEXT
  26 #include "i386/quant.h"
  27 #endif
  28
  29 #define QUANT_ONE( coef, mf ) \
  30 { \
  31     if( (coef) > 0 ) \
  32         (coef) = ( f + (coef) * (mf) ) >> i_qbits; \
  33     else \
  34         (coef) = - ( ( f - (coef) * (mf) ) >> i_qbits ); \
  35 }
  36
  37 static void quant_8x8_core( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f )
  38 {
  39     int i;
  40     for( i = 0; i < 64; i++ )
  41         QUANT_ONE( dct[0][i], quant_mf[0][i] );
  42 }
  43
  44 static void quant_4x4_core( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f )
  45 {
  46     int i;
  47     for( i = 0; i < 16; i++ )
  48         QUANT_ONE( dct[0][i], quant_mf[0][i] );
  49 }
  50
  51 static void quant_4x4_dc_core( int16_t dct[4][4], int i_quant_mf, int i_qbits, int f )
  52 {
  53     int i;
  54     for( i = 0; i < 16; i++ )
  55         QUANT_ONE( dct[0][i], i_quant_mf );
  56 }
  57
  58 static void quant_2x2_dc_core( int16_t dct[2][2], int i_quant_mf, int i_qbits, int f )
  59 {
  60     QUANT_ONE( dct[0][0], i_quant_mf );
  61     QUANT_ONE( dct[0][1], i_quant_mf );
  62     QUANT_ONE( dct[0][2], i_quant_mf );
  63     QUANT_ONE( dct[0][3], i_quant_mf );
  64 }
  65
  66 #define DEQUANT_SHL( x ) \
  67     dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][y][x] ) << i_qbits
  68
  69 #define DEQUANT_SHR( x ) \
  70     dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][y][x] + f ) >> (-i_qbits)
  71
  72 static void dequant_4x4( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
  73 {
  74     const int i_mf = i_qp%6;
  75     const int i_qbits = i_qp/6 - 4;
  76     int y;
  77
  78     if( i_qbits >= 0 )
  79     {
  80         for( y = 0; y < 4; y++ )
  81         {
  82             DEQUANT_SHL( 0 );
  83             DEQUANT_SHL( 1 );
  84             DEQUANT_SHL( 2 );
  85             DEQUANT_SHL( 3 );
  86         }
  87     }
  88     else
  89     {
  90         const int f = 1 << (-i_qbits-1);
  91         for( y = 0; y < 4; y++ )
  92         {
  93             DEQUANT_SHR( 0 );
  94             DEQUANT_SHR( 1 );
  95             DEQUANT_SHR( 2 );
  96             DEQUANT_SHR( 3 );
  97         }
  98     }
  99 }
 100
 101 static void dequant_8x8( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp )
 102 {
 103     const int i_mf = i_qp%6;
 104     const int i_qbits = i_qp/6 - 6;
 105     int y;
 106
 107     if( i_qbits >= 0 )
 108     {
 109         for( y = 0; y < 8; y++ )
 110         {
 111             DEQUANT_SHL( 0 );
 112             DEQUANT_SHL( 1 );
 113             DEQUANT_SHL( 2 );
 114             DEQUANT_SHL( 3 );
 115             DEQUANT_SHL( 4 );
 116             DEQUANT_SHL( 5 );
 117             DEQUANT_SHL( 6 );
 118             DEQUANT_SHL( 7 );
 119         }
 120     }
 121     else
 122     {
 123         const int f = 1 << (-i_qbits-1);
 124         for( y = 0; y < 8; y++ )
 125         {
 126             DEQUANT_SHR( 0 );
 127             DEQUANT_SHR( 1 );
 128             DEQUANT_SHR( 2 );
 129             DEQUANT_SHR( 3 );
 130             DEQUANT_SHR( 4 );
 131             DEQUANT_SHR( 5 );
 132             DEQUANT_SHR( 6 );
 133             DEQUANT_SHR( 7 );
 134         }
 135     }
 136 }
 137
 138 void x264_mb_dequant_2x2_dc( int16_t dct[2][2], int dequant_mf[6][4][4], int i_qp )
 139 {
 140     const int i_qbits = i_qp/6 - 5;
 141
 142     if( i_qbits >= 0 )
 143     {
 144         const int i_dmf = dequant_mf[i_qp%6][0][0] << i_qbits;
 145         dct[0][0] *= i_dmf;
 146         dct[0][1] *= i_dmf;
 147         dct[1][0] *= i_dmf;
 148         dct[1][1] *= i_dmf;
 149     }
 150     else
 151     {
 152         const int i_dmf = dequant_mf[i_qp%6][0][0];
 153         // chroma DC is truncated, not rounded
 154         dct[0][0] = ( dct[0][0] * i_dmf ) >> (-i_qbits);
 155         dct[0][1] = ( dct[0][1] * i_dmf ) >> (-i_qbits);
 156         dct[1][0] = ( dct[1][0] * i_dmf ) >> (-i_qbits);
 157         dct[1][1] = ( dct[1][1] * i_dmf ) >> (-i_qbits);
 158     }
 159 }
 160
 161 void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
 162 {
 163     const int i_qbits = i_qp/6 - 6;
 164     int y;
 165
 166     if( i_qbits >= 0 )
 167     {
 168         const int i_dmf = dequant_mf[i_qp%6][0][0] << i_qbits;
 169
 170         for( y = 0; y < 4; y++ )
 171         {
 172             dct[y][0] *= i_dmf;
 173             dct[y][1] *= i_dmf;
 174             dct[y][2] *= i_dmf;
 175             dct[y][3] *= i_dmf;
 176         }
 177     }
 178     else
 179     {
 180         const int i_dmf = dequant_mf[i_qp%6][0][0];
 181         const int f = 1 << (-i_qbits-1);
 182
 183         for( y = 0; y < 4; y++ )
 184         {
 185             dct[y][0] = ( dct[y][0] * i_dmf + f ) >> (-i_qbits);
 186             dct[y][1] = ( dct[y][1] * i_dmf + f ) >> (-i_qbits);
 187             dct[y][2] = ( dct[y][2] * i_dmf + f ) >> (-i_qbits);
 188             dct[y][3] = ( dct[y][3] * i_dmf + f ) >> (-i_qbits);
 189         }
 190     }
 191 }
 192
 193 void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
 194 {
 195     int i, j, maxQ8=0, maxQ4=0, maxQdc=0;
 196
 197     pf->quant_8x8_core = quant_8x8_core;
 198     pf->quant_4x4_core = quant_4x4_core;
 199     pf->quant_4x4_dc_core = quant_4x4_dc_core;
 200     pf->quant_2x2_dc_core = quant_2x2_dc_core;
 201
 202     pf->dequant_4x4 = dequant_4x4;
 203     pf->dequant_8x8 = dequant_8x8;
 204
 205 #ifdef HAVE_MMXEXT
 206
 207     /* determine the biggest coefficient in all quant8_mf tables */
 208     for( j = 0; j < 2; j++ )
 209         for( i = 0; i < 6*8*8; i++ )
 210         {
 211             int q = h->quant8_mf[j][0][0][i];
 212             if( maxQ8 < q )
 213                 maxQ8 = q;
 214         }
 215
 216     /* determine the biggest coefficient in all quant4_mf tables ( maxQ4 )
 217        and the biggest DC coefficient if all quant4_mf tables ( maxQdc ) */
 218     for( j = 0; j < 4; j++ )
 219         for( i = 0; i < 6*4*4; i++ )
 220         {
 221             int q = h->quant4_mf[j][0][0][i];
 222             if( maxQ4 < q )
 223                 maxQ4 = q;
 224             if( maxQdc < q && i%16 == 0 )
 225                 maxQdc = q;
 226         }
 227
 228     /* select quant_8x8 based on CPU and maxQ8 */
 229     if( maxQ8 < (1<<15) && cpu&X264_CPU_MMX )
 230         pf->quant_8x8_core = x264_quant_8x8_core15_mmx;
 231     else
 232     if( maxQ8 < (1<<16) && cpu&X264_CPU_MMXEXT )
 233         pf->quant_8x8_core = x264_quant_8x8_core16_mmxext;
 234     else
 235     if( cpu&X264_CPU_MMXEXT )
 236         pf->quant_8x8_core = x264_quant_8x8_core32_mmxext;
 237
 238     /* select quant_4x4 based on CPU and maxQ4 */
 239     if( maxQ4 < (1<<15) && cpu&X264_CPU_MMX )
 240         pf->quant_4x4_core = x264_quant_4x4_core15_mmx;
 241     else
 242     if( maxQ4 < (1<<16) && cpu&X264_CPU_MMXEXT )
 243         pf->quant_4x4_core = x264_quant_4x4_core16_mmxext;
 244     else
 245     if( cpu&X264_CPU_MMXEXT )
 246         pf->quant_4x4_core = x264_quant_4x4_core32_mmxext;
 247
 248     /* select quant_XxX_dc based on CPU and maxQdc */
 249     if( maxQdc < (1<<16) && cpu&X264_CPU_MMXEXT )
 250     {
 251         pf->quant_4x4_dc_core = x264_quant_4x4_dc_core16_mmxext;
 252         pf->quant_2x2_dc_core = x264_quant_2x2_dc_core16_mmxext;
 253     }
 254     else
 255     if( maxQdc < (1<<15) && cpu&X264_CPU_MMX )
 256     {
 257         pf->quant_4x4_dc_core = x264_quant_4x4_dc_core15_mmx;
 258         pf->quant_2x2_dc_core = x264_quant_2x2_dc_core15_mmx;
 259     }
 260     else
 261     if( cpu&X264_CPU_MMXEXT )
 262     {
 263         pf->quant_4x4_dc_core = x264_quant_4x4_dc_core32_mmxext;
 264         pf->quant_2x2_dc_core = x264_quant_2x2_dc_core32_mmxext;
 265     }
 266
 267     if( cpu&X264_CPU_MMX )
 268     {
 269         /* dequant is not subject to the above CQM-dependent overflow issues,
 270          * as long as the inputs are in the range generable by dct+quant.
 271          * that is not guaranteed by the standard, but is true within x264 */
 272         pf->dequant_4x4 = x264_dequant_4x4_mmx;
 273         pf->dequant_8x8 = x264_dequant_8x8_mmx;
 274     }
 275 #endif  /* HAVE_MMXEXT */
 276 }