git.sesse.net Git - x264/blob - common/csp.c

   1 /*****************************************************************************
   2  * csp.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2004 Laurent Aimar
   5  * $Id: csp.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include "common.h"
  25
  26 static inline void plane_copy_vflip( x264_mc_functions_t *mc,
  27                                      uint8_t *dst, int i_dst,
  28                                      uint8_t *src, int i_src, int w, int h)
  29 {
  30     mc->plane_copy( dst, i_dst, src + (h -1)*i_src, -i_src, w, h );
  31 }
  32
  33 static inline void plane_subsamplev2( uint8_t *dst, int i_dst,
  34                                       uint8_t *src, int i_src, int w, int h)
  35 {
  36     for( ; h > 0; h-- )
  37     {
  38         uint8_t *d = dst;
  39         uint8_t *s = src;
  40         int     i;
  41         for( i = 0; i < w; i++ )
  42         {
  43             *d++ = ( s[0] + s[i_src] + 1 ) >> 1;
  44             s++;
  45         }
  46         dst += i_dst;
  47         src += 2 * i_src;
  48     }
  49 }
  50
  51 static inline void plane_subsamplev2_vlip( uint8_t *dst, int i_dst,
  52                                            uint8_t *src, int i_src, int w, int h)
  53 {
  54     plane_subsamplev2( dst, i_dst, src + (2*h-1)*i_src, -i_src, w, h );
  55 }
  56
  57 static inline void plane_subsamplehv2( uint8_t *dst, int i_dst,
  58                                        uint8_t *src, int i_src, int w, int h)
  59 {
  60     for( ; h > 0; h-- )
  61     {
  62         uint8_t *d = dst;
  63         uint8_t *s = src;
  64         int     i;
  65         for( i = 0; i < w; i++ )
  66         {
  67             *d++ = ( s[0] + s[1] + s[i_src] + s[i_src+1] + 1 ) >> 2;
  68             s += 2;
  69         }
  70         dst += i_dst;
  71         src += 2 * i_src;
  72     }
  73 }
  74
  75 static inline void plane_subsamplehv2_vlip( uint8_t *dst, int i_dst,
  76                                             uint8_t *src, int i_src, int w, int h)
  77 {
  78     plane_subsamplehv2( dst, i_dst, src + (2*h-1)*i_src, -i_src, w, h );
  79 }
  80
  81 static void i420_to_i420( x264_mc_functions_t *mc,
  82                           x264_frame_t *frm, x264_image_t *img,
  83                           int i_width, int i_height )
  84 {
  85     if( img->i_csp & X264_CSP_VFLIP )
  86     {
  87         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
  88                           img->plane[0], img->i_stride[0],
  89                           i_width, i_height );
  90         plane_copy_vflip( mc, frm->plane[1], frm->i_stride[1],
  91                           img->plane[1], img->i_stride[1],
  92                           i_width / 2, i_height / 2 );
  93         plane_copy_vflip( mc, frm->plane[2], frm->i_stride[2],
  94                           img->plane[2], img->i_stride[2],
  95                           i_width / 2, i_height / 2 );
  96     }
  97     else
  98     {
  99         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 100                         img->plane[0], img->i_stride[0],
 101                         i_width, i_height );
 102         mc->plane_copy( frm->plane[1], frm->i_stride[1],
 103                         img->plane[1], img->i_stride[1],
 104                         i_width / 2, i_height / 2 );
 105         mc->plane_copy( frm->plane[2], frm->i_stride[2],
 106                         img->plane[2], img->i_stride[2],
 107                         i_width / 2, i_height / 2 );
 108     }
 109 }
 110
 111 static void yv12_to_i420( x264_mc_functions_t *mc,
 112                           x264_frame_t *frm, x264_image_t *img,
 113                           int i_width, int i_height )
 114 {
 115     if( img->i_csp & X264_CSP_VFLIP )
 116     {
 117         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
 118                           img->plane[0], img->i_stride[0],
 119                           i_width, i_height );
 120         plane_copy_vflip( mc, frm->plane[2], frm->i_stride[2],
 121                           img->plane[1], img->i_stride[1],
 122                           i_width / 2, i_height / 2 );
 123         plane_copy_vflip( mc, frm->plane[1], frm->i_stride[1],
 124                           img->plane[2], img->i_stride[2],
 125                           i_width / 2, i_height / 2 );
 126     }
 127     else
 128     {
 129         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 130                         img->plane[0], img->i_stride[0],
 131                         i_width, i_height );
 132         mc->plane_copy( frm->plane[2], frm->i_stride[2],
 133                         img->plane[1], img->i_stride[1],
 134                         i_width / 2, i_height / 2 );
 135         mc->plane_copy( frm->plane[1], frm->i_stride[1],
 136                         img->plane[2], img->i_stride[2],
 137                         i_width / 2, i_height / 2 );
 138     }
 139 }
 140
 141 static void i422_to_i420( x264_mc_functions_t *mc,
 142                           x264_frame_t *frm, x264_image_t *img,
 143                           int i_width, int i_height )
 144 {
 145     if( img->i_csp & X264_CSP_VFLIP )
 146     {
 147         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
 148                           img->plane[0], img->i_stride[0],
 149                           i_width, i_height );
 150
 151         plane_subsamplev2_vlip( frm->plane[1], frm->i_stride[1],
 152                                 img->plane[1], img->i_stride[1],
 153                                 i_width / 2, i_height / 2 );
 154         plane_subsamplev2_vlip( frm->plane[2], frm->i_stride[2],
 155                                 img->plane[2], img->i_stride[2],
 156                                 i_width / 2, i_height / 2 );
 157     }
 158     else
 159     {
 160         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 161                         img->plane[0], img->i_stride[0],
 162                         i_width, i_height );
 163
 164         plane_subsamplev2( frm->plane[1], frm->i_stride[1],
 165                            img->plane[1], img->i_stride[1],
 166                            i_width / 2, i_height / 2 );
 167         plane_subsamplev2( frm->plane[2], frm->i_stride[2],
 168                            img->plane[2], img->i_stride[2],
 169                            i_width / 2, i_height / 2 );
 170     }
 171 }
 172
 173 static void i444_to_i420( x264_mc_functions_t *mc,
 174                           x264_frame_t *frm, x264_image_t *img,
 175                           int i_width, int i_height )
 176 {
 177     if( img->i_csp & X264_CSP_VFLIP )
 178     {
 179         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
 180                           img->plane[0], img->i_stride[0],
 181                           i_width, i_height );
 182
 183         plane_subsamplehv2_vlip( frm->plane[1], frm->i_stride[1],
 184                                  img->plane[1], img->i_stride[1],
 185                                  i_width / 2, i_height / 2 );
 186         plane_subsamplehv2_vlip( frm->plane[2], frm->i_stride[2],
 187                                  img->plane[2], img->i_stride[2],
 188                                  i_width / 2, i_height / 2 );
 189     }
 190     else
 191     {
 192         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 193                         img->plane[0], img->i_stride[0],
 194                         i_width, i_height );
 195
 196         plane_subsamplehv2( frm->plane[1], frm->i_stride[1],
 197                             img->plane[1], img->i_stride[1],
 198                             i_width / 2, i_height / 2 );
 199         plane_subsamplehv2( frm->plane[2], frm->i_stride[2],
 200                             img->plane[2], img->i_stride[2],
 201                             i_width / 2, i_height / 2 );
 202     }
 203 }
 204 static void yuyv_to_i420( x264_mc_functions_t *mc,
 205                           x264_frame_t *frm, x264_image_t *img,
 206                           int i_width, int i_height )
 207 {
 208     uint8_t *src = img->plane[0];
 209     int     i_src= img->i_stride[0];
 210
 211     uint8_t *y   = frm->plane[0];
 212     uint8_t *u   = frm->plane[1];
 213     uint8_t *v   = frm->plane[2];
 214
 215     if( img->i_csp & X264_CSP_VFLIP )
 216     {
 217         src += ( i_height - 1 ) * i_src;
 218         i_src = -i_src;
 219     }
 220
 221     for( ; i_height > 0; i_height -= 2 )
 222     {
 223         uint8_t *ss = src;
 224         uint8_t *yy = y;
 225         uint8_t *uu = u;
 226         uint8_t *vv = v;
 227         int w;
 228
 229         for( w = i_width; w > 0; w -= 2 )
 230         {
 231             *yy++ = ss[0];
 232             *yy++ = ss[2];
 233
 234             *uu++ = ( ss[1] + ss[1+i_src] + 1 ) >> 1;
 235             *vv++ = ( ss[3] + ss[3+i_src] + 1 ) >> 1;
 236
 237             ss += 4;
 238         }
 239         src += i_src;
 240         y += frm->i_stride[0];
 241         u += frm->i_stride[1];
 242         v += frm->i_stride[2];
 243
 244         ss = src;
 245         yy = y;
 246         for( w = i_width; w > 0; w -= 2 )
 247         {
 248             *yy++ = ss[0];
 249             *yy++ = ss[2];
 250             ss += 4;
 251         }
 252         src += i_src;
 253         y += frm->i_stride[0];
 254     }
 255 }
 256
 257 /* Same value than in XviD */
 258 #define BITS 8
 259 #define FIX(f) ((int)((f) * (1 << BITS) + 0.5))
 260
 261 #define Y_R   FIX(0.257)
 262 #define Y_G   FIX(0.504)
 263 #define Y_B   FIX(0.098)
 264 #define Y_ADD 16
 265
 266 #define U_R   FIX(0.148)
 267 #define U_G   FIX(0.291)
 268 #define U_B   FIX(0.439)
 269 #define U_ADD 128
 270
 271 #define V_R   FIX(0.439)
 272 #define V_G   FIX(0.368)
 273 #define V_B   FIX(0.071)
 274 #define V_ADD 128
 275 #define RGB_TO_I420( name, POS_R, POS_G, POS_B, S_RGB ) \
 276 static void name( x264_mc_functions_t *mc,              \
 277                   x264_frame_t *frm, x264_image_t *img, \
 278                   int i_width, int i_height )           \
 279 {                                                       \
 280     uint8_t *src = img->plane[0];                       \
 281     int     i_src= img->i_stride[0];                    \
 282     int     i_y  = frm->i_stride[0];                    \
 283     uint8_t *y   = frm->plane[0];                       \
 284     uint8_t *u   = frm->plane[1];                       \
 285     uint8_t *v   = frm->plane[2];                       \
 286                                                         \
 287     if( img->i_csp & X264_CSP_VFLIP )                   \
 288     {                                                   \
 289         src += ( i_height - 1 ) * i_src;                \
 290         i_src = -i_src;                                 \
 291     }                                                   \
 292                                                         \
 293     for(  ; i_height > 0; i_height -= 2 )               \
 294     {                                                   \
 295         uint8_t *ss = src;                              \
 296         uint8_t *yy = y;                                \
 297         uint8_t *uu = u;                                \
 298         uint8_t *vv = v;                                \
 299         int w;                                          \
 300                                                         \
 301         for( w = i_width; w > 0; w -= 2 )               \
 302         {                                               \
 303             int cr = 0,cg = 0,cb = 0;                   \
 304             int r, g, b;                                \
 305                                                         \
 306             /* Luma */                                  \
 307             cr = r = ss[POS_R];                         \
 308             cg = g = ss[POS_G];                         \
 309             cb = b = ss[POS_B];                         \
 310                                                         \
 311             yy[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
 312                                                         \
 313             cr+= r = ss[POS_R+i_src];                   \
 314             cg+= g = ss[POS_G+i_src];                   \
 315             cb+= b = ss[POS_B+i_src];                   \
 316             yy[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
 317             yy++;                                       \
 318             ss += S_RGB;                                \
 319                                                         \
 320             cr+= r = ss[POS_R];                         \
 321             cg+= g = ss[POS_G];                         \
 322             cb+= b = ss[POS_B];                         \
 323                                                         \
 324             yy[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
 325                                                         \
 326             cr+= r = ss[POS_R+i_src];                   \
 327             cg+= g = ss[POS_G+i_src];                   \
 328             cb+= b = ss[POS_B+i_src];                   \
 329             yy[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
 330             yy++;                                       \
 331             ss += S_RGB;                                \
 332                                                         \
 333             /* Chroma */                                \
 334             *uu++ = (uint8_t)(U_ADD + ((-U_R * cr - U_G * cg + U_B * cb) >> (BITS+2)) ); \
 335             *vv++ = (uint8_t)(V_ADD + (( V_R * cr - V_G * cg - V_B * cb) >> (BITS+2)) ); \
 336         }                                               \
 337                                                         \
 338         src += 2*i_src;                                   \
 339         y += 2*frm->i_stride[0];                        \
 340         u += frm->i_stride[1];                          \
 341         v += frm->i_stride[2];                          \
 342     }                                                   \
 343 }
 344
 345 RGB_TO_I420( rgb_to_i420,  0, 1, 2, 3 );
 346 RGB_TO_I420( bgr_to_i420,  2, 1, 0, 3 );
 347 RGB_TO_I420( bgra_to_i420, 2, 1, 0, 4 );
 348
 349 void x264_csp_init( int cpu, int i_csp, x264_csp_function_t *pf )
 350 {
 351     switch( i_csp )
 352     {
 353         case X264_CSP_I420:
 354             pf->convert[X264_CSP_I420] = i420_to_i420;
 355             pf->convert[X264_CSP_I422] = i422_to_i420;
 356             pf->convert[X264_CSP_I444] = i444_to_i420;
 357             pf->convert[X264_CSP_YV12] = yv12_to_i420;
 358             pf->convert[X264_CSP_YUYV] = yuyv_to_i420;
 359             pf->convert[X264_CSP_RGB ] =  rgb_to_i420;
 360             pf->convert[X264_CSP_BGR ] =  bgr_to_i420;
 361             pf->convert[X264_CSP_BGRA] = bgra_to_i420;
 362             break;
 363
 364         default:
 365             /* For now, can't happen */
 366             fprintf( stderr, "arg in x264_csp_init\n" );
 367             exit( -1 );
 368             break;
 369     }
 370 }
 371