git.sesse.net Git - x264/blob - common/csp.c

   1 /*****************************************************************************
   2  * csp.c: h264 encoder library
   3  *****************************************************************************
   4  * Copyright (C) 2004 Laurent Aimar
   5  * $Id: csp.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  22  *****************************************************************************/
  23
  24 #include <stdio.h>
  25 #include <string.h>
  26
  27 #include "common.h"
  28
  29 static inline void plane_copy_vflip( x264_mc_functions_t *mc,
  30                                      uint8_t *dst, int i_dst,
  31                                      uint8_t *src, int i_src, int w, int h)
  32 {
  33     mc->plane_copy( dst, i_dst, src + (h -1)*i_src, -i_src, w, h );
  34 }
  35
  36 static inline void plane_subsamplev2( uint8_t *dst, int i_dst,
  37                                       uint8_t *src, int i_src, int w, int h)
  38 {
  39     for( ; h > 0; h-- )
  40     {
  41         uint8_t *d = dst;
  42         uint8_t *s = src;
  43         int     i;
  44         for( i = 0; i < w; i++ )
  45         {
  46             *d++ = ( s[0] + s[i_src] + 1 ) >> 1;
  47             s++;
  48         }
  49         dst += i_dst;
  50         src += 2 * i_src;
  51     }
  52 }
  53
  54 static inline void plane_subsamplev2_vlip( uint8_t *dst, int i_dst,
  55                                            uint8_t *src, int i_src, int w, int h)
  56 {
  57     plane_subsamplev2( dst, i_dst, src + (2*h-1)*i_src, -i_src, w, h );
  58 }
  59
  60 static inline void plane_subsamplehv2( uint8_t *dst, int i_dst,
  61                                        uint8_t *src, int i_src, int w, int h)
  62 {
  63     for( ; h > 0; h-- )
  64     {
  65         uint8_t *d = dst;
  66         uint8_t *s = src;
  67         int     i;
  68         for( i = 0; i < w; i++ )
  69         {
  70             *d++ = ( s[0] + s[1] + s[i_src] + s[i_src+1] + 1 ) >> 2;
  71             s += 2;
  72         }
  73         dst += i_dst;
  74         src += 2 * i_src;
  75     }
  76 }
  77
  78 static inline void plane_subsamplehv2_vlip( uint8_t *dst, int i_dst,
  79                                             uint8_t *src, int i_src, int w, int h)
  80 {
  81     plane_subsamplehv2( dst, i_dst, src + (2*h-1)*i_src, -i_src, w, h );
  82 }
  83
  84 static void i420_to_i420( x264_mc_functions_t *mc,
  85                           x264_frame_t *frm, x264_image_t *img,
  86                           int i_width, int i_height )
  87 {
  88     if( img->i_csp & X264_CSP_VFLIP )
  89     {
  90         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
  91                           img->plane[0], img->i_stride[0],
  92                           i_width, i_height );
  93         plane_copy_vflip( mc, frm->plane[1], frm->i_stride[1],
  94                           img->plane[1], img->i_stride[1],
  95                           i_width / 2, i_height / 2 );
  96         plane_copy_vflip( mc, frm->plane[2], frm->i_stride[2],
  97                           img->plane[2], img->i_stride[2],
  98                           i_width / 2, i_height / 2 );
  99     }
 100     else
 101     {
 102         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 103                         img->plane[0], img->i_stride[0],
 104                         i_width, i_height );
 105         mc->plane_copy( frm->plane[1], frm->i_stride[1],
 106                         img->plane[1], img->i_stride[1],
 107                         i_width / 2, i_height / 2 );
 108         mc->plane_copy( frm->plane[2], frm->i_stride[2],
 109                         img->plane[2], img->i_stride[2],
 110                         i_width / 2, i_height / 2 );
 111     }
 112 }
 113
 114 static void yv12_to_i420( x264_mc_functions_t *mc,
 115                           x264_frame_t *frm, x264_image_t *img,
 116                           int i_width, int i_height )
 117 {
 118     if( img->i_csp & X264_CSP_VFLIP )
 119     {
 120         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
 121                           img->plane[0], img->i_stride[0],
 122                           i_width, i_height );
 123         plane_copy_vflip( mc, frm->plane[2], frm->i_stride[2],
 124                           img->plane[1], img->i_stride[1],
 125                           i_width / 2, i_height / 2 );
 126         plane_copy_vflip( mc, frm->plane[1], frm->i_stride[1],
 127                           img->plane[2], img->i_stride[2],
 128                           i_width / 2, i_height / 2 );
 129     }
 130     else
 131     {
 132         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 133                         img->plane[0], img->i_stride[0],
 134                         i_width, i_height );
 135         mc->plane_copy( frm->plane[2], frm->i_stride[2],
 136                         img->plane[1], img->i_stride[1],
 137                         i_width / 2, i_height / 2 );
 138         mc->plane_copy( frm->plane[1], frm->i_stride[1],
 139                         img->plane[2], img->i_stride[2],
 140                         i_width / 2, i_height / 2 );
 141     }
 142 }
 143
 144 static void i422_to_i420( x264_mc_functions_t *mc,
 145                           x264_frame_t *frm, x264_image_t *img,
 146                           int i_width, int i_height )
 147 {
 148     if( img->i_csp & X264_CSP_VFLIP )
 149     {
 150         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
 151                           img->plane[0], img->i_stride[0],
 152                           i_width, i_height );
 153
 154         plane_subsamplev2_vlip( frm->plane[1], frm->i_stride[1],
 155                                 img->plane[1], img->i_stride[1],
 156                                 i_width / 2, i_height / 2 );
 157         plane_subsamplev2_vlip( frm->plane[2], frm->i_stride[2],
 158                                 img->plane[2], img->i_stride[2],
 159                                 i_width / 2, i_height / 2 );
 160     }
 161     else
 162     {
 163         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 164                         img->plane[0], img->i_stride[0],
 165                         i_width, i_height );
 166
 167         plane_subsamplev2( frm->plane[1], frm->i_stride[1],
 168                            img->plane[1], img->i_stride[1],
 169                            i_width / 2, i_height / 2 );
 170         plane_subsamplev2( frm->plane[2], frm->i_stride[2],
 171                            img->plane[2], img->i_stride[2],
 172                            i_width / 2, i_height / 2 );
 173     }
 174 }
 175
 176 static void i444_to_i420( x264_mc_functions_t *mc,
 177                           x264_frame_t *frm, x264_image_t *img,
 178                           int i_width, int i_height )
 179 {
 180     if( img->i_csp & X264_CSP_VFLIP )
 181     {
 182         plane_copy_vflip( mc, frm->plane[0], frm->i_stride[0],
 183                           img->plane[0], img->i_stride[0],
 184                           i_width, i_height );
 185
 186         plane_subsamplehv2_vlip( frm->plane[1], frm->i_stride[1],
 187                                  img->plane[1], img->i_stride[1],
 188                                  i_width / 2, i_height / 2 );
 189         plane_subsamplehv2_vlip( frm->plane[2], frm->i_stride[2],
 190                                  img->plane[2], img->i_stride[2],
 191                                  i_width / 2, i_height / 2 );
 192     }
 193     else
 194     {
 195         mc->plane_copy( frm->plane[0], frm->i_stride[0],
 196                         img->plane[0], img->i_stride[0],
 197                         i_width, i_height );
 198
 199         plane_subsamplehv2( frm->plane[1], frm->i_stride[1],
 200                             img->plane[1], img->i_stride[1],
 201                             i_width / 2, i_height / 2 );
 202         plane_subsamplehv2( frm->plane[2], frm->i_stride[2],
 203                             img->plane[2], img->i_stride[2],
 204                             i_width / 2, i_height / 2 );
 205     }
 206 }
 207 static void yuyv_to_i420( x264_mc_functions_t *mc,
 208                           x264_frame_t *frm, x264_image_t *img,
 209                           int i_width, int i_height )
 210 {
 211     uint8_t *src = img->plane[0];
 212     int     i_src= img->i_stride[0];
 213
 214     uint8_t *y   = frm->plane[0];
 215     uint8_t *u   = frm->plane[1];
 216     uint8_t *v   = frm->plane[2];
 217
 218     if( img->i_csp & X264_CSP_VFLIP )
 219     {
 220         src += ( i_height - 1 ) * i_src;
 221         i_src = -i_src;
 222     }
 223
 224     for( ; i_height > 0; i_height -= 2 )
 225     {
 226         uint8_t *ss = src;
 227         uint8_t *yy = y;
 228         uint8_t *uu = u;
 229         uint8_t *vv = v;
 230         int w;
 231
 232         for( w = i_width; w > 0; w -= 2 )
 233         {
 234             *yy++ = ss[0];
 235             *yy++ = ss[2];
 236
 237             *uu++ = ( ss[1] + ss[1+i_src] + 1 ) >> 1;
 238             *vv++ = ( ss[3] + ss[3+i_src] + 1 ) >> 1;
 239
 240             ss += 4;
 241         }
 242         src += i_src;
 243         y += frm->i_stride[0];
 244         u += frm->i_stride[1];
 245         v += frm->i_stride[2];
 246
 247         ss = src;
 248         yy = y;
 249         for( w = i_width; w > 0; w -= 2 )
 250         {
 251             *yy++ = ss[0];
 252             *yy++ = ss[2];
 253             ss += 4;
 254         }
 255         src += i_src;
 256         y += frm->i_stride[0];
 257     }
 258 }
 259
 260 /* Same value than in XviD */
 261 #define BITS 8
 262 #define FIX(f) ((int)((f) * (1 << BITS) + 0.5))
 263
 264 #define Y_R   FIX(0.257)
 265 #define Y_G   FIX(0.504)
 266 #define Y_B   FIX(0.098)
 267 #define Y_ADD 16
 268
 269 #define U_R   FIX(0.148)
 270 #define U_G   FIX(0.291)
 271 #define U_B   FIX(0.439)
 272 #define U_ADD 128
 273
 274 #define V_R   FIX(0.439)
 275 #define V_G   FIX(0.368)
 276 #define V_B   FIX(0.071)
 277 #define V_ADD 128
 278 #define RGB_TO_I420( name, POS_R, POS_G, POS_B, S_RGB ) \
 279 static void name( x264_mc_functions_t *mc,              \
 280                   x264_frame_t *frm, x264_image_t *img, \
 281                   int i_width, int i_height )           \
 282 {                                                       \
 283     uint8_t *src = img->plane[0];                       \
 284     int     i_src= img->i_stride[0];                    \
 285     int     i_y  = frm->i_stride[0];                    \
 286     uint8_t *y   = frm->plane[0];                       \
 287     uint8_t *u   = frm->plane[1];                       \
 288     uint8_t *v   = frm->plane[2];                       \
 289                                                         \
 290     if( img->i_csp & X264_CSP_VFLIP )                   \
 291     {                                                   \
 292         src += ( i_height - 1 ) * i_src;                \
 293         i_src = -i_src;                                 \
 294     }                                                   \
 295                                                         \
 296     for(  ; i_height > 0; i_height -= 2 )               \
 297     {                                                   \
 298         uint8_t *ss = src;                              \
 299         uint8_t *yy = y;                                \
 300         uint8_t *uu = u;                                \
 301         uint8_t *vv = v;                                \
 302         int w;                                          \
 303                                                         \
 304         for( w = i_width; w > 0; w -= 2 )               \
 305         {                                               \
 306             int cr = 0,cg = 0,cb = 0;                   \
 307             int r, g, b;                                \
 308                                                         \
 309             /* Luma */                                  \
 310             cr = r = ss[POS_R];                         \
 311             cg = g = ss[POS_G];                         \
 312             cb = b = ss[POS_B];                         \
 313                                                         \
 314             yy[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
 315                                                         \
 316             cr+= r = ss[POS_R+i_src];                   \
 317             cg+= g = ss[POS_G+i_src];                   \
 318             cb+= b = ss[POS_B+i_src];                   \
 319             yy[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
 320             yy++;                                       \
 321             ss += S_RGB;                                \
 322                                                         \
 323             cr+= r = ss[POS_R];                         \
 324             cg+= g = ss[POS_G];                         \
 325             cb+= b = ss[POS_B];                         \
 326                                                         \
 327             yy[0] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);    \
 328                                                         \
 329             cr+= r = ss[POS_R+i_src];                   \
 330             cg+= g = ss[POS_G+i_src];                   \
 331             cb+= b = ss[POS_B+i_src];                   \
 332             yy[i_y] = Y_ADD + ((Y_R * r + Y_G * g + Y_B * b) >> BITS);  \
 333             yy++;                                       \
 334             ss += S_RGB;                                \
 335                                                         \
 336             /* Chroma */                                \
 337             *uu++ = (uint8_t)(U_ADD + ((-U_R * cr - U_G * cg + U_B * cb) >> (BITS+2)) ); \
 338             *vv++ = (uint8_t)(V_ADD + (( V_R * cr - V_G * cg - V_B * cb) >> (BITS+2)) ); \
 339         }                                               \
 340                                                         \
 341         src += 2*i_src;                                   \
 342         y += 2*frm->i_stride[0];                        \
 343         u += frm->i_stride[1];                          \
 344         v += frm->i_stride[2];                          \
 345     }                                                   \
 346 }
 347
 348 RGB_TO_I420( rgb_to_i420,  0, 1, 2, 3 );
 349 RGB_TO_I420( bgr_to_i420,  2, 1, 0, 3 );
 350 RGB_TO_I420( bgra_to_i420, 2, 1, 0, 4 );
 351
 352 void x264_csp_init( int cpu, int i_csp, x264_csp_function_t *pf )
 353 {
 354     switch( i_csp )
 355     {
 356         case X264_CSP_I420:
 357             pf->convert[X264_CSP_I420] = i420_to_i420;
 358             pf->convert[X264_CSP_I422] = i422_to_i420;
 359             pf->convert[X264_CSP_I444] = i444_to_i420;
 360             pf->convert[X264_CSP_YV12] = yv12_to_i420;
 361             pf->convert[X264_CSP_YUYV] = yuyv_to_i420;
 362             pf->convert[X264_CSP_RGB ] =  rgb_to_i420;
 363             pf->convert[X264_CSP_BGR ] =  bgr_to_i420;
 364             pf->convert[X264_CSP_BGRA] = bgra_to_i420;
 365             break;
 366
 367         default:
 368             /* For now, can't happen */
 369             fprintf( stderr, "arg in x264_csp_init\n" );
 370             exit( -1 );
 371             break;
 372     }
 373 }
 374