git.sesse.net Git - x264/blob - common/rectangle.h

   1 /*****************************************************************************
   2  * rectangle.h: rectangle filling
   3  *****************************************************************************
   4  * Copyright (C) 2003-2016 x264 project
   5  *
   6  * Authors: Fiona Glaser <fiona@x264.com>
   7  *          Loren Merritt <lorenm@u.washington.edu>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  22  *
  23  * This program is also available under a commercial proprietary license.
  24  * For more information, contact us at licensing@x264.com.
  25  *****************************************************************************/
  26
  27 /* This function should only be called with constant w / h / s arguments! */
  28 static ALWAYS_INLINE void x264_macroblock_cache_rect( void *dst, int w, int h, int s, uint32_t v )
  29 {
  30     uint8_t *d = dst;
  31     uint16_t v2 = s == 2 ? v : v * 0x101;
  32     uint32_t v4 = s == 4 ? v : s == 2 ? v * 0x10001 : v * 0x1010101;
  33     uint64_t v8 = v4 + ((uint64_t)v4 << 32);
  34     s *= 8;
  35
  36     if( w == 2 )
  37     {
  38         M16( d+s*0 ) = v2;
  39         if( h == 1 ) return;
  40         M16( d+s*1 ) = v2;
  41         if( h == 2 ) return;
  42         M16( d+s*2 ) = v2;
  43         M16( d+s*3 ) = v2;
  44     }
  45     else if( w == 4 )
  46     {
  47         M32( d+s*0 ) = v4;
  48         if( h == 1 ) return;
  49         M32( d+s*1 ) = v4;
  50         if( h == 2 ) return;
  51         M32( d+s*2 ) = v4;
  52         M32( d+s*3 ) = v4;
  53     }
  54     else if( w == 8 )
  55     {
  56         if( WORD_SIZE == 8 )
  57         {
  58             M64( d+s*0 ) = v8;
  59             if( h == 1 ) return;
  60             M64( d+s*1 ) = v8;
  61             if( h == 2 ) return;
  62             M64( d+s*2 ) = v8;
  63             M64( d+s*3 ) = v8;
  64         }
  65         else
  66         {
  67             M32( d+s*0+0 ) = v4;
  68             M32( d+s*0+4 ) = v4;
  69             if( h == 1 ) return;
  70             M32( d+s*1+0 ) = v4;
  71             M32( d+s*1+4 ) = v4;
  72             if( h == 2 ) return;
  73             M32( d+s*2+0 ) = v4;
  74             M32( d+s*2+4 ) = v4;
  75             M32( d+s*3+0 ) = v4;
  76             M32( d+s*3+4 ) = v4;
  77         }
  78     }
  79     else if( w == 16 )
  80     {
  81         /* height 1, width 16 doesn't occur */
  82         assert( h != 1 );
  83 #if HAVE_VECTOREXT && defined(__SSE__)
  84         v4si v16 = {v,v,v,v};
  85
  86         M128( d+s*0+0 ) = (__m128)v16;
  87         M128( d+s*1+0 ) = (__m128)v16;
  88         if( h == 2 ) return;
  89         M128( d+s*2+0 ) = (__m128)v16;
  90         M128( d+s*3+0 ) = (__m128)v16;
  91 #else
  92         if( WORD_SIZE == 8 )
  93         {
  94             do
  95             {
  96                 M64( d+s*0+0 ) = v8;
  97                 M64( d+s*0+8 ) = v8;
  98                 M64( d+s*1+0 ) = v8;
  99                 M64( d+s*1+8 ) = v8;
 100                 h -= 2;
 101                 d += s*2;
 102             } while( h );
 103         }
 104         else
 105         {
 106             do
 107             {
 108                 M32( d+ 0 ) = v4;
 109                 M32( d+ 4 ) = v4;
 110                 M32( d+ 8 ) = v4;
 111                 M32( d+12 ) = v4;
 112                 d += s;
 113             } while( --h );
 114         }
 115 #endif
 116     }
 117     else
 118         assert(0);
 119 }
 120
 121 extern void (*x264_cache_mv_func_table[10])(void *, uint32_t);\
 122 extern void (*x264_cache_mvd_func_table[10])(void *, uint32_t);\
 123 extern void (*x264_cache_ref_func_table[10])(void *, uint32_t);\
 124
/* Same as x264_macroblock_cache_mv(), but takes the motion vector through a
 * pointer: the value passed on is M32( mv ) (presumably the 32 bits stored at
 * mv -- confirm against the M32 definition). */
#define x264_macroblock_cache_mv_ptr( a, x, y, w, h, l, mv ) x264_macroblock_cache_mv( a, x, y, w, h, l, M32( mv ) )
 126 static ALWAYS_INLINE void x264_macroblock_cache_mv( x264_t *h, int x, int y, int width, int height, int i_list, uint32_t mv )
 127 {
 128     void *mv_cache = &h->mb.cache.mv[i_list][X264_SCAN8_0+x+8*y];
 129     if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
 130         x264_cache_mv_func_table[width + (height<<1)-3]( mv_cache, mv );
 131     else
 132         x264_macroblock_cache_rect( mv_cache, width*4, height, 4, mv );
 133 }
 134 static ALWAYS_INLINE void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width, int height, int i_list, uint16_t mvd )
 135 {
 136     void *mvd_cache = &h->mb.cache.mvd[i_list][X264_SCAN8_0+x+8*y];
 137     if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
 138         x264_cache_mvd_func_table[width + (height<<1)-3]( mvd_cache, mvd );
 139     else
 140         x264_macroblock_cache_rect( mvd_cache, width*2, height, 2, mvd );
 141 }
 142 static ALWAYS_INLINE void x264_macroblock_cache_ref( x264_t *h, int x, int y, int width, int height, int i_list, uint8_t ref )
 143 {
 144     void *ref_cache = &h->mb.cache.ref[i_list][X264_SCAN8_0+x+8*y];
 145     if( x264_nonconstant_p( width ) || x264_nonconstant_p( height ) )
 146         x264_cache_ref_func_table[width + (height<<1)-3]( ref_cache, ref );
 147     else
 148         x264_macroblock_cache_rect( ref_cache, width, height, 1, ref );
 149 }
 150 static ALWAYS_INLINE void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
 151 {
 152     x264_macroblock_cache_rect( &h->mb.cache.skip[X264_SCAN8_0+x+8*y], width, height, 1, b_skip );
 153 }
 154 static ALWAYS_INLINE void x264_macroblock_cache_intra8x8_pred( x264_t *h, int x, int y, int i_mode )
 155 {
 156     x264_macroblock_cache_rect( &h->mb.cache.intra4x4_pred_mode[X264_SCAN8_0+x+8*y], 2, 2, 1, i_mode );
 157 }