git.sesse.net Git - x264/blob - common/arm/mc-c.c

   1 /*****************************************************************************
   2  * mc-c.c: h264 encoder library (Motion Compensation)
   3  *****************************************************************************
   4  * Copyright (C) 2009 x264 project
   5  *
   6  * Authors: David Conrad <lessen42@gmail.com>
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  21  *****************************************************************************/
  22
  23 #include "common/common.h"
  24 #include "mc.h"
  25
  26 void x264_prefetch_ref_arm( uint8_t *, int, int );
  27 void x264_prefetch_fenc_arm( uint8_t *, int, uint8_t *, int, int );
  28
  29 void *x264_memcpy_aligned_neon( void * dst, const void * src, size_t n );
  30 void x264_memzero_aligned_neon( void *dst, int n );
  31
  32 void x264_pixel_avg_16x16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  33 void x264_pixel_avg_16x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  34 void x264_pixel_avg_8x16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  35 void x264_pixel_avg_8x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  36 void x264_pixel_avg_8x4_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  37 void x264_pixel_avg_4x8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  38 void x264_pixel_avg_4x4_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  39 void x264_pixel_avg_4x2_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
  40
  41 void x264_pixel_avg2_w4_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
  42 void x264_pixel_avg2_w8_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
  43 void x264_pixel_avg2_w16_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
  44 void x264_pixel_avg2_w20_neon( uint8_t *, int, uint8_t *, int, uint8_t *, int );
  45
  46 void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
  47 void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
  48 void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
  49 void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
  50
  51 void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
  52 void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
  53
  54 static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
  55 {
  56     NULL,
  57     x264_pixel_avg2_w4_neon,
  58     x264_pixel_avg2_w8_neon,
  59     x264_pixel_avg2_w16_neon,   // no slower than w12, so no point in a separate function
  60     x264_pixel_avg2_w16_neon,
  61     x264_pixel_avg2_w20_neon,
  62 };
  63
  64 static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, int, uint8_t *, int, int ) =
  65 {
  66     NULL,
  67     x264_mc_copy_w4_neon,
  68     x264_mc_copy_w8_neon,
  69     NULL,
  70     x264_mc_copy_w16_neon,
  71 };
  72
  73 static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
  74 static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
  75
  76 static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
  77                           uint8_t *src[4], int i_src_stride,
  78                           int mvx, int mvy,
  79                           int i_width, int i_height, const x264_weight_t *weight )
  80 {
  81     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
  82     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
  83     uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
  84     if ( (mvy&3) == 3 )             // explict if() to force conditional add
  85         src1 += i_src_stride;
  86
  87     if( qpel_idx & 5 ) /* qpel interpolation needed */
  88     {
  89         uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
  90         x264_pixel_avg_wtab_neon[i_width>>2](
  91                 dst, i_dst_stride, src1, i_src_stride,
  92                 src2, i_height );
  93         if( weight->weightfn )
  94             weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
  95     }
  96     else if( weight->weightfn )
  97         weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
  98     else
  99         x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
 100 }
 101
 102 static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
 103                               uint8_t *src[4], int i_src_stride,
 104                               int mvx, int mvy,
 105                               int i_width, int i_height, const x264_weight_t *weight )
 106 {
 107     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
 108     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
 109     uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
 110     if ( (mvy&3) == 3 )             // explict if() to force conditional add
 111         src1 += i_src_stride;
 112
 113     if( qpel_idx & 5 ) /* qpel interpolation needed */
 114     {
 115         uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
 116         x264_pixel_avg_wtab_neon[i_width>>2](
 117                 dst, *i_dst_stride, src1, i_src_stride,
 118                 src2, i_height );
 119         if( weight->weightfn )
 120             weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
 121         return dst;
 122     }
 123     else if( weight->weightfn )
 124     {
 125         weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
 126         return dst;
 127     }
 128     else
 129     {
 130         *i_dst_stride = i_src_stride;
 131         return src1;
 132     }
 133 }
 134
 135 void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
 136 void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
 137 void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
 138
 139 static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
 140                               int stride, int width, int height, int16_t *buf )
 141 {
 142     int realign = (intptr_t)src & 15;
 143     src -= realign;
 144     dstv -= realign;
 145     dstc -= realign;
 146     dsth -= realign;
 147     width += realign;
 148     while( height-- )
 149     {
 150         x264_hpel_filter_v_neon( dstv, src, buf+8, stride, width );
 151         x264_hpel_filter_c_neon( dstc, buf+8, width );
 152         x264_hpel_filter_h_neon( dsth, src, width );
 153         dsth += stride;
 154         dstv += stride;
 155         dstc += stride;
 156         src  += stride;
 157     }
 158 }
 159
 160 void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 161 {
 162     if( !(cpu&X264_CPU_ARMV6) )
 163         return;
 164
 165     pf->prefetch_fenc = x264_prefetch_fenc_arm;
 166     pf->prefetch_ref  = x264_prefetch_ref_arm;
 167
 168     if( !(cpu&X264_CPU_NEON) )
 169         return;
 170
 171     pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
 172     pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
 173     pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
 174     pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_neon;
 175
 176     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
 177     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_neon;
 178     pf->avg[PIXEL_8x16]  = x264_pixel_avg_8x16_neon;
 179     pf->avg[PIXEL_8x8]   = x264_pixel_avg_8x8_neon;
 180     pf->avg[PIXEL_8x4]   = x264_pixel_avg_8x4_neon;
 181     pf->avg[PIXEL_4x8]   = x264_pixel_avg_4x8_neon;
 182     pf->avg[PIXEL_4x4]   = x264_pixel_avg_4x4_neon;
 183     pf->avg[PIXEL_4x2]   = x264_pixel_avg_4x2_neon;
 184
 185     pf->memcpy_aligned  = x264_memcpy_aligned_neon;
 186     pf->memzero_aligned = x264_memzero_aligned_neon;
 187
 188     pf->mc_chroma = x264_mc_chroma_neon;
 189     pf->mc_luma = mc_luma_neon;
 190     pf->get_ref = get_ref_neon;
 191     pf->hpel_filter = hpel_filter_neon;
 192     pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
 193 }