git.sesse.net Git - x264/blob - encoder/me.c

   1 /*****************************************************************************
   2  * me.c: h264 encoder library (Motion Estimation)
   3  *****************************************************************************
   4  * Copyright (C) 2003 Laurent Aimar
   5  * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
   6  *
   7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
   8  *          Loren Merritt <lorenm@u.washington.edu>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include <stdlib.h>
  26 #include <stdio.h>
  27 #include <string.h>
  28
  29 #include "common/common.h"
  30 #include "me.h"
  31
  32 /* presets selected from good points on the speed-vs-quality curve of several test videos
  33  * subpel_iters[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
  34  * where me_* are the number of EPZS iterations run on all candidate block types,
  35  * and refine_* are run only on the winner. */
  36 static const int subpel_iterations[][4] =
  37    {{1,0,0,0},
  38     {1,1,0,0},
  39     {1,2,0,0},
  40     {0,2,1,0},
  41     {0,2,1,1},
  42     {0,2,1,2}};
  43
  44 static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
  45
  46 #define COST_MV( mx, my ) \
  47 { \
  48     int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], m->i_stride[0],     \
  49                    &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0] ) \
  50              + p_cost_mvx[ (mx)<<2 ]  \
  51              + p_cost_mvy[ (my)<<2 ]; \
  52     if( cost < bcost ) \
  53     {                  \
  54         bcost = cost;  \
  55         bmx = mx;      \
  56         bmy = my;      \
  57     } \
  58 }
  59
  60 void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
  61 {
  62     const int i_pixel = m->i_pixel;
  63     const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
  64     int bmx, bmy, bcost;
  65     int omx, omy;
  66     uint8_t *p_fref = m->p_fref[0];
  67     int i_iter;
  68
  69     const int mv_x_min = h->mb.mv_min_fpel[0];
  70     const int mv_y_min = h->mb.mv_min_fpel[1];
  71     const int mv_x_max = h->mb.mv_max_fpel[0];
  72     const int mv_y_max = h->mb.mv_max_fpel[1];
  73
  74     const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
  75     const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
  76
  77
  78     /* init with mvp */
  79     /* XXX: We don't need to clamp because the way diamond work, we will
  80      * never go outside padded picture, and predict mv won't compute vector
  81      * with componant magnitude greater.
  82      * XXX: if some vector can go outside, (accelerator, ....) you need to clip
  83      * them yourself */
  84     bmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
  85     bmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );
  86     bcost = COST_MAX;
  87     COST_MV( bmx, bmy );
  88     /* I don't know why this helps */
  89     bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];
  90
  91     /* try extra predictors if provided */
  92     for( i_iter = 0; i_iter < i_mvc; i_iter++ )
  93     {
  94         const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, mv_x_min, mv_x_max );
  95         const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, mv_y_min, mv_y_max );
  96         if( mx != bmx || my != bmy )
  97             COST_MV( mx, my );
  98     }
  99
 100     COST_MV( 0, 0 );
 101
 102     if( h->mb.i_subpel_refine >= 2 )
 103     {
 104         /* hexagon search */
 105         /* Don't need to test mv_range each time, we won't go outside picture+padding */
 106         omx = bmx;
 107         omy = bmy;
 108         for( i_iter = 0; i_iter < 8; i_iter++ )
 109         {
 110             COST_MV( omx-2, omy   );
 111             COST_MV( omx-1, omy+2 );
 112             COST_MV( omx+1, omy+2 );
 113             COST_MV( omx+2, omy   );
 114             COST_MV( omx+1, omy-2 );
 115             COST_MV( omx-1, omy-2 );
 116
 117             if( bmx == omx && bmy == omy )
 118                 break;
 119             omx = bmx;
 120             omy = bmy;
 121         }
 122
 123         /* square refine */
 124         COST_MV( omx-1, omy-1 );
 125         COST_MV( omx-1, omy   );
 126         COST_MV( omx-1, omy+1 );
 127         COST_MV( omx  , omy-1 );
 128         COST_MV( omx  , omy+1 );
 129         COST_MV( omx+1, omy-1 );
 130         COST_MV( omx+1, omy   );
 131         COST_MV( omx+1, omy+1 );
 132     }
 133     else
 134     {
 135         /* diamond search */
 136         for( i_iter = 0; i_iter < 16; i_iter++ )
 137         {
 138             omx = bmx;
 139             omy = bmy;
 140             COST_MV( omx  , omy-1 );
 141             COST_MV( omx  , omy+1 );
 142             COST_MV( omx-1, omy   );
 143             COST_MV( omx+1, omy   );
 144             if( bmx == omx && bmy == omy )
 145                 break;
 146         }
 147     }
 148
 149     /* -> qpel mv */
 150     m->mv[0] = bmx << 2;
 151     m->mv[1] = bmy << 2;
 152
 153     /* compute the real cost */
 154     m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
 155     m->cost = h->pixf.satd[i_pixel]( m->p_fenc[0], m->i_stride[0],
 156                     &p_fref[bmy * m->i_stride[0] + bmx], m->i_stride[0] )
 157             + m->cost_mv;
 158     if( b_chroma_me )
 159     {
 160         const int bw = x264_pixel_size[m->i_pixel].w;
 161         const int bh = x264_pixel_size[m->i_pixel].h;
 162         DECLARE_ALIGNED( uint8_t, pix[8*8*2], 16 );
 163         h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
 164         h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix+8*8, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
 165         m->cost += h->pixf.satd[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 )
 166                  + h->pixf.satd[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix+8*8, 8 );
 167     }
 168
 169     /* subpel refine */
 170     if( h->mb.i_subpel_refine >= 3 )
 171     {
 172         int hpel, qpel;
 173
 174         /* early termination (when examining multiple reference frames)
 175          * FIXME: this can update fullpel_thresh even if the match
 176          *        ref is rejected after subpel refinement */
 177         if( p_fullpel_thresh )
 178         {
 179             if( (m->cost*7)>>3 > *p_fullpel_thresh )
 180                 return;
 181             else if( m->cost < *p_fullpel_thresh )
 182                 *p_fullpel_thresh = m->cost;
 183         }
 184
 185         hpel = subpel_iterations[h->mb.i_subpel_refine][2];
 186         qpel = subpel_iterations[h->mb.i_subpel_refine][3];
 187         refine_subpel( h, m, hpel, qpel );
 188     }
 189 }
 190 #undef COST_MV
 191
 192 void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
 193 {
 194     int hpel = subpel_iterations[h->mb.i_subpel_refine][0];
 195     int qpel = subpel_iterations[h->mb.i_subpel_refine][1];
 196 //  if( hpel || qpel )
 197         refine_subpel( h, m, hpel, qpel );
 198 }
 199
 200 #define COST_MV( mx, my, dir ) \
 201 { \
 202     int stride = 16; \
 203     uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
 204     int cost = h->pixf.satd[i_pixel]( m->p_fenc[0], m->i_stride[0], src, stride ) \
 205              + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
 206     if( b_chroma_me && cost < bcost ) \
 207     { \
 208         h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, mx, my, bw/2, bh/2 ); \
 209         cost += h->pixf.satd[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 ); \
 210         if( cost < bcost ) \
 211         { \
 212             h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix, 8, mx, my, bw/2, bh/2 ); \
 213             cost += h->pixf.satd[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix, 8 ); \
 214         } \
 215     } \
 216     if( cost < bcost ) \
 217     {                  \
 218         bcost = cost;  \
 219         bdir = dir;    \
 220     } \
 221 }
 222
 223 static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters )
 224 {
 225     const int bw = x264_pixel_size[m->i_pixel].w;
 226     const int bh = x264_pixel_size[m->i_pixel].h;
 227     const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
 228     const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
 229     const int i_pixel = m->i_pixel;
 230     const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
 231
 232     DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
 233     int step, i;
 234
 235     int bmx = m->mv[0];
 236     int bmy = m->mv[1];
 237
 238     for( step = 2; step >= 1; step-- )
 239     {
 240         for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
 241         {
 242             int bcost = COST_MAX;
 243             int bdir = 0;
 244             COST_MV( bmx, bmy - step, 0 );
 245             COST_MV( bmx, bmy + step, 1 );
 246             COST_MV( bmx - step, bmy, 2 );
 247             COST_MV( bmx + step, bmy, 3 );
 248
 249             if( bcost < m->cost )
 250             {
 251                 m->cost = bcost;
 252                 if( bdir == 0 )      bmy -= step;
 253                 else if( bdir == 1 ) bmy += step;
 254                 else if( bdir == 2 ) bmx -= step;
 255                 else if( bdir == 3 ) bmx += step;
 256             }
 257             else break;
 258         }
 259     }
 260
 261     m->mv[0] = bmx;
 262     m->mv[1] = bmy;
 263     m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];
 264 }
 265