* Motion estimation
* Copyright (c) 2002-2004 Michael Niedermayer
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
*/
/**
- * @file motion_est_template.c
+ * @file
* Motion estimation template.
*/
-//lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
+//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
#define LOAD_COMMON\
- uint32_t attribute_unused * const score_map= c->score_map;\
- const int attribute_unused xmin= c->xmin;\
- const int attribute_unused ymin= c->ymin;\
- const int attribute_unused xmax= c->xmax;\
- const int attribute_unused ymax= c->ymax;\
+ uint32_t av_unused * const score_map= c->score_map;\
+ const int av_unused xmin= c->xmin;\
+ const int av_unused ymin= c->ymin;\
+ const int av_unused xmax= c->xmax;\
+ const int av_unused ymax= c->ymax;\
uint8_t *mv_penalty= c->current_mv_penalty;\
const int pred_x= c->pred_x;\
const int pred_y= c->pred_y;\
{\
const int hx= 2*(x)+(dx);\
const int hy= 2*(y)+(dy);\
- d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
+ d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
-#if 1
int key;
int map_generation= c->map_generation;
#ifndef NDEBUG
assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
-#endif
if(t<=b){
CHECK_HALF_MV(0, 1, mx ,my-1)
if(l<=r){
{\
const int hx= 4*(x)+(dx);\
const int hy= 4*(y)+(dy);\
- d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
+ d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
}\
}
-#define CHECK_CLIPED_MV(ax,ay)\
+#define CHECK_CLIPPED_MV(ax,ay)\
{\
const int Lx= ax;\
const int Ly= ay;\
const int qpel= flags&FLAG_QPEL;\
const int shift= 1+qpel;\
-static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
+static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
int src_index, int ref_index, int const penalty_factor,
int size, int h, int flags)
{
if(x!=best[0] || y!=best[1])
dia_size=0;
-#if 0
-{
-int dx, dy, i;
-static int stats[8*8];
-dx= FFABS(x-best[0]);
-dy= FFABS(y-best[1]);
-if(dy>dx){
- dx^=dy; dy^=dx; dx^=dy;
-}
-stats[dy*8 + dx] ++;
-if(256*256*256*64 % (stats[0]+1)==0){
- for(i=0; i<64; i++){
- if((i&7)==0) printf("\n");
- printf("%8d ", stats[i]);
- }
- printf("\n");
-}
-}
-#endif
}
return dmin;
}
LOAD_COMMON
LOAD_COMMON2
int map_generation= c->map_generation;
- int x,y,i,d;
- static const int hex[6][2]={{-2, 0}, { 2,0}, {-1,-2}, {1,-2}, {-1,2},{1,2}};
+ int x,y,d;
+ const int dec= dia_size & (dia_size-1);
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(;dia_size; dia_size--){
+ for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
do{
x= best[0];
y= best[1];
- for(i=0; i<6; i++){
- CHECK_CLIPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
+
+ CHECK_CLIPPED_MV(x -dia_size , y);
+ CHECK_CLIPPED_MV(x+ dia_size , y);
+ CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
+ CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
+ if(dia_size>1){
+ CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
+ CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
}
}while(best[0] != x || best[1] != y);
}
- do{
- x= best[0];
- y= best[1];
- CHECK_CLIPED_MV(x+1, y);
- CHECK_CLIPED_MV(x, y+1);
- CHECK_CLIPED_MV(x-1, y);
- CHECK_CLIPED_MV(x, y-1);
- }while(best[0] != x || best[1] != y);
-
return dmin;
}
LOAD_COMMON
LOAD_COMMON2
int map_generation= c->map_generation;
- int x,y,i,d, dia_size;
+ int x,y,i,d;
+ int dia_size= c->dia_size&0xFF;
+ const int dec= dia_size & (dia_size-1);
static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
{ 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(dia_size= c->dia_size&0xFF; dia_size; dia_size--){
+ for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
do{
x= best[0];
y= best[1];
for(i=0; i<8; i++){
- CHECK_CLIPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
+ CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
}
}while(best[0] != x || best[1] != y);
}
x= best[0];
y= best[1];
- CHECK_CLIPED_MV(x+1, y);
- CHECK_CLIPED_MV(x, y+1);
- CHECK_CLIPED_MV(x-1, y);
- CHECK_CLIPED_MV(x, y-1);
+ CHECK_CLIPPED_MV(x+1, y);
+ CHECK_CLIPPED_MV(x, y+1);
+ CHECK_CLIPPED_MV(x-1, y);
+ CHECK_CLIPPED_MV(x, y-1);
return dmin;
}
for(j=1; j<=dia_size/4; j++){
for(i=0; i<16; i++){
- CHECK_CLIPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
+ CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
}
}
- return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 1);
+ return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
+}
+
+static int full_search(MpegEncContext * s, int *best, int dmin,
+ int src_index, int ref_index, int const penalty_factor,
+ int size, int h, int flags)
+{ /* Brute-force search: runs CHECK_MV on every (x, y) whose coordinates lie in
+   * [-dia_size, dia_size], clipped to [xmin, xmax] / [ymin, ymax], and then runs
+   * CHECK_CLIPPED_MV on best[] and its four direct neighbours.
+   * The scan window is centred on (0, 0), not on the incoming best[].
+   * diamond_search() dispatches here when c->dia_size > 1024.
+   * best: in/out candidate position; dmin: score associated with best[] on entry.
+   * Returns d (see the NOTE(review) before the final write-back). */
+ MotionEstContext * const c= &s->me;
+ me_cmp_func cmpf, chroma_cmpf;
+ LOAD_COMMON /* declares score_map, xmin/ymin/xmax/ymax, mv_penalty, pred_x/pred_y from c */
+ LOAD_COMMON2 /* definition not visible here; presumably supplies further names used by the CHECK_* macros */
+ int map_generation= c->map_generation; /* not referenced directly below; presumably read by the CHECK_* macros */
+ int x,y, d;
+ const int dia_size= c->dia_size&0xFF; /* only the low 8 bits of c->dia_size are used as the scan radius */
+
+ cmpf= s->dsp.me_cmp[size]; /* comparison functions are selected by block size index */
+ chroma_cmpf= s->dsp.me_cmp[size+1];
+
+ for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){ /* rows of the clipped window */
+ for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){ /* columns of the clipped window */
+ CHECK_MV(x, y); /* presumably scores (x, y) and updates dmin/best[] when it is better - confirm against the macro */
+ }
+ }
+
+ x= best[0]; /* snapshot the position held in best[] after the scan */
+ y= best[1];
+ d= dmin;
+ CHECK_CLIPPED_MV(x , y); /* re-check the centre, then its left/right/up/down neighbours */
+ CHECK_CLIPPED_MV(x+1, y);
+ CHECK_CLIPPED_MV(x, y+1);
+ CHECK_CLIPPED_MV(x-1, y);
+ CHECK_CLIPPED_MV(x, y-1);
+ best[0]= x; /* NOTE(review): best[] is reset to the snapshot taken before the five
+               * CHECK_CLIPPED_MV calls, and d (not dmin) is returned. If those calls
+               * can modify best[], dmin or d, their result is discarded or the
+               * returned score may not match best[] - confirm this is intended. */
+ best[1]= y;
+
+ return d;
+}
#define SAB_CHECK_MV(ax,ay)\
cmpf= s->dsp.me_cmp[size];
chroma_cmpf= s->dsp.me_cmp[size+1];
- for(j=i=0; i<ME_MAP_SIZE; i++){
+ /* Note: the j<MAX_SAB_SIZE check is needed if MAX_SAB_SIZE < ME_MAP_SIZE, as j
+ can otherwise grow beyond it when MVs overflow their ME_MAP_MV_BITS-bit space in map.
+ */
+ for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
uint32_t key= map[i];
key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
- assert(j<MAX_SAB_SIZE); //max j = number of predictors
-
minima[j].height= score_map[i];
minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
+
+ // All entries in map should be in range, unless an MV overflowed its ME_MAP_MV_BITS-bit space.
+ if( minima[j].x > xmax || minima[j].x < xmin
+ || minima[j].y > ymax || minima[j].y < ymin)
+ continue;
+
minima[j].checked=0;
if(minima[j].x || minima[j].y)
minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
if(x!=best[0] || y!=best[1])
dia_size=0;
-#if 0
-{
-int dx, dy, i;
-static int stats[8*8];
-dx= FFABS(x-best[0]);
-dy= FFABS(y-best[1]);
-stats[dy*8 + dx] ++;
-if(256*256*256*64 % (stats[0]+1)==0){
- for(i=0; i<64; i++){
- if((i&7)==0) printf("\n");
- printf("%6d ", stats[i]);
- }
- printf("\n");
-}
-}
-#endif
}
return dmin;
}
-static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
+static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
int src_index, int ref_index, int const penalty_factor,
int size, int h, int flags){
MotionEstContext * const c= &s->me;
return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
else if(c->dia_size<2)
return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
+ else if(c->dia_size>1024)
+ return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
else if(c->dia_size>768)
return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
else if(c->dia_size>512)
return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
-static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+/**
+ @param P a list of candidate mvs to check before starting the
+ iterative search. If one of the candidates is close to the optimal mv, then
+ the search takes fewer iterations, and the chance of finding the optimal mv
+ increases.
+ */
+static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
int ref_mv_scale, int flags, int size, int h)
{
MotionEstContext * const c= &s->me;
- int best[2]={0, 0};
- int d, dmin;
+ int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
+ i.e. the difference between the position of the
+ block currently being encoded and the position of
+ the block chosen to predict it from. */
+ int d; ///< the score (cmp + penalty) of any given mv
+ int dmin; /**< the best value of d, i.e. the score
+ corresponding to the mv stored in best[]. */
int map_generation;
int penalty_factor;
const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
map[0]= map_generation;
score_map[0]= dmin;
+ //FIXME precalc first term below?
+ if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
+ dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
+
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
- CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
}else{
if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
return dmin;
}
CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
- CHECK_CLIPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
- CHECK_CLIPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
- CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
- CHECK_CLIPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
- CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
+ CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
+ CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
+ CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
}
if(dmin>h*h*4){
if(c->pre_pass){
- CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
if(!s->first_slice_line)
- CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}else{
- CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
- CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
}
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
- CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
}else{
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
- CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
}
if(dmin>64*4){
- CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
- CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}
/* first line */
if (s->first_slice_line) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
- CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
}else{
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
- CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
}
if(dmin>64*4){
- CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
- CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
+ CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
}