X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fmotion_est.c;h=50078f93cf98a958c4e9fc06ec92d509408deef0;hb=d7e5aebae7652ac766034f1d90e5a4f62677fb3c;hp=5187ef7bb3c656bac662ad8f8bc94efb76d3433f;hpb=b74ec69366ca84e1edb7df10d1111224ec8477ca;p=ffmpeg

diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 5187ef7bb3c..50078f93cf9 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -1,6 +1,6 @@
 /*
  * Motion estimation
- * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
  * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
@@ -23,15 +23,17 @@
  */
 
 /**
- * @file motion_est.c
+ * @file
  * Motion estimation.
  */
 
 #include <stdlib.h>
 #include <stdio.h>
 #include <limits.h>
+#include "libavutil/intmath.h"
 #include "avcodec.h"
 #include "dsputil.h"
+#include "mathops.h"
 #include "mpegvideo.h"
 
 #undef NDEBUG
@@ -102,25 +104,17 @@ static int get_flags(MotionEstContext *c, int direct, int chroma){
            + (chroma ? FLAG_CHROMA : 0);
 }
 
-/*! \brief compares a block (either a full macroblock or a partition thereof)
-    against a proposed motion-compensated prediction of that block
- */
-static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                       const int size, const int h, int ref_index, int src_index,
-                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
     MotionEstContext * const c= &s->me;
     const int stride= c->stride;
-    const int uvstride= c->uvstride;
-    const int qpel= flags&FLAG_QPEL;
-    const int chroma= flags&FLAG_CHROMA;
-    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
     const int hx= subx + (x<<(1+qpel));
     const int hy= suby + (y<<(1+qpel));
     uint8_t * const * const ref= c->ref[ref_index];
     uint8_t * const * const src= c->src[src_index];
     int d;
     //FIXME check chroma 4mv, (no crashes ...)
-    if(flags&FLAG_DIRECT){
         assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
         if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
             const int time_pp= s->pp_time;
@@ -179,7 +173,22 @@ static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, con
             d = cmp_func(s, c->temp, src[0], stride, 16);
         }else
             d= 256*256*256*32;
-    }else{
+    return d;
+}
+
+static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
+    MotionEstContext * const c= &s->me;
+    const int stride= c->stride;
+    const int uvstride= c->uvstride;
+    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
+    const int hx= subx + (x<<(1+qpel));
+    const int hy= suby + (y<<(1+qpel));
+    uint8_t * const * const ref= c->ref[ref_index];
+    uint8_t * const * const src= c->src[src_index];
+    int d;
+    //FIXME check chroma 4mv, (no crashes ...)
         int uvdxy;              /* no, it might not be used uninitialized */
         if(dxy){
             if(qpel){
@@ -210,16 +219,71 @@ static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, con
             d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
             d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
         }
+    return d;
+}
+
+static int cmp_simple(MpegEncContext *s, const int x, const int y,
+                      int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
+    return cmp_inline(s,x,y,0,0,0,16,ref_index,src_index, cmp_func, chroma_cmp_func, 0, 0);
+}
+
+static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+    if(flags&FLAG_DIRECT){
+        return cmp_direct_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
+    }else{
+        return cmp_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
     }
-#if 0
-    if(full_pel){
-        const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
-        score_map[index]= d;
+}
+
+static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+    if(flags&FLAG_DIRECT){
+        return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
+    }else{
+        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);
     }
+}
 
-    d += (c->mv_penalty[hx - c->pred_x] + c->mv_penalty[hy - c->pred_y])*c->penalty_factor;
-#endif
-    return d;
+/*! \brief compares a block (either a full macroblock or a partition thereof)
+    against a proposed motion-compensated prediction of that block
+ */
+static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
+       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
+       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
+        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
+    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
+       && subx==0 && suby==0){
+        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
+    }else{
+        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
+    }
+}
+
+static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+    if(flags&FLAG_DIRECT){
+        return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0);
+    }else{
+        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);
+    }
+}
+
+static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
+                      const int size, const int h, int ref_index, int src_index,
+                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
+    if(flags&FLAG_DIRECT){
+        return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1);
+    }else{
+        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1, flags&FLAG_CHROMA);
+    }
 }
 
 #include "motion_est_template.c"
@@ -240,7 +304,8 @@ int ff_init_me(MpegEncContext *s){
         av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
         return -1;
     }
-    if(s->me_method!=ME_ZERO && s->me_method!=ME_EPZS && s->me_method!=ME_X1){
+    //special case of snow is needed because snow uses its own iterative ME code
+    if(s->me_method!=ME_ZERO && s->me_method!=ME_EPZS && s->me_method!=ME_X1 && s->avctx->codec_id != CODEC_ID_SNOW){
         av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
         return -1;
     }
@@ -306,8 +371,6 @@ int ff_init_me(MpegEncContext *s){
         c->sub_motion_search= no_sub_motion_search;
     }
 
-    c->temp= c->scratchpad;
-
     return 0;
 }
 
@@ -849,8 +912,8 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
         }
 
         if(USES_LIST(mb_type, 0)){
-            int field_select0= p->ref_index[0][xy ];
-            int field_select1= p->ref_index[0][xy2];
+            int field_select0= p->ref_index[0][4*mb_xy  ];
+            int field_select1= p->ref_index[0][4*mb_xy+2];
             assert(field_select0==0 ||field_select0==1);
             assert(field_select1==0 ||field_select1==1);
             init_interlaced_ref(s, 0);
@@ -877,8 +940,8 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
             d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
         }
         if(USES_LIST(mb_type, 1)){
-            int field_select0= p->ref_index[1][xy ];
-            int field_select1= p->ref_index[1][xy2];
+            int field_select0= p->ref_index[1][4*mb_xy  ];
+            int field_select1= p->ref_index[1][4*mb_xy+2];
             assert(field_select0==0 ||field_select0==1);
             assert(field_select1==0 ||field_select1==1);
             init_interlaced_ref(s, 2);
@@ -1412,12 +1475,13 @@ static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
     const int ymin= c->ymin<<shift;
     const int xmax= c->xmax<<shift;
     const int ymax= c->ymax<<shift;
-    uint8_t map[8][8][8][8];
+#define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
+#define HASH8(fx,fy,bx,by) ((uint8_t)HASH(fx,fy,bx,by))
+    int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
+    uint8_t map[256];
 
     memset(map,0,sizeof(map));
-#define BIDIR_MAP(fx,fy,bx,by) \
-    map[(motion_fx+fx)&7][(motion_fy+fy)&7][(motion_bx+bx)&7][(motion_by+by)&7]
-    BIDIR_MAP(0,0,0,0) = 1;
+    map[hashidx&255] = 1;
 
     fbmin= check_bidir_mv(s, motion_fx, motion_fy,
                           motion_bx, motion_by,
@@ -1426,14 +1490,53 @@ static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
                           0, 16);
 
     if(s->avctx->bidir_refine){
-        int score, end;
+        int end;
+        static const uint8_t limittab[5]={0,8,32,64,80};
+        const int limit= limittab[s->avctx->bidir_refine];
+        static const int8_t vect[][4]={
+{ 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},
+
+{ 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
+{ 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
+{ 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
+{ 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},
+
+{ 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
+{ 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
+{ 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
+{ 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},
+
+{ 1, 1, 1, 1}, {-1,-1,-1,-1},
+{ 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
+{ 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
+        };
+        static const uint8_t hash[]={
+HASH8( 0, 0, 0, 1), HASH8( 0, 0, 0,-1), HASH8( 0, 0, 1, 0), HASH8( 0, 0,-1, 0), HASH8( 0, 1, 0, 0), HASH8( 0,-1, 0, 0), HASH8( 1, 0, 0, 0), HASH8(-1, 0, 0, 0),
+
+HASH8( 0, 0, 1, 1), HASH8( 0, 0,-1,-1), HASH8( 0, 1, 1, 0), HASH8( 0,-1,-1, 0), HASH8( 1, 1, 0, 0), HASH8(-1,-1, 0, 0), HASH8( 1, 0, 0, 1), HASH8(-1, 0, 0,-1),
+HASH8( 0, 1, 0, 1), HASH8( 0,-1, 0,-1), HASH8( 1, 0, 1, 0), HASH8(-1, 0,-1, 0),
+HASH8( 0, 0,-1, 1), HASH8( 0, 0, 1,-1), HASH8( 0,-1, 1, 0), HASH8( 0, 1,-1, 0), HASH8(-1, 1, 0, 0), HASH8( 1,-1, 0, 0), HASH8( 1, 0, 0,-1), HASH8(-1, 0, 0, 1),
+HASH8( 0,-1, 0, 1), HASH8( 0, 1, 0,-1), HASH8(-1, 0, 1, 0), HASH8( 1, 0,-1, 0),
+
+HASH8( 0, 1, 1, 1), HASH8( 0,-1,-1,-1), HASH8( 1, 1, 1, 0), HASH8(-1,-1,-1, 0), HASH8( 1, 1, 0, 1), HASH8(-1,-1, 0,-1), HASH8( 1, 0, 1, 1), HASH8(-1, 0,-1,-1),
+HASH8( 0,-1, 1, 1), HASH8( 0, 1,-1,-1), HASH8(-1, 1, 1, 0), HASH8( 1,-1,-1, 0), HASH8( 1, 1, 0,-1), HASH8(-1,-1, 0, 1), HASH8( 1, 0,-1, 1), HASH8(-1, 0, 1,-1),
+HASH8( 0, 1,-1, 1), HASH8( 0,-1, 1,-1), HASH8( 1,-1, 1, 0), HASH8(-1, 1,-1, 0), HASH8(-1, 1, 0, 1), HASH8( 1,-1, 0,-1), HASH8( 1, 0, 1,-1), HASH8(-1, 0,-1, 1),
+HASH8( 0, 1, 1,-1), HASH8( 0,-1,-1, 1), HASH8( 1, 1,-1, 0), HASH8(-1,-1, 1, 0), HASH8( 1,-1, 0, 1), HASH8(-1, 1, 0,-1), HASH8(-1, 0, 1, 1), HASH8( 1, 0,-1,-1),
+
+HASH8( 1, 1, 1, 1), HASH8(-1,-1,-1,-1),
+HASH8( 1, 1, 1,-1), HASH8(-1,-1,-1, 1), HASH8( 1, 1,-1, 1), HASH8(-1,-1, 1,-1), HASH8( 1,-1, 1, 1), HASH8(-1, 1,-1,-1), HASH8(-1, 1, 1, 1), HASH8( 1,-1,-1,-1),
+HASH8( 1, 1,-1,-1), HASH8(-1,-1, 1, 1), HASH8( 1,-1,-1, 1), HASH8(-1, 1, 1,-1), HASH8( 1,-1, 1,-1), HASH8(-1, 1,-1, 1),
+};
+
 #define CHECK_BIDIR(fx,fy,bx,by)\
-    if( !BIDIR_MAP(fx,fy,bx,by)\
+    if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
        &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
        &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
-        BIDIR_MAP(fx,fy,bx,by) = 1;\
+        int score;\
+        map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
         score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
         if(score < fbmin){\
+            hashidx += HASH(fx,fy,bx,by);\
             fbmin= score;\
             motion_fx+=fx;\
             motion_fy+=fy;\
@@ -1446,34 +1549,45 @@ static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
 CHECK_BIDIR(a,b,c,d)\
 CHECK_BIDIR(-(a),-(b),-(c),-(d))
 
-#define CHECK_BIDIRR(a,b,c,d)\
-CHECK_BIDIR2(a,b,c,d)\
-CHECK_BIDIR2(b,c,d,a)\
-CHECK_BIDIR2(c,d,a,b)\
-CHECK_BIDIR2(d,a,b,c)
-
         do{
+            int i;
+            int borderdist=0;
             end=1;
 
-            CHECK_BIDIRR( 0, 0, 0, 1)
-            if(s->avctx->bidir_refine > 1){
-                CHECK_BIDIRR( 0, 0, 1, 1)
-                CHECK_BIDIR2( 0, 1, 0, 1)
-                CHECK_BIDIR2( 1, 0, 1, 0)
-                CHECK_BIDIRR( 0, 0,-1, 1)
-                CHECK_BIDIR2( 0,-1, 0, 1)
-                CHECK_BIDIR2(-1, 0, 1, 0)
-                if(s->avctx->bidir_refine > 2){
-                    CHECK_BIDIRR( 0, 1, 1, 1)
-                    CHECK_BIDIRR( 0,-1, 1, 1)
-                    CHECK_BIDIRR( 0, 1,-1, 1)
-                    CHECK_BIDIRR( 0, 1, 1,-1)
-                    if(s->avctx->bidir_refine > 3){
-                        CHECK_BIDIR2( 1, 1, 1, 1)
-                        CHECK_BIDIRR( 1, 1, 1,-1)
-                        CHECK_BIDIR2( 1, 1,-1,-1)
-                        CHECK_BIDIR2( 1,-1,-1, 1)
-                        CHECK_BIDIR2( 1,-1, 1,-1)
+            CHECK_BIDIR2(0,0,0,1)
+            CHECK_BIDIR2(0,0,1,0)
+            CHECK_BIDIR2(0,1,0,0)
+            CHECK_BIDIR2(1,0,0,0)
+
+            for(i=8; i<limit; i++){
+                int fx= motion_fx+vect[i][0];
+                int fy= motion_fy+vect[i][1];
+                int bx= motion_bx+vect[i][2];
+                int by= motion_by+vect[i][3];
+                if(borderdist<=0){
+                    int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
+                    int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
+                    if((a|b) < 0)
+                        map[(hashidx+hash[i])&255] = 1;
+                }
+                if(!map[(hashidx+hash[i])&255]){
+                    int score;
+                    map[(hashidx+hash[i])&255] = 1;
+                    score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
+                    if(score < fbmin){
+                        hashidx += hash[i];
+                        fbmin= score;
+                        motion_fx=fx;
+                        motion_fy=fy;
+                        motion_bx=bx;
+                        motion_by=by;
+                        end=0;
+                        borderdist--;
+                        if(borderdist<=0){
+                            int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
+                            int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
+                            borderdist= FFMIN(a,b);
+                        }
                     }
                 }
             }