3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
// Shared local declarations: shortcuts to fields of the context c, namely
// score_map, the search window bounds xmin/ymin/xmax/ymax, the
// current_mv_penalty table and the predicted vector pred_x/pred_y.
// Every line is backslash-continued.
// NOTE(review): the directive that opens this definition is not visible in
// this excerpt - confirm before editing.
27 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
29 uint32_t av_unused * const score_map= c->score_map;\
30 const int av_unused xmin= c->xmin;\
31 const int av_unused ymin= c->ymin;\
32 const int av_unused xmax= c->xmax;\
33 const int av_unused ymax= c->ymax;\
34 uint8_t *mv_penalty= c->current_mv_penalty;\
35 const int pred_x= c->pred_x;\
36 const int pred_y= c->pred_y;\
// CHECK_HALF_MV(dx, dy, x, y): score the half-pel candidate whose coordinates
// in half-pel units are hx = 2*(x)+(dx) and hy = 2*(y)+(dy).
// The score d is the cmp_hpel result plus the mv_penalty cost of the
// candidate relative to (pred_x, pred_y), scaled by penalty_factor.
// d, dmin, bx, by, hx and hy are then handed to COPY3_IF_LT.
// NOTE(review): COPY3_IF_LT is defined elsewhere; presumably it keeps the
// candidate when d is lower than dmin - confirm.
38 #define CHECK_HALF_MV(dx, dy, x, y)\
40 const int hx= 2*(x)+(dx);\
41 const int hy= 2*(y)+(dy);\
42 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
43 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
44 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
// Half-pel refinement of the full-pel vector read from *mx_ptr and *my_ptr.
// Uses the sub-pel settings of the context: sub_penalty_factor, sub_flags and
// the me_sub_cmp comparison functions.
// NOTE(review): parts of this function are not visible in this excerpt (rest
// of the parameter list, some declarations, the branch structure around the
// candidate checks and the return statement).
47 static int hpel_motion_search(MpegEncContext * s,
48 int *mx_ptr, int *my_ptr, int dmin,
49 int src_index, int ref_index,
52 MotionEstContext * const c= &s->me;
53 const int mx = *mx_ptr;
54 const int my = *my_ptr;
55 const int penalty_factor= c->sub_penalty_factor;
56 me_cmp_func cmp_sub, chroma_cmp_sub;
60 int flags= c->sub_flags;
// Luma comparison function for this block size; the chroma one is taken from
// the next table slot.
64 cmp_sub= s->dsp.me_sub_cmp[size];
65 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
67 if(c->skip){ //FIXME move out of hpel?
// When the full-pel and sub-pel comparison settings differ, the incoming dmin
// is replaced by a score of the start point computed with the sub-pel function.
73 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
74 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
// The rate penalty is skipped only for the zero vector when size is 0.
75 if(mx || my || size>0)
76 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
// Refine only when the start point is strictly inside the search window, so
// that the four full-pel neighbours are inside the window too.
79 if (mx > xmin && mx < xmax &&
80 my > ymin && my < ymax) {
82 const int index= (my<<ME_MAP_SHIFT) + mx;
// t, l, r, b: stored scores of the top, left, right and bottom full-pel
// neighbours, each with the mv penalty of that neighbour added.
// bx and by are declared on a line that is not visible here.
83 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
84 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
85 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
87 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
// Debug-only consistency check: the four neighbours must be stored in the map
// with the key of the current generation.
92 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
94 unsigned map_generation= c->map_generation;
95 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
96 av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
97 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98 av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
100 av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
101 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
102 av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
// Half-pel candidates around (mx, my).
// NOTE(review): the conditions that select which of these checks run are not
// visible in this excerpt; presumably they depend on t, l, r and b - confirm.
105 CHECK_HALF_MV(0, 1, mx ,my-1)
107 CHECK_HALF_MV(1, 1, mx-1, my-1)
109 CHECK_HALF_MV(1, 1, mx , my-1)
111 CHECK_HALF_MV(1, 1, mx-1, my )
113 CHECK_HALF_MV(1, 0, mx-1, my )
115 CHECK_HALF_MV(1, 1, mx , my-1)
117 CHECK_HALF_MV(1, 1, mx-1, my-1)
119 CHECK_HALF_MV(1, 1, mx , my )
121 CHECK_HALF_MV(1, 0, mx , my )
126 CHECK_HALF_MV(1, 1, mx-1, my-1)
128 CHECK_HALF_MV(1, 1, mx , my )
130 CHECK_HALF_MV(1, 0, mx-1, my)
131 CHECK_HALF_MV(1, 1, mx-1, my)
134 CHECK_HALF_MV(1, 1, mx , my-1)
136 CHECK_HALF_MV(1, 1, mx-1, my)
138 CHECK_HALF_MV(1, 0, mx , my)
139 CHECK_HALF_MV(1, 1, mx , my)
141 CHECK_HALF_MV(0, 1, mx , my)
// The selected vector must stay inside the window expressed in half-pel units.
143 av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
// NOTE(review): only the start of the signature is visible in this excerpt;
// the visible parameters match those of hpel_motion_search. The name suggests
// a variant without sub-pel refinement - confirm against the full body.
152 static int no_sub_motion_search(MpegEncContext * s,
153 int *mx_ptr, int *my_ptr, int dmin,
154 int src_index, int ref_index,
// Score the vector (mx, my) with the s->dsp.mb_cmp comparison functions and
// the mb_penalty_factor / mb_flags of the motion estimation context.
// mx and my are split into a full-pel part (right shift by qpel+1) and a
// sub-pel part (bitwise and with mask) before being passed to cmp.
// When add_rate is non-zero the mv penalty relative to (pred_x, pred_y) is
// added, except for the zero vector with size 0.
// NOTE(review): the end of the parameter list, some declarations and the
// return statement are not visible in this excerpt.
162 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
163 int src_index, int ref_index, int size,
166 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
167 MotionEstContext * const c= &s->me;
168 const int penalty_factor= c->mb_penalty_factor;
169 const int flags= c->mb_flags;
170 const int qpel= flags & FLAG_QPEL;
// mask is 1 when qpel is 0 and 3 when qpel is 1.
// NOTE(review): assumes FLAG_QPEL is the value 1 - TODO confirm.
171 const int mask= 1+2*qpel;
172 me_cmp_func cmp_sub, chroma_cmp_sub;
// Luma function for this block size; chroma function from the next slot.
179 cmp_sub= s->dsp.mb_cmp[size];
180 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
182 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
183 //FIXME check cbp before adding penalty for (0,0) vector
184 if(add_rate && (mx || my || size>0))
185 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
// Non-static entry point: forwards all arguments unchanged to get_mb_score()
// and returns its result.
190 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
191 int ref_index, int size, int h, int add_rate)
193 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
// CHECK_QUARTER_MV(dx, dy, x, y): score the quarter-pel candidate whose
// coordinates in quarter-pel units are hx = 4*(x)+(dx) and hy = 4*(y)+(dy).
// The score d is the cmp_qpel result (using cmpf / chroma_cmpf) plus the
// mv_penalty cost relative to (pred_x, pred_y), scaled by penalty_factor.
// d, dmin, bx, by, hx and hy are then handed to COPY3_IF_LT.
// NOTE(review): COPY3_IF_LT is defined elsewhere; presumably it keeps the
// candidate when d is lower than dmin - confirm.
196 #define CHECK_QUARTER_MV(dx, dy, x, y)\
198 const int hx= 4*(x)+(dx);\
199 const int hy= 4*(y)+(dy);\
200 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
201 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
202 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
// Quarter-pel refinement of the full-pel vector read from *mx_ptr and *my_ptr.
// Scores of sub-pel positions are first estimated from the stored full-pel
// scores around the start point; the best estimates are kept in best[] and
// best_pos[], and up to subpel_quality of them are then evaluated with
// CHECK_QUARTER_MV.
// NOTE(review): parts of this function are not visible in this excerpt (rest
// of the parameter list, several declarations, the else branches, the loops
// that locate the insertion index i, and the return statement).
205 static int qpel_motion_search(MpegEncContext * s,
206 int *mx_ptr, int *my_ptr, int dmin,
207 int src_index, int ref_index,
210 MotionEstContext * const c= &s->me;
211 const int mx = *mx_ptr;
212 const int my = *my_ptr;
213 const int penalty_factor= c->sub_penalty_factor;
214 const unsigned map_generation = c->map_generation;
215 const int subpel_quality= c->avctx->me_subpel_quality;
216 uint32_t *map= c->map;
217 me_cmp_func cmpf, chroma_cmpf;
218 me_cmp_func cmp_sub, chroma_cmp_sub;
221 int flags= c->sub_flags;
// Full-pel comparison functions (me_cmp) and sub-pel ones (me_sub_cmp);
// in both tables the chroma function sits in the slot after the luma one.
223 cmpf= s->dsp.me_cmp[size];
224 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
227 cmp_sub= s->dsp.me_sub_cmp[size];
228 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
230 if(c->skip){ //FIXME somehow move up (benchmark)
// When the full-pel and sub-pel comparison settings differ, the incoming dmin
// is replaced by a score of the start point computed with the sub-pel function.
236 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
237 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
// The rate penalty is skipped only for the zero vector when size is 0.
238 if(mx || my || size>0)
239 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
// Refine only when the start point is strictly inside the search window.
242 if (mx > xmin && mx < xmax &&
243 my > ymin && my < ymax) {
// Best vector so far, in quarter-pel units.
244 int bx=4*mx, by=4*my;
247 const int index= (my<<ME_MAP_SHIFT) + mx;
// Stored full-pel scores of the top, left, right and bottom neighbours and of
// the centre. Note that the local named c hides the context pointer c
// declared at the top of the function.
248 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
249 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
250 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
251 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
252 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
// Every byte of the first 8 ints of best is set to 64, giving each int the
// value 0x40404040 as a large initial score.
256 memset(best, 64, sizeof(int)*8);
// With dia_size >= 2 the four diagonal neighbours are read from score_map too.
// NOTE(review): presumably the larger diamond guarantees they were evaluated
// by the full-pel search - confirm.
257 if(s->me.dia_size>=2){
258 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
259 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
260 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
261 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
// Visit the quarter-pel offsets nx, ny in -3..3 around the centre.
263 for(ny= -3; ny <= 3; ny++){
264 for(nx= -3; nx <= 3; nx++){
265 //FIXME this could overflow (unlikely though)
// Quadratic in nx through the three scores of each row (top, centre, bottom),
// then a quadratic in ny through the three row values; the sum is rounded
// and scaled down by 10 bits.
266 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
267 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
268 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
269 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
// Skip offsets where both components are multiples of 4 (full-pel positions).
272 if((nx&3)==0 && (ny&3)==0) continue;
274 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
276 // if(nx&1) score-=1024*c->penalty_factor;
277 // if(ny&1) score-=1024*c->penalty_factor;
// Insert at slot i: entries i..6 move down one slot, the last one is dropped.
// The code that determines i is not visible here.
281 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
282 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
284 best_pos[i][0]= nx + 4*mx;
285 best_pos[i][1]= ny + 4*my;
293 //FIXME this could overflow (unlikely though)
// Coefficients of a two-dimensional quadratic fitted to the centre and its
// four direct neighbours; the asserts below state the fit conditions.
294 const int cx = 4*(r - l);
295 const int cx2= r + l - 2*c;
296 const int cy = 4*(b - t);
297 const int cy2= b + t - 2*c;
// The condition ends with a constant 0, so this branch is never taken.
300 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
301 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
303 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
// Mixed term derived from the top-left score.
306 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
308 av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
309 av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
310 av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
311 av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
312 av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
314 for(ny= -3; ny <= 3; ny++){
315 for(nx= -3; nx <= 3; nx++){
316 //FIXME this could overflow (unlikely though)
317 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
// Skip offsets where both components are multiples of 4 (full-pel positions).
320 if((nx&3)==0 && (ny&3)==0) continue;
// The estimate is scaled by 32 (see the 32*c term), so the penalty is too.
322 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
323 // if(nx&1) score-=32*c->penalty_factor;
324 // if(ny&1) score-=32*c->penalty_factor;
// Same insertion scheme as in the branch above.
328 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
329 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
331 best_pos[i][0]= nx + 4*mx;
332 best_pos[i][1]= ny + 4*my;
// Evaluate the first subpel_quality candidates with the real comparison.
// NOTE(review): nx and ny are presumably reloaded from best_pos[i] on lines
// that are not visible here - confirm.
339 for(i=0; i<subpel_quality; i++){
342 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
// The selected vector must stay inside the window in quarter-pel units.
345 av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
// CHECK_MV(x,y): evaluate the full-pel candidate (x, y) unless the map slot
// for it already holds the key of the current generation.
// The key combines y, x and map_generation; the slot index is derived from
// y and x and wrapped to ME_MAP_SIZE. The asserts require the candidate to be
// inside the search window. For a new candidate the cmp result is stored in
// score_map, the mv penalty (coordinates shifted left by shift) is added and
// the result is handed to COPY3_IF_LT with best[0] and best[1].
// NOTE(review): some lines of the macro are not visible in this excerpt,
// presumably including the store of key into map - confirm.
358 #define CHECK_MV(x,y)\
360 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
361 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
362 av_assert2((x) >= xmin);\
363 av_assert2((x) <= xmax);\
364 av_assert2((y) >= ymin);\
365 av_assert2((y) <= ymax);\
366 if(map[index]!=key){\
367 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
369 score_map[index]= d;\
370 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
371 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
// CHECK_CLIPPED_MV(ax,ay): clamps a candidate into the search window:
// Lx2 is Lx limited to xmin..xmax and Ly2 is Ly limited to ymin..ymax.
// NOTE(review): the definitions of Lx and Ly and the use of Lx2 and Ly2 are
// not visible in this excerpt; presumably they hold ax and ay and the clamped
// pair is passed to CHECK_MV - confirm.
375 #define CHECK_CLIPPED_MV(ax,ay)\
379 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
380 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
// CHECK_MV_DIR(x,y,new_dir): evaluate the full-pel candidate (x, y) unless
// the map slot for it already holds the key of the current generation.
// The visible steps match CHECK_MV without the range asserts: cmp result
// stored in score_map, then the mv penalty added to d.
// NOTE(review): the lines that use new_dir and update the best vector are not
// visible in this excerpt - confirm against the full macro.
384 #define CHECK_MV_DIR(x,y,new_dir)\
386 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
387 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
388 if(map[index]!=key){\
389 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
391 score_map[index]= d;\
392 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
// check(x,y,S,v): debugging aid. Logs an error through av_log when (x, y)
// lies outside the search window after the window bounds are shifted left by
// S. The token v is stringified and appended to the message so that call
// sites can be told apart. The only visible uses in this file are commented out.
402 #define check(x,y,S,v)\
403 if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
404 if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
405 if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
406 if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
// LOAD_COMMON2: declares the locals used by the CHECK_MV family of macros:
// map (the position map of the context c), qpel (the FLAG_QPEL bit of flags)
// and shift (1 + qpel), the left shift applied to full-pel coordinates before
// they index mv_penalty.
408 #define LOAD_COMMON2\
409 uint32_t *map= c->map;\
410 const int qpel= flags&FLAG_QPEL;\
411 const int shift= 1+qpel;\
// Small diamond search: starting from best[], probes the four direct
// neighbours (left, up, right, down) of the current best position.
// NOTE(review): the enclosing loop, the declaration of next_dir and the
// return statement are not visible in this excerpt.
413 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
414 int src_index, int ref_index, int const penalty_factor,
415 int size, int h, int flags)
417 MotionEstContext * const c= &s->me;
418 me_cmp_func cmpf, chroma_cmpf;
422 unsigned map_generation = c->map_generation;
// Luma comparison function for this block size; chroma from the next slot.
424 cmpf= s->dsp.me_cmp[size];
425 chroma_cmpf= s->dsp.me_cmp[size+1];
427 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
428 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
429 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
430 if(map[index]!=key){ //this will be executed only very rarely
431 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
438 const int dir= next_dir;
439 const int x= best[0];
440 const int y= best[1];
// Each neighbour carries a direction tag (0 left, 1 up, 2 right, 3 down) and
// is skipped when dir is the opposite tag or when it would leave the window.
// NOTE(review): presumably this avoids re-testing the position the search
// just came from - confirm.
443 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
444 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
445 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
446 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
// Diamond search over the sizes 1, 2 and 4 around the current best position.
// NOTE(review): the statements that follow the window test and the final
// position test, and the return statement, are not visible in this excerpt.
454 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
455 int src_index, int ref_index, int const penalty_factor,
456 int size, int h, int flags)
458 MotionEstContext * const c= &s->me;
459 me_cmp_func cmpf, chroma_cmpf;
463 unsigned map_generation = c->map_generation;
// Luma comparison function for this block size; chroma from the next slot.
465 cmpf= s->dsp.me_cmp[size];
466 chroma_cmpf= s->dsp.me_cmp[size+1];
468 for(dia_size=1; dia_size<=4; dia_size++){
470 const int x= best[0];
471 const int y= best[1];
// Skip sizes that are not a power of two (here: 3).
473 if(dia_size&(dia_size-1)) continue;
// Test whether the whole diamond of this size fits inside the search window.
475 if( x + dia_size > xmax
476 || x - dia_size < xmin
477 || y + dia_size > ymax
478 || y - dia_size < ymin)
// Walk every second point on each of the four edges of the diamond.
481 for(dir= 0; dir<dia_size; dir+=2){
484 CHECK_MV(x + dir , y + dia_size - dir);
485 CHECK_MV(x + dia_size - dir, y - dir );
486 CHECK_MV(x - dir , y - dia_size + dir);
487 CHECK_MV(x - dia_size + dir, y + dir );
// True when one of the checks above moved the best position.
490 if(x!=best[0] || y!=best[1])
// Hexagon search: for a shrinking pattern size, repeatedly probes six points
// around the current position (two on the same row at distance dia_size, four
// on the rows at distance dia_size above and below) until the best position
// stops changing.
// NOTE(review): the declarations of x and y, the opening of the do loop and
// the return statement are not visible in this excerpt.
496 static int hex_search(MpegEncContext * s, int *best, int dmin,
497 int src_index, int ref_index, int const penalty_factor,
498 int size, int h, int flags, int dia_size)
500 MotionEstContext * const c= &s->me;
501 me_cmp_func cmpf, chroma_cmpf;
504 unsigned map_generation = c->map_generation;
// Non-zero when the initial dia_size is not a power of two. It selects how
// the size shrinks below: by one per pass if non-zero, by halving otherwise.
506 const int dec= dia_size & (dia_size-1);
// Luma comparison function for this block size; chroma from the next slot.
508 cmpf= s->dsp.me_cmp[size];
509 chroma_cmpf= s->dsp.me_cmp[size+1];
511 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
516 CHECK_CLIPPED_MV(x -dia_size , y);
517 CHECK_CLIPPED_MV(x+ dia_size , y);
518 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
519 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// NOTE(review): -dia_size>>1 right-shifts a negative value, whose result is
// implementation-defined in C.
521 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
522 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
524 }while(best[0] != x || best[1] != y);
// Large-to-small diamond search: probes the eight offsets of the hex table
// scaled by a shrinking dia_size until the best position stops changing,
// then probes the four direct neighbours.
// NOTE(review): the declarations of x, y and i, the loop over the table, the
// opening of the do loop and the return statement are not visible here.
530 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
531 int src_index, int ref_index, int const penalty_factor,
532 int size, int h, int flags)
534 MotionEstContext * const c= &s->me;
535 me_cmp_func cmpf, chroma_cmpf;
538 unsigned map_generation = c->map_generation;
// Only the low 8 bits of the context dia_size give the pattern size.
540 int dia_size= c->dia_size&0xFF;
// Non-zero when the initial size is not a power of two: shrink by one per
// pass in that case, otherwise halve.
541 const int dec= dia_size & (dia_size-1);
// Offsets of the eight points of a diamond of radius 2.
542 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
543 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
// Luma comparison function for this block size; chroma from the next slot.
545 cmpf= s->dsp.me_cmp[size];
546 chroma_cmpf= s->dsp.me_cmp[size+1];
548 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
553 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
555 }while(best[0] != x || best[1] != y);
// Final probe of the four direct neighbours.
560 CHECK_CLIPPED_MV(x+1, y);
561 CHECK_CLIPPED_MV(x, y+1);
562 CHECK_CLIPPED_MV(x-1, y);
563 CHECK_CLIPPED_MV(x, y-1);
// Multi-stage search: a horizontal scan, a vertical scan, a 5x5 square scan
// and a scaled 16-point pattern, finished by hex_search with size 2.
// NOTE(review): the loop bodies, the assignments of x and y and the loop over
// the table index i are not visible in this excerpt.
568 static int umh_search(MpegEncContext * s, int *best, int dmin,
569 int src_index, int ref_index, int const penalty_factor,
570 int size, int h, int flags)
572 MotionEstContext * const c= &s->me;
573 me_cmp_func cmpf, chroma_cmpf;
576 unsigned map_generation = c->map_generation;
577 int x,y,x2,y2, i, j, d;
// Bits 1 to 7 of the context dia_size, so the value is always even.
578 const int dia_size= c->dia_size&0xFE;
// Offsets of the 16-point pattern; it is scaled by j further below.
579 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
580 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
581 {-2, 3}, { 0, 4}, { 2, 3},
582 {-2,-3}, { 0,-4}, { 2,-3},};
// Luma comparison function for this block size; chroma from the next slot.
584 cmpf= s->dsp.me_cmp[size];
585 chroma_cmpf= s->dsp.me_cmp[size+1];
// Horizontal scan, every second column, clipped to the window.
589 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
// Vertical scan over half the horizontal range, every second row.
592 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
// Every position of the 5x5 square around (x, y), clipped to the window.
598 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
599 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
604 //FIXME prevent the CLIP stuff
// Pattern scaled by j = 1 .. dia_size/4.
606 for(j=1; j<=dia_size/4; j++){
608 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
612 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
// Exhaustive search: visits every position with both coordinates in
// -dia_size..dia_size, clipped to the search window, then probes a position
// and its four direct neighbours.
// NOTE(review): the body of the scan loops, the assignment of x and y before
// the final probes and the return statement are not visible in this excerpt.
615 static int full_search(MpegEncContext * s, int *best, int dmin,
616 int src_index, int ref_index, int const penalty_factor,
617 int size, int h, int flags)
619 MotionEstContext * const c= &s->me;
620 me_cmp_func cmpf, chroma_cmpf;
623 unsigned map_generation = c->map_generation;
// Only the low 8 bits of the context dia_size give the search range.
625 const int dia_size= c->dia_size&0xFF;
// Luma comparison function for this block size; chroma from the next slot.
627 cmpf= s->dsp.me_cmp[size];
628 chroma_cmpf= s->dsp.me_cmp[size+1];
// The range is centred on the zero vector, not on the current best position.
630 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
631 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
639 CHECK_CLIPPED_MV(x , y);
640 CHECK_CLIPPED_MV(x+1, y);
641 CHECK_CLIPPED_MV(x, y+1);
642 CHECK_CLIPPED_MV(x-1, y);
643 CHECK_CLIPPED_MV(x, y-1);
// SAB_CHECK_MV(ax,ay): evaluate the candidate (ax, ay) unless the map slot
// for it already holds the key of the current generation. The cmp result is
// stored in score_map and the mv penalty is added. If the result is lower
// than the height of the last entry of minima, the insertion slot j is the
// first entry whose height exceeds d, later entries are moved down by one
// and slot j is filled (checked cleared, height set to d).
// NOTE(review): some lines of the macro are not visible in this excerpt
// (initialisation of j, the stores of the position, the map update) - confirm.
650 #define SAB_CHECK_MV(ax,ay)\
652 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
653 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
654 if(map[index]!=key){\
655 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
657 score_map[index]= d;\
658 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
659 if(d < minima[minima_count-1].height){\
662 while(d >= minima[j].height) j++;\
664 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
666 minima[j].checked= 0;\
667 minima[j].height= d;\
// Capacity of the minima list used by sab_diamond_search.
677 #define MAX_SAB_SIZE ME_MAP_SIZE
// Search that keeps a sorted list of the minima_count best positions seen so
// far (seeded from the map entries of the current generation) and expands
// each unchecked entry with SAB_CHECK_MV until every entry is checked.
// The first entry of the list becomes the result.
// NOTE(review): several lines are not visible in this excerpt (declarations,
// the statements guarded by the window tests, two of the neighbour checks,
// the end of the block comment below and the return statement).
678 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
679 int src_index, int ref_index, int const penalty_factor,
680 int size, int h, int flags)
682 MotionEstContext * const c= &s->me;
683 me_cmp_func cmpf, chroma_cmpf;
684 Minima minima[MAX_SAB_SIZE];
// The list length is the absolute value of the context dia_size.
685 const int minima_count= FFABS(c->dia_size);
689 unsigned map_generation = c->map_generation;
691 av_assert1(minima_count <= MAX_SAB_SIZE);
// Luma comparison function for this block size; chroma from the next slot.
693 cmpf= s->dsp.me_cmp[size];
694 chroma_cmpf= s->dsp.me_cmp[size+1];
696 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
697 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
// Collect every map entry that belongs to the current generation.
699 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
700 uint32_t key= map[i];
// Bias both coordinate fields by half their range; the same bias is
// subtracted again after the fields are extracted below.
702 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// NOTE(review): left-shifting the negative value -1 is undefined behaviour in
// C; an unsigned operand or a negated shift result would avoid it.
704 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
706 minima[j].height= score_map[i];
707 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
708 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
709 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
710 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
712 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
713 if( minima[j].x > xmax || minima[j].x < xmin
714 || minima[j].y > ymax || minima[j].y < ymin)
// score_map holds the raw comparison result; add the rate penalty for every
// vector except the zero vector.
718 if(minima[j].x || minima[j].y)
719 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
// Order the collected entries with minima_cmp (defined elsewhere).
724 qsort(minima, j, sizeof(Minima), minima_cmp);
// Pad the remaining slots with a very large height (2 to the power 30) at the
// zero vector.
726 for(; j<minima_count; j++){
727 minima[j].height=256*256*256*64;
729 minima[j].x= minima[j].y=0;
// Expand every entry that has not been expanded yet.
732 for(i=0; i<minima_count; i++){
733 const int x= minima[i].x;
734 const int y= minima[i].y;
737 if(minima[i].checked) continue;
// Entries on the border of the window are treated separately; the guarded
// statement is not visible here.
739 if( x >= xmax || x <= xmin
740 || y >= ymax || y <= ymin)
745 SAB_CHECK_MV(x , y-1)
746 SAB_CHECK_MV(x , y+1)
748 minima[i].checked= 1;
// The first list entry is the result.
751 best[0]= minima[0].x;
752 best[1]= minima[0].y;
753 dmin= minima[0].height;
755 if( best[0] < xmax && best[0] > xmin
756 && best[1] < ymax && best[1] > ymin){
758 //ensure that the reference samples for hpel refinement are in the map
759 CHECK_MV(best[0]-1, best[1])
760 CHECK_MV(best[0]+1, best[1])
761 CHECK_MV(best[0], best[1]-1)
762 CHECK_MV(best[0], best[1]+1)
// Diamond search with growing size: for dia_size from 1 up to the context
// dia_size, probes the points on the four edges of the diamond around the
// current best position, restricted to the part inside the search window.
// NOTE(review): some declarations, the statement that follows the final
// position test and the return statement are not visible in this excerpt.
767 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
768 int src_index, int ref_index, int const penalty_factor,
769 int size, int h, int flags)
771 MotionEstContext * const c= &s->me;
772 me_cmp_func cmpf, chroma_cmpf;
776 unsigned map_generation = c->map_generation;
// Luma comparison function for this block size; chroma from the next slot.
778 cmpf= s->dsp.me_cmp[size];
779 chroma_cmpf= s->dsp.me_cmp[size+1];
781 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
783 const int x= best[0];
784 const int y= best[1];
// Edge from the bottom corner towards the right corner; start and end trim
// the edge to the points that lie inside the window.
786 start= FFMAX(0, y + dia_size - ymax);
787 end = FFMIN(dia_size, xmax - x + 1);
788 for(dir= start; dir<end; dir++){
791 //check(x + dir,y + dia_size - dir,0, a0)
792 CHECK_MV(x + dir , y + dia_size - dir);
// Edge from the right corner towards the top corner.
795 start= FFMAX(0, x + dia_size - xmax);
796 end = FFMIN(dia_size, y - ymin + 1);
797 for(dir= start; dir<end; dir++){
800 //check(x + dia_size - dir, y - dir,0, a1)
801 CHECK_MV(x + dia_size - dir, y - dir );
// Edge from the top corner towards the left corner.
804 start= FFMAX(0, -y + dia_size + ymin );
805 end = FFMIN(dia_size, x - xmin + 1);
806 for(dir= start; dir<end; dir++){
809 //check(x - dir,y - dia_size + dir,0, a2)
810 CHECK_MV(x - dir , y - dia_size + dir);
// Edge from the left corner towards the bottom corner.
813 start= FFMAX(0, -x + dia_size + xmin );
814 end = FFMIN(dia_size, ymax - y + 1);
815 for(dir= start; dir<end; dir++){
818 //check(x - dia_size + dir, y + dir,0, a3)
819 CHECK_MV(x - dia_size + dir, y + dir );
// True when one of the checks above moved the best position.
822 if(x!=best[0] || y!=best[1])
// Dispatcher: selects the full-pel search routine from the context dia_size
// and forwards all arguments unchanged. Visible cases, tested in this order:
//   below -1      sab_diamond_search
//   below 2       small_diamond_search
//   above 1024    full_search
//   above 768     umh_search
//   above 512     hex_search, with the low 8 bits of dia_size as its size
//   above 256     l2s_dia_search
//   otherwise     var_diamond_search
// NOTE(review): the condition that selects funny_diamond_search is on a line
// that is not visible in this excerpt.
828 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
829 int src_index, int ref_index, int const penalty_factor,
830 int size, int h, int flags){
831 MotionEstContext * const c= &s->me;
833 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
834 else if(c->dia_size<-1)
835 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
836 else if(c->dia_size<2)
837 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838 else if(c->dia_size>1024)
839 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840 else if(c->dia_size>768)
841 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842 else if(c->dia_size>512)
843 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
844 else if(c->dia_size>256)
845 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
847 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
851 @param P a list of candidate mvs to check before starting the
852 iterative search. If one of the candidates is close to the optimal mv, then
853 it takes fewer iterations. And it increases the chance that we find the
// Predictor-based full-pel search: scores the zero vector, then the spatial
// predictors from P and the scaled vectors from last_mv, optionally more
// last_mv entries around the current macroblock, and finally refines the
// best candidate with diamond_search.
// NOTE(review): parts of this function are not visible in this excerpt (the
// markers of the comment above, some declarations, the branch headers that
// pick the comparison functions, the else branches and the return statement).
856 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
857 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
858 int ref_mv_scale, int flags, int size, int h)
860 MotionEstContext * const c= &s->me;
861 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
862 i.e. the difference between the position of the
863 block currently being encoded and the position of
864 the block chosen to predict it from. */
865 int d; ///< the score (cmp + penalty) of any given mv
866 int dmin; /**< the best value of d, i.e. the score
867 corresponding to the mv stored in best[]. */
868 unsigned map_generation;
870 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
871 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
872 me_cmp_func cmpf, chroma_cmpf;
// Two alternative setups: the pre-pass settings (pre_penalty_factor and
// me_pre_cmp) or the regular ones (penalty_factor and me_cmp). The condition
// that chooses between them is not visible here.
878 penalty_factor= c->pre_penalty_factor;
879 cmpf= s->dsp.me_pre_cmp[size];
880 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
882 penalty_factor= c->penalty_factor;
883 cmpf= s->dsp.me_cmp[size];
884 chroma_cmpf= s->dsp.me_cmp[size+1];
// Start a new map generation.
// NOTE(review): update_map_generation is defined elsewhere; presumably it
// makes older map entries count as stale - confirm.
887 map_generation= update_map_generation(c);
// Score the zero vector and record it in slot 0 of the map.
890 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
891 map[0]= map_generation;
894 //FIXME precalc first term below?
// The zero vector is charged its rate penalty only for B pictures outside
// direct mode or when CODEC_FLAG_MV0 is set.
895 if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
896 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
// On the first slice line only the left predictor and the last_mv entry of
// this macroblock are tried.
899 if (s->first_slice_line) {
900 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
// last_mv is scaled by ref_mv_scale, then rounded and shifted right by 16.
901 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
902 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// Test for a low zero-vector score combined with all-zero spatial predictors.
// The guarded statements are not visible here.
904 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
905 && ( P_LEFT[0] |P_LEFT[1]
907 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
// Median predictor and its four direct neighbours, then the last_mv entry of
// this macroblock and the left, top and top-right predictors.
913 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
914 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
915 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
916 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
917 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
918 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
919 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
920 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
921 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
922 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
// last_mv entries of the left, top, right and bottom neighbours. The top one
// is skipped on the first slice line, the bottom one on the last row.
// The enclosing condition is not visible here.
926 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
927 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
928 if(!s->first_slice_line)
929 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
930 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
932 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
933 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
934 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
935 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
936 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Optional extra predictors: last_mv entries of the macroblocks up to count
// positions away in each direction, clipped to the picture.
940 if(c->avctx->last_predictor_count){
941 const int count= c->avctx->last_predictor_count;
942 const int xstart= FFMAX(0, s->mb_x - count);
943 const int ystart= FFMAX(0, s->mb_y - count);
944 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
945 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
948 for(mb_y=ystart; mb_y<yend; mb_y++){
950 for(mb_x=xstart; mb_x<xend; mb_x++){
// NOTE(review): this index adds 1 to both mb_x and mb_y, unlike ref_mv_xy
// above - confirm the intended layout of last_mv.
951 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
952 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
953 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
// Ignore vectors that fall outside the search window.
955 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
961 //check(best[0],best[1],0, b0)
// Iterative refinement around the best candidate found so far.
962 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
964 //check(best[0],best[1],0, b1)
// Non-static entry point for epzs_motion_search_internal.
// When the context flags are 0, h is 16 and size is 0, the internal function
// is called with those values as literal constants; otherwise it is called
// with the run-time values.
// NOTE(review): presumably the literal call lets the compiler specialise the
// always-inline internal function for the common case - confirm.
// The end of the parameter list is not visible in this excerpt.
971 //this function is dedicated to the braindamaged gcc
972 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
973 int P[10][2], int src_index, int ref_index,
974 int16_t (*last_mv)[2], int ref_mv_scale,
977 MotionEstContext * const c= &s->me;
978 //FIXME convert other functions in the same way if faster
979 if(c->flags==0 && h==16 && size==0){
980 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
982 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
984 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
// Predictor-based full-pel search: tries the spatial predictors from P
// (including P_MV1) and scaled last_mv entries, then refines the best
// candidate with diamond_search. Uses penalty_factor, flags and the me_cmp
// functions of the context.
// NOTE(review): parts of this function are not visible in this excerpt (end
// of the parameter list, several declarations including best, dmin and size,
// the else branch header and the return statement).
988 static int epzs_motion_search4(MpegEncContext * s,
989 int *mx_ptr, int *my_ptr, int P[10][2],
990 int src_index, int ref_index, int16_t (*last_mv)[2],
993 MotionEstContext * const c= &s->me;
996 unsigned map_generation;
997 const int penalty_factor= c->penalty_factor;
1000 const int ref_mv_stride= s->mb_stride;
1001 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1002 me_cmp_func cmpf, chroma_cmpf;
1004 int flags= c->flags;
// Luma comparison function for this block size; chroma from the next slot.
1007 cmpf= s->dsp.me_cmp[size];
1008 chroma_cmpf= s->dsp.me_cmp[size+1];
// Start a new map generation (update_map_generation is defined elsewhere).
1010 map_generation= update_map_generation(c);
// On the first slice line: left predictor, the last_mv entry of this
// macroblock (scaled, rounded, shifted right by 16) and P_MV1.
1015 if (s->first_slice_line) {
1016 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1017 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1018 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1019 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// Otherwise: P_MV1, median, left, top and top-right predictors and the
// last_mv entry of this macroblock.
1021 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1022 //FIXME try some early stop
1023 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1024 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1025 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1026 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1027 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1028 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv entries of the right neighbour and, unless this is the last row,
// of the bottom neighbour.
1031 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1032 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1033 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1034 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1035 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Iterative refinement around the best candidate found so far.
1038 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
// Predictor-based full-pel search with size fixed to 0. The visible steps are
// the same as in epzs_motion_search4: spatial predictors from P (including
// P_MV1), scaled last_mv entries, then diamond_search.
// NOTE(review): parts of this function are not visible in this excerpt (end
// of the parameter list, several declarations including best and dmin, the
// else branch header and the return statement).
1046 //try to merge with above FIXME (needs PSNR test)
1047 static int epzs_motion_search2(MpegEncContext * s,
1048 int *mx_ptr, int *my_ptr, int P[10][2],
1049 int src_index, int ref_index, int16_t (*last_mv)[2],
1052 MotionEstContext * const c= &s->me;
1055 unsigned map_generation;
1056 const int penalty_factor= c->penalty_factor;
1057 const int size=0; //FIXME pass as arg
1059 const int ref_mv_stride= s->mb_stride;
1060 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1061 me_cmp_func cmpf, chroma_cmpf;
1063 int flags= c->flags;
// Luma comparison function for size 0; chroma from the next slot.
1066 cmpf= s->dsp.me_cmp[size];
1067 chroma_cmpf= s->dsp.me_cmp[size+1];
// Start a new map generation (update_map_generation is defined elsewhere).
1069 map_generation= update_map_generation(c);
// On the first slice line: left predictor, the last_mv entry of this
// macroblock (scaled, rounded, shifted right by 16) and P_MV1.
1074 if (s->first_slice_line) {
1075 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1076 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1077 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1078 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// Otherwise: P_MV1, median, left, top and top-right predictors and the
// last_mv entry of this macroblock.
1080 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1081 //FIXME try some early stop
1082 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1083 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1084 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1085 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1086 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1087 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv entries of the right neighbour and, unless this is the last row,
// of the bottom neighbour.
1090 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1091 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1092 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1093 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1094 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Iterative refinement around the best candidate found so far.
1097 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);