3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
/*
 * Local aliases for fields of the context pointer `c`, which must already be
 * in scope where these lines are expanded. Every declaration ends in a
 * continuation backslash, so they form the body of a macro whose #define
 * line is not visible in this view (presumably LOAD_COMMON -- TODO confirm).
 * Aliases: score_map, the xmin/ymin/xmax/ymax bounds (all tagged av_unused),
 * mv_penalty (taken from c->current_mv_penalty) and pred_x/pred_y.
 */
27 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
29 uint32_t av_unused * const score_map= c->score_map;\
30 const int av_unused xmin= c->xmin;\
31 const int av_unused ymin= c->ymin;\
32 const int av_unused xmax= c->xmax;\
33 const int av_unused ymax= c->ymax;\
34 uint8_t *mv_penalty= c->current_mv_penalty;\
35 const int pred_x= c->pred_x;\
36 const int pred_y= c->pred_y;\
/*
 * Evaluate one half-pel candidate: full-pel position (x, y) plus the
 * half-pel offset (dx, dy).
 * hx/hy are the candidate in half-pel units (2*x+dx, 2*y+dy). The score is
 * cmp_hpel() plus the mv_penalty[] cost of (hx, hy) relative to
 * (pred_x, pred_y), scaled by penalty_factor. The result is handed to
 * COPY3_IF_LT together with dmin and bx/by (presumably that macro keeps the
 * lower score and its coordinates -- TODO confirm against its definition).
 * Expects s, d, dmin, bx, by, size, h, ref_index, src_index, cmp_sub,
 * chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y and penalty_factor to be
 * in scope at the expansion site.
 */
38 #define CHECK_HALF_MV(dx, dy, x, y)\
40 const int hx= 2*(x)+(dx);\
41 const int hy= 2*(y)+(dy);\
42 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
43 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
44 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Half-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
 *
 * s          encoder context; the motion-estimation state is s->me.
 * mx_ptr,
 * my_ptr     full-pel vector to refine (read below as mx/my).
 * dmin       score of that vector on entry.
 * src_index,
 * ref_index  forwarded unchanged to the comparison helpers.
 *
 * NOTE(review): several lines of this function (the rest of the parameter
 * list, some declarations, braces, the conditionals that select between the
 * CHECK_HALF_MV groups and the return path) are not visible in this view;
 * only what the visible lines establish is described here.
 */
47 static int hpel_motion_search(MpegEncContext * s,
48 int *mx_ptr, int *my_ptr, int dmin,
49 int src_index, int ref_index,
52 MotionEstContext * const c= &s->me;
53 const int mx = *mx_ptr;
54 const int my = *my_ptr;
55 const int penalty_factor= c->sub_penalty_factor;
56 me_cmp_func cmp_sub, chroma_cmp_sub;
60 int flags= c->sub_flags;
// Sub-pel comparison functions; the entry at size+1 is used for chroma.
64 cmp_sub= s->dsp.me_sub_cmp[size];
65 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
67 if(c->skip){ //FIXME move out of hpel?
// When the sub-pel metric differs from me_cmp, rescore the starting vector
// with cmp_sub so that dmin is comparable with the half-pel scores below.
// The vector cost is skipped only for a (0,0) vector with size 0.
73 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
74 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
75 if(mx || my || size>0)
76 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
// The neighbour-based refinement needs (mx, my) strictly inside the window.
79 if (mx > xmin && mx < xmax &&
80 my > ymin && my < ymax) {
82 const int index= (my<<ME_MAP_SHIFT) + mx;
// t/l/r/b: score_map entries of the top/left/right/bottom full-pel
// neighbours plus their vector cost; the +-2 offsets on bx/by suggest
// half-pel units (compare the final assertion) -- TODO confirm.
83 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
84 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
85 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
87 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
// Sanity checks: each of the four neighbours must be stored in c->map under
// the key of the current map generation.
94 unsigned map_generation= c->map_generation;
95 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
96 av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
97 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98 av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
100 av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
101 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
102 av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
// Half-pel candidates. The conditions that choose which group of calls runs
// are on lines not visible here (presumably comparisons of t/l/r/b).
105 CHECK_HALF_MV(0, 1, mx ,my-1)
107 CHECK_HALF_MV(1, 1, mx-1, my-1)
109 CHECK_HALF_MV(1, 1, mx , my-1)
111 CHECK_HALF_MV(1, 1, mx-1, my )
113 CHECK_HALF_MV(1, 0, mx-1, my )
115 CHECK_HALF_MV(1, 1, mx , my-1)
117 CHECK_HALF_MV(1, 1, mx-1, my-1)
119 CHECK_HALF_MV(1, 1, mx , my )
121 CHECK_HALF_MV(1, 0, mx , my )
126 CHECK_HALF_MV(1, 1, mx-1, my-1)
128 CHECK_HALF_MV(1, 1, mx , my )
130 CHECK_HALF_MV(1, 0, mx-1, my)
131 CHECK_HALF_MV(1, 1, mx-1, my)
134 CHECK_HALF_MV(1, 1, mx , my-1)
136 CHECK_HALF_MV(1, 1, mx-1, my)
138 CHECK_HALF_MV(1, 0, mx , my)
139 CHECK_HALF_MV(1, 1, mx , my)
141 CHECK_HALF_MV(0, 1, mx , my)
// The selected vector must stay inside the window scaled by 2.
143 av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
/*
 * Declared with the same leading parameters as hpel_motion_search().
 * NOTE(review): the rest of the parameter list and the whole body are not
 * visible in this view, so the behaviour cannot be documented from here; the
 * name suggests that no sub-pel refinement is performed -- TODO confirm.
 */
152 static int no_sub_motion_search(MpegEncContext * s,
153 int *mx_ptr, int *my_ptr, int dmin,
154 int src_index, int ref_index,
/*
 * Score the vector (mx, my) with the macroblock comparison functions
 * (s->dsp.mb_cmp) under c->mb_flags.
 *
 * mx, my are split into an integer part (>> (qpel+1)) and a sub-pel part
 * (& mask) before being passed to cmp(). When add_rate is non-zero the
 * vector cost relative to (pred_x, pred_y), scaled by c->mb_penalty_factor,
 * is added, except for a (0,0) vector with size 0.
 *
 * NOTE(review): `qpel` is the raw result of flags & FLAG_QPEL and is used
 * arithmetically (qpel+1, 1+2*qpel); that is only meaningful if FLAG_QPEL
 * has the value 1 -- confirm against its definition.
 * The tail of the parameter list, some declarations and the return
 * statement are not visible in this view.
 */
162 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
163 int src_index, int ref_index, int size,
166 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
167 MotionEstContext * const c= &s->me;
168 const int penalty_factor= c->mb_penalty_factor;
169 const int flags= c->mb_flags;
170 const int qpel= flags & FLAG_QPEL;
171 const int mask= 1+2*qpel;
172 me_cmp_func cmp_sub, chroma_cmp_sub;
// The entry at size+1 is used for chroma.
179 cmp_sub= s->dsp.mb_cmp[size];
180 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
182 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
183 //FIXME check cbp before adding penalty for (0,0) vector
184 if(add_rate && (mx || my || size>0))
185 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
/*
 * Non-static wrapper: forwards every argument unchanged to the file-local
 * get_mb_score() and returns its result.
 */
190 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
191 int ref_index, int size, int h, int add_rate)
193 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
/*
 * Quarter-pel counterpart of CHECK_HALF_MV: evaluate full-pel position
 * (x, y) plus the quarter-pel offset (dx, dy).
 * hx/hy are the candidate in quarter-pel units (4*x+dx, 4*y+dy). The score
 * is cmp_qpel() with cmpf/chroma_cmpf plus the mv_penalty[] cost relative to
 * (pred_x, pred_y) scaled by penalty_factor, and is handed to COPY3_IF_LT
 * together with dmin and bx/by (presumably keeping the lower score and its
 * coordinates -- TODO confirm against that macro).
 */
196 #define CHECK_QUARTER_MV(dx, dy, x, y)\
198 const int hx= 4*(x)+(dx);\
199 const int hy= 4*(y)+(dy);\
200 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
201 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
202 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Quarter-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
 *
 * Visible outline:
 *  1. optionally rescore the start vector with the sub-pel metric;
 *  2. if (mx, my) is strictly inside the window, estimate a score for every
 *     sub-pel offset nx, ny in -3..3 from the cached full-pel scores around
 *     (mx, my) and keep a ranked list in best[]/best_pos[];
 *  3. evaluate the first c->avctx->me_subpel_quality list entries exactly
 *     with CHECK_QUARTER_MV.
 *
 * NOTE(review): parts of the parameter list, several declarations (best,
 * best_pos, nx, ny, i, tl, cxy, d ...), braces, the insertion-loop headers
 * and the return path are not visible in this view.
 */
205 static int qpel_motion_search(MpegEncContext * s,
206 int *mx_ptr, int *my_ptr, int dmin,
207 int src_index, int ref_index,
210 MotionEstContext * const c= &s->me;
211 const int mx = *mx_ptr;
212 const int my = *my_ptr;
213 const int penalty_factor= c->sub_penalty_factor;
214 const unsigned map_generation = c->map_generation;
215 const int subpel_quality= c->avctx->me_subpel_quality;
216 uint32_t *map= c->map;
217 me_cmp_func cmpf, chroma_cmpf;
218 me_cmp_func cmp_sub, chroma_cmp_sub;
221 int flags= c->sub_flags;
// Two metric pairs: me_cmp (cmpf) and me_sub_cmp (cmp_sub); the entry at
// size+1 is used for chroma in both.
223 cmpf= s->dsp.me_cmp[size];
224 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
227 cmp_sub= s->dsp.me_sub_cmp[size];
228 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
230 if(c->skip){ //FIXME somehow move up (benchmark)
// Rescore the start vector when the sub-pel metric differs from me_cmp.
236 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
237 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
238 if(mx || my || size>0)
239 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
242 if (mx > xmin && mx < xmax &&
243 my > ymin && my < ymax) {
244 int bx=4*mx, by=4*my;
// Cached full-pel scores: top, left, right, bottom and centre.
// NOTE: this `c` is an int that shadows the MotionEstContext pointer `c`
// declared above; presumably that is why the dia_size test below goes
// through s->me instead of c.
247 const int index= (my<<ME_MAP_SHIFT) + mx;
248 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
249 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
250 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
251 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
252 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
// memset writes the byte value 64 into every byte of the eight ints, giving
// each slot the same large positive starting score.
256 memset(best, 64, sizeof(int)*8);
// Variant for dia_size >= 2: also reads the four diagonal neighbours from
// score_map.
257 if(s->me.dia_size>=2){
258 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
259 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
260 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
261 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
263 for(ny= -3; ny <= 3; ny++){
264 for(nx= -3; nx <= 3; nx++){
265 //FIXME this could overflow (unlikely though)
// t2/c2/b2 are polynomials in nx built from the top/centre/bottom row of
// scores; they are then combined in ny, rounded (+512) and scaled back by
// >>10.
266 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
267 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
268 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
269 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
// Offsets that are multiples of 4 in both directions are full-pel positions
// and are skipped.
272 if((nx&3)==0 && (ny&3)==0) continue;
274 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
276 // if(nx&1) score-=1024*c->penalty_factor;
277 // if(ny&1) score-=1024*c->penalty_factor;
// Insert at rank i: shift entries i..6 down by one slot, then store the
// candidate position in quarter-pel units.
281 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
282 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
284 best_pos[i][0]= nx + 4*mx;
285 best_pos[i][1]= ny + 4*my;
// Other variant: coefficients are derived from t/l/r/b/c plus the single
// diagonal neighbour tl.
293 //FIXME this could overflow (unlikely though)
294 const int cx = 4*(r - l);
295 const int cx2= r + l - 2*c;
296 const int cy = 4*(b - t);
297 const int cy2= b + t - 2*c;
// The trailing "&& 0" makes this condition always false, so the score_map
// lookup on the next line is never used.
300 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
301 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
303 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
306 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
// Consistency checks: at offsets of +-4 the polynomial used below must
// reproduce 32 times the scores of r, l, b, t and tl.
308 av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
309 av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
310 av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
311 av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
312 av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
314 for(ny= -3; ny <= 3; ny++){
315 for(nx= -3; nx <= 3; nx++){
316 //FIXME this could overflow (unlikely though)
317 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
320 if((nx&3)==0 && (ny&3)==0) continue;
// The estimate above carries a factor of 32 (see the 32*c term), so the
// vector cost is scaled by 32 as well.
322 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
323 // if(nx&1) score-=32*c->penalty_factor;
324 // if(ny&1) score-=32*c->penalty_factor;
328 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
329 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
331 best_pos[i][0]= nx + 4*mx;
332 best_pos[i][1]= ny + 4*my;
// Exact evaluation of the first subpel_quality ranked candidates; nx/ny are
// split into full-pel part (>>2) and quarter-pel part (&3).
339 for(i=0; i<subpel_quality; i++){
342 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
345 av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
/*
 * Evaluate the full-pel candidate (x, y) unless it is already in the map.
 * key   identifies (x, y) within the current map generation;
 * index is the slot of (x, y) in map[]/score_map[] (masked to ME_MAP_SIZE).
 * The asserts require (x, y) to lie inside the search window.
 * On a miss (map[index] != key) the candidate is scored with cmp(), the raw
 * score is cached in score_map[index], the vector cost (coordinates shifted
 * left by `shift`) is added and the result is handed to COPY3_IF_LT with
 * dmin and best[0]/best[1].
 * NOTE(review): the line between the cmp() call and the score_map store is
 * not visible here (presumably it records key in map[index] -- TODO
 * confirm), as are the wrapper lines of the macro.
 */
358 #define CHECK_MV(x,y)\
360 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
361 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
362 av_assert2((x) >= xmin);\
363 av_assert2((x) <= xmax);\
364 av_assert2((y) >= ymin);\
365 av_assert2((y) <= ymax);\
366 if(map[index]!=key){\
367 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
369 score_map[index]= d;\
370 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
371 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
/*
 * Candidate check with clamping: Lx2/Ly2 are Lx/Ly clamped into
 * [xmin, xmax] and [ymin, ymax] respectively.
 * NOTE(review): the lines that define Lx/Ly from (ax, ay) and the line that
 * consumes Lx2/Ly2 are not visible in this view (presumably the clamped
 * position is passed to CHECK_MV -- TODO confirm).
 */
375 #define CHECK_CLIPPED_MV(ax,ay)\
379 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
380 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
/*
 * Variant of CHECK_MV taking an extra new_dir argument; unlike CHECK_MV it
 * contains no range assertions.
 * On a map miss the candidate (x, y) is scored with cmp(), cached in
 * score_map[index], and the vector cost is added to d.
 * NOTE(review): the lines after the cost computation, including every use of
 * new_dir, are not visible in this view (presumably the best vector, dmin
 * and a direction variable are updated when d improves -- TODO confirm).
 */
384 #define CHECK_MV_DIR(x,y,new_dir)\
386 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
387 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
388 if(map[index]!=key){\
389 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
391 score_map[index]= d;\
392 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
/*
 * Debugging aid: print a diagnostic whenever (x, y) falls outside the search
 * window scaled by S bits (xmin<<S .. xmax<<S, ymin<<S .. ymax<<S).
 * Each message lists the violated bound, x, y, s->mb_x and s->mb_y, followed
 * by the bound name and the stringified tag v; no newline is printed.
 * Expects xmin, xmax, ymin, ymax and s to be in scope. Every use of this
 * macro visible in this file is commented out.
 */
402 #define check(x,y,S,v)\
403 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
404 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
405 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
406 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
/*
 * Additional locals for the full-pel search functions:
 *   map    alias of c->map;
 *   qpel   flags & FLAG_QPEL;
 *   shift  1 + qpel, used by the CHECK_* macros to scale full-pel
 *          coordinates before the mv_penalty[] lookup.
 * Expects `c` and `flags` to be in scope.
 * NOTE(review): shift = 1+qpel is 2 for quarter-pel only if FLAG_QPEL has
 * the value 1 -- confirm against its definition.
 */
408 #define LOAD_COMMON2\
409 uint32_t *map= c->map;\
410 const int qpel= flags&FLAG_QPEL;\
411 const int shift= 1+qpel;\
/*
 * Full-pel search that probes the four direct neighbours of best[].
 *
 * best            in/out: best[0]/best[1] is the current best vector.
 * dmin            score of best[] on entry.
 * penalty_factor  multiplier for the vector cost.
 * size, h, flags, src_index, ref_index are forwarded to cmp().
 *
 * NOTE(review): some declarations, the loop construct around the four
 * probes and the return statement are not visible in this view.
 */
413 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
414 int src_index, int ref_index, int const penalty_factor,
415 int size, int h, int flags)
417 MotionEstContext * const c= &s->me;
418 me_cmp_func cmpf, chroma_cmpf;
422 unsigned map_generation = c->map_generation;
// The entry at size+1 is used for chroma.
424 cmpf= s->dsp.me_cmp[size];
425 chroma_cmpf= s->dsp.me_cmp[size+1];
427 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
428 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
429 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
430 if(map[index]!=key){ //this will be executed only very rarely
431 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
// dir is the value of next_dir at the start of this step. Each probe is
// skipped when dir names the opposite direction (presumably the position the
// search just came from -- TODO confirm) or when it would leave the window.
438 const int dir= next_dir;
439 const int x= best[0];
440 const int y= best[1];
443 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
444 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
445 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
446 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
/*
 * Full-pel search over diamonds of size 1, 2 and 4 around best[].
 * Same parameter contract as the other *_search functions in this file:
 * best is in/out, dmin is the score of best[] on entry.
 *
 * NOTE(review): some declarations, the statements executed when a bounds
 * test or the "best changed" test succeeds, and the return statement are not
 * visible in this view.
 */
454 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
455 int src_index, int ref_index, int const penalty_factor,
456 int size, int h, int flags)
458 MotionEstContext * const c= &s->me;
459 me_cmp_func cmpf, chroma_cmpf;
463 unsigned map_generation = c->map_generation;
465 cmpf= s->dsp.me_cmp[size];
466 chroma_cmpf= s->dsp.me_cmp[size+1];
468 for(dia_size=1; dia_size<=4; dia_size++){
470 const int x= best[0];
471 const int y= best[1];
// dia_size & (dia_size-1) is non-zero unless dia_size is a power of two, so
// size 3 is skipped.
473 if(dia_size&(dia_size-1)) continue;
// Tests whether the diamond would extend past the search window.
475 if( x + dia_size > xmax
476 || x - dia_size < xmin
477 || y + dia_size > ymax
478 || y - dia_size < ymin)
// Every second point (dir += 2) on each of the four diamond edges.
481 for(dir= 0; dir<dia_size; dir+=2){
484 CHECK_MV(x + dir , y + dia_size - dir);
485 CHECK_MV(x + dia_size - dir, y - dir );
486 CHECK_MV(x - dir , y - dia_size + dir);
487 CHECK_MV(x - dia_size + dir, y + dir );
// True when one of the probes above moved the best vector.
490 if(x!=best[0] || y!=best[1])
/*
 * Full-pel search probing six points around best[]: two at horizontal
 * distance dia_size and four at vertical distance dia_size with a horizontal
 * offset of about half of it.
 *
 * dia_size  initial size. After each size has converged it is halved when
 *           the INITIAL value was a power of two (dec == 0), otherwise
 *           decremented by one, until it reaches 0.
 * Other parameters as for the other *_search functions in this file.
 *
 * NOTE(review): some declarations, the opening of the do-loop and the return
 * statement are not visible in this view.
 */
496 static int hex_search(MpegEncContext * s, int *best, int dmin,
497 int src_index, int ref_index, int const penalty_factor,
498 int size, int h, int flags, int dia_size)
500 MotionEstContext * const c= &s->me;
501 me_cmp_func cmpf, chroma_cmpf;
504 unsigned map_generation = c->map_generation;
// Non-zero unless the initial dia_size is a power of two; computed once.
506 const int dec= dia_size & (dia_size-1);
508 cmpf= s->dsp.me_cmp[size];
509 chroma_cmpf= s->dsp.me_cmp[size+1];
511 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
516 CHECK_CLIPPED_MV(x -dia_size , y);
517 CHECK_CLIPPED_MV(x+ dia_size , y);
518 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
519 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// NOTE(review): -dia_size>>1 right-shifts a negative value, whose result is
// implementation-defined in C (typically an arithmetic shift).
521 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
522 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
// Repeat at this size for as long as the best vector keeps moving.
524 }while(best[0] != x || best[1] != y);
/*
 * Full-pel search using the eight-point pattern in hex[] scaled by a
 * shrinking dia_size, followed by a probe of the four direct neighbours.
 *
 * The initial size is the low 8 bits of c->dia_size. It is halved per round
 * when that initial value is a power of two (dec == 0), otherwise
 * decremented by one, until it reaches 0.
 * Parameters as for the other *_search functions in this file.
 *
 * NOTE(review): some declarations, the loop over i, the opening of the
 * do-loop and the return statement are not visible in this view.
 */
530 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
531 int src_index, int ref_index, int const penalty_factor,
532 int size, int h, int flags)
534 MotionEstContext * const c= &s->me;
535 me_cmp_func cmpf, chroma_cmpf;
538 unsigned map_generation = c->map_generation;
540 int dia_size= c->dia_size&0xFF;
541 const int dec= dia_size & (dia_size-1);
// Unit pattern: axis points at distance 2 and diagonal points at (+-1,+-1).
542 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
543 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
545 cmpf= s->dsp.me_cmp[size];
546 chroma_cmpf= s->dsp.me_cmp[size+1];
548 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
553 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
// Repeat at this size for as long as the best vector keeps moving.
555 }while(best[0] != x || best[1] != y);
// Final probe of the four direct neighbours.
560 CHECK_CLIPPED_MV(x+1, y);
561 CHECK_CLIPPED_MV(x, y+1);
562 CHECK_CLIPPED_MV(x-1, y);
563 CHECK_CLIPPED_MV(x, y-1);
/*
 * Multi-stage full-pel search that ends by calling hex_search() with size 2.
 *
 * Visible stages, all clipped to the search window:
 *  - a horizontal scan of x2 over x +- (dia_size-1) in steps of 2;
 *  - a vertical scan of y2 over y +- (dia_size/2-1) in steps of 2;
 *  - a scan of the 5x5 square around (x, y);
 *  - the 16-point hex[] pattern scaled by j = 1 .. dia_size/4;
 *  - hex_search(..., 2), whose result is returned.
 * dia_size is c->dia_size with bit 0 cleared and limited to the low 8 bits
 * (mask 0xFE).
 *
 * NOTE(review): the assignments of x/y, the statements inside the scan
 * loops and the loop over i are not visible in this view (presumably each
 * scan body is a CHECK_MV on the loop position -- TODO confirm).
 */
568 static int umh_search(MpegEncContext * s, int *best, int dmin,
569 int src_index, int ref_index, int const penalty_factor,
570 int size, int h, int flags)
572 MotionEstContext * const c= &s->me;
573 me_cmp_func cmpf, chroma_cmpf;
576 unsigned map_generation = c->map_generation;
577 int x,y,x2,y2, i, j, d;
578 const int dia_size= c->dia_size&0xFE;
579 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
580 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
581 {-2, 3}, { 0, 4}, { 2, 3},
582 {-2,-3}, { 0,-4}, { 2,-3},};
584 cmpf= s->dsp.me_cmp[size];
585 chroma_cmpf= s->dsp.me_cmp[size+1];
589 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
592 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
598 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
599 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
604 //FIXME prevent the CLIP stuff
606 for(j=1; j<=dia_size/4; j++){
608 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
612 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
/*
 * Exhaustive full-pel search over the square [-dia_size, dia_size] in both
 * axes, clipped to the search window, followed by a probe of one position
 * and its four direct neighbours.
 * dia_size is the low 8 bits of c->dia_size. Note that the scan ranges are
 * centred on (0, 0), not on best[].
 *
 * NOTE(review): the statements inside the scan loops, the assignments of x/y
 * before the final probes and the return statement are not visible in this
 * view.
 */
615 static int full_search(MpegEncContext * s, int *best, int dmin,
616 int src_index, int ref_index, int const penalty_factor,
617 int size, int h, int flags)
619 MotionEstContext * const c= &s->me;
620 me_cmp_func cmpf, chroma_cmpf;
623 unsigned map_generation = c->map_generation;
625 const int dia_size= c->dia_size&0xFF;
627 cmpf= s->dsp.me_cmp[size];
628 chroma_cmpf= s->dsp.me_cmp[size+1];
630 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
631 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
// Probe (x, y) and its four direct neighbours, each clamped to the window.
639 CHECK_CLIPPED_MV(x , y);
640 CHECK_CLIPPED_MV(x+1, y);
641 CHECK_CLIPPED_MV(x, y+1);
642 CHECK_CLIPPED_MV(x-1, y);
643 CHECK_CLIPPED_MV(x, y-1);
/*
 * Candidate check used by sab_diamond_search().
 * On a map miss the candidate (ax, ay) is scored with cmp(), cached in
 * score_map[index], and the vector cost is added. If the total is lower than
 * the height of the last minima[] entry, the insertion slot j is found by
 * advancing while d >= minima[j].height, the entries from j on are moved one
 * slot down with memmove (the last one is dropped), and slot j receives
 * height d with checked cleared.
 * NOTE(review): several lines (the initialisation of j, the stores of the
 * x/y fields and the closing lines) are not visible in this view.
 */
650 #define SAB_CHECK_MV(ax,ay)\
652 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
653 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
654 if(map[index]!=key){\
655 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
657 score_map[index]= d;\
658 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
659 if(d < minima[minima_count-1].height){\
662 while(d >= minima[j].height) j++;\
664 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
666 minima[j].checked= 0;\
667 minima[j].height= d;\
// Full-pel search that keeps the minima_count = |c->dia_size| best positions
// in minima[] and expands every not-yet-checked one.
//
// Visible outline:
//  1. collect every map[] entry of the current generation into minima[],
//     decoding x/y from the key and adding the vector cost;
//  2. sort with qsort()/minima_cmp and pad up to minima_count with
//     maximum-height entries at (0,0);
//  3. for each unchecked entry strictly inside the window, probe neighbours
//     with SAB_CHECK_MV and mark the entry checked;
//  4. take minima[0] as the result and, if it is strictly inside the window,
//     make sure its four direct neighbours are in the map.
//
// NOTE(review): several lines (declarations, the end of the block comment
// below, the continue/j++ statements, two of the neighbour probes and the
// return statement) are not visible in this view.
677 #define MAX_SAB_SIZE ME_MAP_SIZE
678 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
679 int src_index, int ref_index, int const penalty_factor,
680 int size, int h, int flags)
682 MotionEstContext * const c= &s->me;
683 me_cmp_func cmpf, chroma_cmpf;
684 Minima minima[MAX_SAB_SIZE];
685 const int minima_count= FFABS(c->dia_size);
689 unsigned map_generation = c->map_generation;
691 cmpf= s->dsp.me_cmp[size];
692 chroma_cmpf= s->dsp.me_cmp[size+1];
694 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
695 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
697 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
698 uint32_t key= map[i];
// Bias both packed coordinates so they become non-negative before the
// generation bits are compared and the fields are extracted.
700 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// NOTE(review): (-1)<<n left-shifts a negative value, which is undefined
// behaviour in C; an unsigned or negated form of the mask would avoid it.
702 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
704 minima[j].height= score_map[i];
705 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
706 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
// Remove the bias again to recover signed coordinates.
707 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
708 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
710 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
711 if( minima[j].x > xmax || minima[j].x < xmin
712 || minima[j].y > ymax || minima[j].y < ymin)
// The vector cost is not added for the (0,0) entry.
716 if(minima[j].x || minima[j].y)
717 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
722 qsort(minima, j, sizeof(Minima), minima_cmp);
// Pad the remaining slots with a very large height at position (0,0).
724 for(; j<minima_count; j++){
725 minima[j].height=256*256*256*64;
727 minima[j].x= minima[j].y=0;
730 for(i=0; i<minima_count; i++){
731 const int x= minima[i].x;
732 const int y= minima[i].y;
735 if(minima[i].checked) continue;
// Tests whether the entry lies on or outside the window border.
737 if( x >= xmax || x <= xmin
738 || y >= ymax || y <= ymin)
743 SAB_CHECK_MV(x , y-1)
744 SAB_CHECK_MV(x , y+1)
746 minima[i].checked= 1;
749 best[0]= minima[0].x;
750 best[1]= minima[0].y;
751 dmin= minima[0].height;
753 if( best[0] < xmax && best[0] > xmin
754 && best[1] < ymax && best[1] > ymin){
756 //ensure that the reference samples for hpel refinement are in the map
757 CHECK_MV(best[0]-1, best[1])
758 CHECK_MV(best[0]+1, best[1])
759 CHECK_MV(best[0], best[1]-1)
760 CHECK_MV(best[0], best[1]+1)
// Full-pel search over diamonds of growing size 1 .. c->dia_size around
// best[]. For each size the four edges of the diamond are walked; the
// start/end values of every edge loop are chosen so that only positions
// inside the search window are passed to CHECK_MV.
// Parameters as for the other *_search functions in this file.
//
// NOTE(review): some declarations, the statements executed when the best
// vector has changed and the return statement are not visible in this view.
765 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
766 int src_index, int ref_index, int const penalty_factor,
767 int size, int h, int flags)
769 MotionEstContext * const c= &s->me;
770 me_cmp_func cmpf, chroma_cmpf;
774 unsigned map_generation = c->map_generation;
776 cmpf= s->dsp.me_cmp[size];
777 chroma_cmpf= s->dsp.me_cmp[size+1];
779 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
781 const int x= best[0];
782 const int y= best[1];
// Edge from (x, y+dia_size) towards (x+dia_size, y): start keeps
// y+dia_size-dir <= ymax, end keeps x+dir <= xmax.
784 start= FFMAX(0, y + dia_size - ymax);
785 end = FFMIN(dia_size, xmax - x + 1);
786 for(dir= start; dir<end; dir++){
789 //check(x + dir,y + dia_size - dir,0, a0)
790 CHECK_MV(x + dir , y + dia_size - dir);
// Edge from (x+dia_size, y) towards (x, y-dia_size).
793 start= FFMAX(0, x + dia_size - xmax);
794 end = FFMIN(dia_size, y - ymin + 1);
795 for(dir= start; dir<end; dir++){
798 //check(x + dia_size - dir, y - dir,0, a1)
799 CHECK_MV(x + dia_size - dir, y - dir );
// Edge from (x, y-dia_size) towards (x-dia_size, y).
802 start= FFMAX(0, -y + dia_size + ymin );
803 end = FFMIN(dia_size, x - xmin + 1);
804 for(dir= start; dir<end; dir++){
807 //check(x - dir,y - dia_size + dir,0, a2)
808 CHECK_MV(x - dir , y - dia_size + dir);
// Edge from (x-dia_size, y) towards (x, y+dia_size).
811 start= FFMAX(0, -x + dia_size + xmin );
812 end = FFMIN(dia_size, ymax - y + 1);
813 for(dir= start; dir<end; dir++){
816 //check(x - dia_size + dir, y + dir,0, a3)
817 CHECK_MV(x - dia_size + dir, y + dir );
// True when one of the probes above moved the best vector.
820 if(x!=best[0] || y!=best[1])
// Dispatch to one of the full-pel search functions according to
// c->dia_size and return its result. All arguments are forwarded unchanged.
//
// Visible selection order (first match wins):
//   (condition not visible)  -> funny_diamond_search
//   dia_size < -1            -> sab_diamond_search
//   dia_size < 2             -> small_diamond_search
//   dia_size > 1024          -> full_search
//   dia_size > 768           -> umh_search
//   dia_size > 512           -> hex_search, size = dia_size & 0xFF
//   dia_size > 256           -> l2s_dia_search
//   otherwise                -> var_diamond_search
//
// NOTE(review): the first condition and the final else line are not visible
// in this view.
826 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
827 int src_index, int ref_index, int const penalty_factor,
828 int size, int h, int flags){
829 MotionEstContext * const c= &s->me;
831 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
832 else if(c->dia_size<-1)
833 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
834 else if(c->dia_size<2)
835 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
836 else if(c->dia_size>1024)
837 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838 else if(c->dia_size>768)
839 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840 else if(c->dia_size>512)
841 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
842 else if(c->dia_size>256)
843 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
845 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
849 @param P a list of candidate mvs to check before starting the
850 iterative search. If one of the candidates is close to the optimal mv, then
851 it takes fewer iterations. And it increases the chance that we find the
// Predictor-based full-pel search: score a set of candidate vectors, then
// refine the best one with diamond_search().
//
// P             candidate predictors, read through the P_LEFT, P_TOP,
//               P_TOPRIGHT and P_MEDIAN accessors.
// last_mv       vectors of a previous picture, indexed by macroblock
//               position; each is scaled as
//               (mv*ref_mv_scale + (1<<15)) >> 16 before use.
// ref_mv_scale  16.16 fixed-point scale applied to last_mv entries.
// flags, size, h are forwarded to the comparison functions.
//
// NOTE(review): several lines (declarations, the condition that selects the
// pre-pass metric, else branches, the body of the early-exit test, the inner
// statements of the last_predictor_count loops and the final stores/return)
// are not visible in this view.
854 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
855 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
856 int ref_mv_scale, int flags, int size, int h)
858 MotionEstContext * const c= &s->me;
859 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
860 i.e. the difference between the position of the
861 block currently being encoded and the position of
862 the block chosen to predict it from. */
863 int d; ///< the score (cmp + penalty) of any given mv
864 int dmin; /**< the best value of d, i.e. the score
865 corresponding to the mv stored in best[]. */
866 unsigned map_generation;
868 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
869 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
870 me_cmp_func cmpf, chroma_cmpf;
// Two alternative metric setups: the pre-pass one (pre_penalty_factor,
// me_pre_cmp) and the regular one (penalty_factor, me_cmp). The condition
// choosing between them is not visible here.
876 penalty_factor= c->pre_penalty_factor;
877 cmpf= s->dsp.me_pre_cmp[size];
878 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
880 penalty_factor= c->penalty_factor;
881 cmpf= s->dsp.me_cmp[size];
882 chroma_cmpf= s->dsp.me_cmp[size+1];
885 map_generation= update_map_generation(c);
// Start from the (0,0) vector and register it in the map.
888 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
889 map[0]= map_generation;
892 //FIXME precalc first term below?
893 if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
894 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
// On the first line of a slice only the left predictor and the co-located
// last_mv entry are tried.
897 if (s->first_slice_line) {
898 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
899 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
900 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// Early-exit test: the (0,0) score is below a threshold scaled by h*h and
// the listed predictors are all zero.
902 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
903 && ( P_LEFT[0] |P_LEFT[1]
905 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
// Median predictor and its four direct neighbours, the co-located last_mv
// entry, then the left/top/top-right predictors.
911 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
912 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
913 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
914 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
915 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
916 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
917 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
918 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
919 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
920 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
// last_mv entries of the left, top, right and bottom neighbouring
// macroblocks; top and bottom are guarded by the slice-boundary tests below.
924 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
925 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
926 if(!s->first_slice_line)
927 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
928 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
930 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
931 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
932 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
933 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
934 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Optional extra candidates: last_mv entries of all macroblocks within
// `count` of the current one, clipped to the picture. Candidates outside the
// search window are skipped.
938 if(c->avctx->last_predictor_count){
939 const int count= c->avctx->last_predictor_count;
940 const int xstart= FFMAX(0, s->mb_x - count);
941 const int ystart= FFMAX(0, s->mb_y - count);
942 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
943 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
946 for(mb_y=ystart; mb_y<yend; mb_y++){
948 for(mb_x=xstart; mb_x<xend; mb_x++){
// NOTE(review): this index adds 1 to both mb_x and mb_y, unlike ref_mv_xy
// above -- confirm the intended last_mv layout.
949 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
950 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
951 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
953 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
959 //check(best[0],best[1],0, b0)
// Iterative refinement starting from the best candidate found above.
960 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
962 //check(best[0],best[1],0, b1)
/*
 * Non-static entry point for epzs_motion_search_internal().
 * When c->flags == 0, h == 16 and size == 0 the internal function is called
 * with those values spelled as literal constants (0, 0, 16); otherwise the
 * run-time values c->flags, size and h are passed. Both calls pass the same
 * values, so the split is presumably there to let the compiler specialise
 * the always-inline internal function for the common case -- TODO confirm.
 * NOTE(review): the end of the parameter list and the braces are not visible
 * in this view.
 */
969 //this function is dedicated to the braindamaged gcc
970 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
971 int P[10][2], int src_index, int ref_index,
972 int16_t (*last_mv)[2], int ref_mv_scale,
975 MotionEstContext * const c= &s->me;
976 //FIXME convert other functions in the same way if faster
977 if(c->flags==0 && h==16 && size==0){
978 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
980 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
982 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
/*
 * Predictor-based full-pel search: score the candidate predictors listed
 * below, then refine with diamond_search().
 * Uses c->penalty_factor, c->flags and the me_cmp comparison functions.
 * Candidates scaled from last_mv use (mv*ref_mv_scale + (1<<15)) >> 16.
 *
 * NOTE(review): the end of the parameter list, several declarations (size,
 * h, best, dmin ...), the else line between the two candidate groups and the
 * final stores/return are not visible in this view.
 */
986 static int epzs_motion_search4(MpegEncContext * s,
987 int *mx_ptr, int *my_ptr, int P[10][2],
988 int src_index, int ref_index, int16_t (*last_mv)[2],
991 MotionEstContext * const c= &s->me;
994 unsigned map_generation;
995 const int penalty_factor= c->penalty_factor;
998 const int ref_mv_stride= s->mb_stride;
999 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1000 me_cmp_func cmpf, chroma_cmpf;
1002 int flags= c->flags;
1005 cmpf= s->dsp.me_cmp[size];
1006 chroma_cmpf= s->dsp.me_cmp[size+1];
1008 map_generation= update_map_generation(c);
// First slice line: left predictor, co-located last_mv entry and P_MV1.
1013 if (s->first_slice_line) {
1014 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1015 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1016 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1017 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// Second candidate group: P_MV1, median, left, top, top-right and the
// co-located last_mv entry.
1019 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1020 //FIXME try some early stop
1021 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1022 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1023 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1024 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1025 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1026 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv entries of the right and, unless this is the last macroblock row
// before end_mb_y, the bottom neighbouring macroblock.
1029 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1030 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1031 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1032 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1033 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Iterative refinement starting from the best candidate found above.
1036 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
/*
 * Predictor-based full-pel search with size fixed to 0: score the candidate
 * predictors listed below, then refine with diamond_search().
 * The visible lines match epzs_motion_search4() except for the local
 * `const int size=0`.
 * Candidates scaled from last_mv use (mv*ref_mv_scale + (1<<15)) >> 16.
 *
 * NOTE(review): the end of the parameter list, several declarations, the
 * else line between the two candidate groups and the final stores/return are
 * not visible in this view.
 */
1044 //try to merge with above FIXME (needs PSNR test)
1045 static int epzs_motion_search2(MpegEncContext * s,
1046 int *mx_ptr, int *my_ptr, int P[10][2],
1047 int src_index, int ref_index, int16_t (*last_mv)[2],
1050 MotionEstContext * const c= &s->me;
1053 unsigned map_generation;
1054 const int penalty_factor= c->penalty_factor;
1055 const int size=0; //FIXME pass as arg
1057 const int ref_mv_stride= s->mb_stride;
1058 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1059 me_cmp_func cmpf, chroma_cmpf;
1061 int flags= c->flags;
1064 cmpf= s->dsp.me_cmp[size];
1065 chroma_cmpf= s->dsp.me_cmp[size+1];
1067 map_generation= update_map_generation(c);
// First slice line: left predictor, co-located last_mv entry and P_MV1.
1072 if (s->first_slice_line) {
1073 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1074 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1075 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1076 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// Second candidate group: P_MV1, median, left, top, top-right and the
// co-located last_mv entry.
1078 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1079 //FIXME try some early stop
1080 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1081 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1082 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1083 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1084 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1085 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv entries of the right and, unless this is the last macroblock row
// before end_mb_y, the bottom neighbouring macroblock.
1088 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1089 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1090 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1091 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1092 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Iterative refinement starting from the best candidate found above.
1095 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);