3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
27 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
29 uint32_t av_unused * const score_map= c->score_map;\
30 const int av_unused xmin= c->xmin;\
31 const int av_unused ymin= c->ymin;\
32 const int av_unused xmax= c->xmax;\
33 const int av_unused ymax= c->ymax;\
34 uint8_t *mv_penalty= c->current_mv_penalty;\
35 const int pred_x= c->pred_x;\
36 const int pred_y= c->pred_y;\
// CHECK_HALF_MV(dx, dy, x, y): evaluate the half-pel candidate whose
// coordinates in half-pel units are hx = 2*x+dx and hy = 2*y+dy.
// d is the cmp_hpel() result plus the mv_penalty terms for (hx, hy) relative
// to (pred_x, pred_y), scaled by penalty_factor. The result is handed to
// COPY3_IF_LT(dmin, d, bx, hx, by, hy); that macro is defined elsewhere and
// presumably keeps (d, hx, hy) when d < dmin -- confirm.
// Expects d, dmin, bx, by, mv_penalty, pred_x, pred_y, penalty_factor and the
// cmp_hpel() arguments to be in scope where the macro is expanded.
38 #define CHECK_HALF_MV(dx, dy, x, y)\
40 const int hx= 2*(x)+(dx);\
41 const int hy= 2*(y)+(dy);\
42 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
43 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
44 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
// Half-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
//
// Visible steps:
//  - pick the sub-pel comparison functions s->dsp.me_sub_cmp[size] (luma) and
//    [size+1] (chroma) and use c->sub_flags / c->sub_penalty_factor;
//  - when me_cmp and me_sub_cmp differ, re-score the centre with cmp_sub and
//    add the MV penalty (skipped for a zero vector with size 0);
//  - only when the centre lies strictly inside the xmin..xmax / ymin..ymax
//    window, read the four neighbour scores from score_map and test half-pel
//    candidates through CHECK_HALF_MV.
//
// s          encoder context; the motion estimation state is s->me
// mx_ptr     in: full-pel x of the start vector
// my_ptr     in: full-pel y of the start vector
// dmin       best score known so far for the start vector
// src_index  passed through unchanged to the comparison helpers
// ref_index  passed through unchanged to the comparison helpers
//
// NOTE(review): size and h are used below but their declaration, the
// conditions that select which CHECK_HALF_MV candidates run, the handling of
// c->skip and the return statement are not visible in this excerpt.
47 static int hpel_motion_search(MpegEncContext * s,
48 int *mx_ptr, int *my_ptr, int dmin,
49 int src_index, int ref_index,
52 MotionEstContext * const c= &s->me;
53 const int mx = *mx_ptr;
54 const int my = *my_ptr;
55 const int penalty_factor= c->sub_penalty_factor;
56 me_cmp_func cmp_sub, chroma_cmp_sub;
60 int flags= c->sub_flags;
64 cmp_sub= s->dsp.me_sub_cmp[size];
65 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
67 if(c->skip){ //FIXME move out of hpel?
// The full-pel search scored the centre with me_cmp; if the sub-pel metric is
// a different one, recompute the centre score so the comparisons below use a
// single metric.
73 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
74 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
75 if(mx || my || size>0)
76 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
79 if (mx > xmin && mx < xmax &&
80 my > ymin && my < ymax) {
// t, l, r, b: cached score of the top, left, right and bottom full-pel
// neighbour plus the MV penalty at the half-pel coordinates of that neighbour.
// These four use c->penalty_factor, not the sub_penalty_factor local above.
// bx and by are declared on a line that is not shown (presumably 2*mx and
// 2*my -- confirm).
82 const int index= (my<<ME_MAP_SHIFT) + mx;
83 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
84 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
85 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
87 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
93 unsigned map_generation= c->map_generation;
95 uint32_t *map= c->map;
// Sanity checks: each of the four direct neighbours must already be recorded
// in map under the current map_generation, otherwise the score_map values
// read above would be stale.
97 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98 assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100 assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
102 assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
103 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
104 assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
// Half-pel candidates. The branch structure choosing among them (presumably
// based on t, l, r and b) is on lines that are not shown.
106 CHECK_HALF_MV(0, 1, mx ,my-1)
108 CHECK_HALF_MV(1, 1, mx-1, my-1)
110 CHECK_HALF_MV(1, 1, mx , my-1)
112 CHECK_HALF_MV(1, 1, mx-1, my )
114 CHECK_HALF_MV(1, 0, mx-1, my )
116 CHECK_HALF_MV(1, 1, mx , my-1)
118 CHECK_HALF_MV(1, 1, mx-1, my-1)
120 CHECK_HALF_MV(1, 1, mx , my )
122 CHECK_HALF_MV(1, 0, mx , my )
127 CHECK_HALF_MV(1, 1, mx-1, my-1)
129 CHECK_HALF_MV(1, 1, mx , my )
131 CHECK_HALF_MV(1, 0, mx-1, my)
132 CHECK_HALF_MV(1, 1, mx-1, my)
135 CHECK_HALF_MV(1, 1, mx , my-1)
137 CHECK_HALF_MV(1, 1, mx-1, my)
139 CHECK_HALF_MV(1, 0, mx , my)
140 CHECK_HALF_MV(1, 1, mx , my)
142 CHECK_HALF_MV(0, 1, mx , my)
// The winning half-pel vector must stay inside the search window expressed in
// half-pel units.
144 assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
// Sub-pel search variant taking the same leading parameters as
// hpel_motion_search and qpel_motion_search (s, mx_ptr, my_ptr, dmin,
// src_index, ref_index).
// NOTE(review): the rest of the parameter list and the body are not visible
// in this excerpt; judging by the name it presumably performs no sub-pel
// refinement -- confirm against the full file.
153 static int no_sub_motion_search(MpegEncContext * s,
154 int *mx_ptr, int *my_ptr, int dmin,
155 int src_index, int ref_index,
// Score the vector (mx, my) with the macroblock-level comparison functions
// s->dsp.mb_cmp[size] (luma) and s->dsp.mb_cmp[size+1] (chroma), using
// c->mb_flags and c->mb_penalty_factor.
//
// mx, my     vector in sub-pel units: cmp() receives the integer part
//            mx>>(qpel+1) and the fractional part mx&mask, where
//            qpel = flags & FLAG_QPEL and mask = 1+2*qpel (same for my)
// src_index  passed through unchanged to cmp()
// ref_index  passed through unchanged to cmp()
// size       selects the entry of the mb_cmp table
//
// When add_rate is non-zero and the vector is non-zero or size>0, the
// mv_penalty for (mx - pred_x, my - pred_y) times penalty_factor is added.
// NOTE(review): the trailing parameters (h and add_rate are used below), the
// declaration of d and the return statement are not visible in this excerpt.
163 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
164 int src_index, int ref_index, int size,
167 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
168 MotionEstContext * const c= &s->me;
169 const int penalty_factor= c->mb_penalty_factor;
170 const int flags= c->mb_flags;
171 const int qpel= flags & FLAG_QPEL;
172 const int mask= 1+2*qpel;
173 me_cmp_func cmp_sub, chroma_cmp_sub;
180 cmp_sub= s->dsp.mb_cmp[size];
181 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
184 // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
186 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
187 //FIXME check cbp before adding penalty for (0,0) vector
188 if(add_rate && (mx || my || size>0))
189 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
// Externally visible wrapper: forwards every argument unchanged to the
// static inline get_mb_score() and returns its result.
194 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
195 int ref_index, int size, int h, int add_rate)
197 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
// CHECK_QUARTER_MV(dx, dy, x, y): quarter-pel counterpart of CHECK_HALF_MV.
// The candidate in quarter-pel units is hx = 4*x+dx, hy = 4*y+dy; d is the
// cmp_qpel() result (called with cmpf and chroma_cmpf) plus the mv_penalty
// terms scaled by penalty_factor, and is handed to
// COPY3_IF_LT(dmin, d, bx, hx, by, hy), a macro defined elsewhere.
// Expects d, dmin, bx, by, mv_penalty, pred_x, pred_y, penalty_factor and the
// cmp_qpel() arguments to be in scope where the macro is expanded.
200 #define CHECK_QUARTER_MV(dx, dy, x, y)\
202 const int hx= 4*(x)+(dx);\
203 const int hy= 4*(y)+(dy);\
204 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
205 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
206 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
// Quarter-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
//
// Visible steps:
//  - when me_cmp and me_sub_cmp differ, re-score the centre with cmp_sub;
//  - only when the centre lies strictly inside the search window, estimate a
//    score for every offset nx, ny in -3..3 (quarter-pel units) from the
//    full-pel scores cached in score_map, and keep a sorted list best[] with
//    the matching positions in best_pos[][];
//  - evaluate up to subpel_quality (avctx->me_subpel_quality) of those
//    positions with CHECK_QUARTER_MV.
// Two estimators are visible: one used when s->me.dia_size >= 2, which also
// reads the four diagonal neighbours, and one built from the coefficients
// cx, cx2, cy, cy2 and cxy.
//
// s          encoder context; the motion estimation state is s->me
// mx_ptr     in: full-pel x of the start vector
// my_ptr     in: full-pel y of the start vector
// dmin       best score known so far for the start vector
// src_index  passed through unchanged to the comparison helpers
// ref_index  passed through unchanged to the comparison helpers
//
// NOTE(review): size and h are used but their declaration, the declarations
// of best, best_pos, i, nx, ny, tl and cxy, the insertion test guarding the
// memmove calls, the handling of c->skip and the return statement are not
// visible in this excerpt.
209 static int qpel_motion_search(MpegEncContext * s,
210 int *mx_ptr, int *my_ptr, int dmin,
211 int src_index, int ref_index,
214 MotionEstContext * const c= &s->me;
215 const int mx = *mx_ptr;
216 const int my = *my_ptr;
217 const int penalty_factor= c->sub_penalty_factor;
218 const unsigned map_generation = c->map_generation;
219 const int subpel_quality= c->avctx->me_subpel_quality;
220 uint32_t *map= c->map;
221 me_cmp_func cmpf, chroma_cmpf;
222 me_cmp_func cmp_sub, chroma_cmp_sub;
225 int flags= c->sub_flags;
227 cmpf= s->dsp.me_cmp[size];
228 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
231 cmp_sub= s->dsp.me_sub_cmp[size];
232 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
234 if(c->skip){ //FIXME somehow move up (benchmark)
240 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
241 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
242 if(mx || my || size>0)
243 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
246 if (mx > xmin && mx < xmax &&
247 my > ymin && my < ymax) {
248 int bx=4*mx, by=4*my;
// Cached full-pel scores of the top, left, right and bottom neighbours and of
// the centre. The local named c shadows the MotionEstContext pointer c inside
// this scope, presumably why the test below reads s->me.dia_size.
251 const int index= (my<<ME_MAP_SHIFT) + mx;
252 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
253 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
254 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
255 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
256 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
// memset writes the byte value 64 into all 8 ints, so every entry of best
// starts as the same large positive sentinel rather than as 64.
260 memset(best, 64, sizeof(int)*8);
261 if(s->me.dia_size>=2){
262 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
263 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
264 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
265 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
267 for(ny= -3; ny <= 3; ny++){
268 for(nx= -3; nx <= 3; nx++){
// t2, c2 and b2 interpolate the top, centre and bottom rows horizontally at
// offset nx; score then interpolates those three vertically at offset ny,
// rounded (+512) and scaled down by 10 bits.
// NOTE(review): the right-hand sides of t2, c2 and b2 contain only int
// operands, so they are evaluated in int before being widened to int64_t.
269 //FIXME this could overflow (unlikely though)
270 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
271 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
272 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
273 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
// Offsets that are multiples of 4 in both directions are full-pel positions
// and are skipped.
276 if((nx&3)==0 && (ny&3)==0) continue;
278 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
280 // if(nx&1) score-=1024*c->penalty_factor;
281 // if(ny&1) score-=1024*c->penalty_factor;
// Open slot i by shifting the tail of best and best_pos down one entry, then
// store the position in quarter-pel units.
285 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
286 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
288 best_pos[i][0]= nx + 4*mx;
289 best_pos[i][1]= ny + 4*my;
// Second estimator: coefficients derived from l, r, t, b, c and the top-left
// score tl; the asserts further down state the identities they must satisfy.
297 //FIXME this could overflow (unlikely though)
298 const int cx = 4*(r - l);
299 const int cx2= r + l - 2*c;
300 const int cy = 4*(b - t);
301 const int cy2= b + t - 2*c;
// The trailing && 0 makes this condition always false, so the cached tl on the
// next line is never selected through it.
304 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
305 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
307 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
310 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
// The estimator must reproduce the known scores at offsets of one full pel
// (+4 or -4 quarter-pel units) for r, l, b, t and tl.
312 assert(16*cx2 + 4*cx + 32*c == 32*r);
313 assert(16*cx2 - 4*cx + 32*c == 32*l);
314 assert(16*cy2 + 4*cy + 32*c == 32*b);
315 assert(16*cy2 - 4*cy + 32*c == 32*t);
316 assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
318 for(ny= -3; ny <= 3; ny++){
319 for(nx= -3; nx <= 3; nx++){
320 //FIXME this could overflow (unlikely though)
321 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
324 if((nx&3)==0 && (ny&3)==0) continue;
// This estimate is scaled by 32 (note the 32*c term above), hence the factor
// 32 on the penalty.
326 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
327 // if(nx&1) score-=32*c->penalty_factor;
328 // if(ny&1) score-=32*c->penalty_factor;
332 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
333 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
335 best_pos[i][0]= nx + 4*mx;
336 best_pos[i][1]= ny + 4*my;
// Evaluate up to subpel_quality candidates with the real comparison function.
// nx and ny are presumably reloaded from best_pos[i] on lines not shown; they
// are split into a full-pel part (>>2) and a quarter-pel fraction (&3).
343 for(i=0; i<subpel_quality; i++){
346 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
349 assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
// CHECK_MV(x,y): evaluate the full-pel candidate (x, y) unless map[index]
// already holds its key for the current map_generation.
// key combines y, x and map_generation; index is the position of the
// candidate in map and score_map, masked to ME_MAP_SIZE entries.
// The asserts require the candidate to lie inside xmin..xmax and ymin..ymax,
// so callers must clip beforehand (see CHECK_CLIPPED_MV).
// For a new candidate the raw cmp() result is stored in score_map[index]; the
// MV penalty is added only afterwards, so score_map holds scores without
// penalty. COPY3_IF_LT then receives (dmin, d, best[0], x, best[1], y).
// NOTE(review): the line that records key in map is not visible here.
362 #define CHECK_MV(x,y)\
364 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
365 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
366 assert((x) >= xmin);\
367 assert((x) <= xmax);\
368 assert((y) >= ymin);\
369 assert((y) <= ymax);\
370 /*printf("check_mv %d %d\n", x, y);*/\
371 if(map[index]!=key){\
372 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
374 score_map[index]= d;\
375 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
376 /*printf("score:%d\n", d);*/\
377 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
// CHECK_CLIPPED_MV(ax,ay): clamp a candidate into the search window before it
// is evaluated. Lx2 is Lx limited to xmin..xmax and Ly2 is Ly limited to
// ymin..ymax.
// NOTE(review): the lines defining Lx and Ly (presumably copies of ax and ay)
// and the line that consumes Lx2 and Ly2 (presumably CHECK_MV) are not
// visible in this excerpt -- confirm.
381 #define CHECK_CLIPPED_MV(ax,ay)\
385 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
386 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
// CHECK_MV_DIR(x,y,new_dir): same lookup and scoring as CHECK_MV, without the
// range asserts. A candidate not yet in map for this map_generation is scored
// with cmp(), the raw score is stored in score_map[index] and the MV penalty
// is added to d afterwards.
// NOTE(review): the lines that record key in map, compare d with dmin and use
// new_dir are not visible in this excerpt.
390 #define CHECK_MV_DIR(x,y,new_dir)\
392 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
393 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
394 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
395 if(map[index]!=key){\
396 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
398 score_map[index]= d;\
399 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
400 /*printf("score:%d\n", d);*/\
// check(x,y,S,v): debugging aid. Prints a line to stdout whenever x or y lies
// outside the search window scaled by S bits (xmin<<S .. xmax<<S and
// ymin<<S .. ymax<<S). Each message contains the violated bound, x, y and the
// macroblock position s->mb_x, s->mb_y, followed by the bound name and the
// stringified tag v. Every call site visible in this excerpt is commented out.
410 #define check(x,y,S,v)\
411 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
412 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
413 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
414 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
// LOAD_COMMON2: declares the locals the full-pel search macros rely on:
//   map    pointer to c->map
//   qpel   flags & FLAG_QPEL
//   shift  1+qpel, the left shift applied to a full-pel coordinate before it
//          is compared with pred_x / pred_y in the mv_penalty lookups
// Requires c and flags to be in scope where the macro is expanded.
416 #define LOAD_COMMON2\
417 uint32_t *map= c->map;\
418 const int qpel= flags&FLAG_QPEL;\
419 const int shift= 1+qpel;\
// Small diamond search: starting from best[], test the four direct full-pel
// neighbours with CHECK_MV_DIR.
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
//
// NOTE(review): the surrounding loop, the declaration of next_dir and d, and
// the return statement are not visible in this excerpt.
421 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
422 int src_index, int ref_index, int const penalty_factor,
423 int size, int h, int flags)
425 MotionEstContext * const c= &s->me;
426 me_cmp_func cmpf, chroma_cmpf;
430 unsigned map_generation = c->map_generation;
432 cmpf= s->dsp.me_cmp[size];
433 chroma_cmpf= s->dsp.me_cmp[size+1];
435 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
436 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
437 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
438 if(map[index]!=key){ //this will be executed only very rarely
439 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
446 const int dir= next_dir;
447 const int x= best[0];
448 const int y= best[1];
// Each neighbour is tried only if it stays inside the window, and one
// direction is excluded depending on dir: with dir 2 the left neighbour is
// skipped, with dir 3 the upper one, with dir 0 the right one and with dir 1
// the lower one. The third macro argument is the direction code of that move.
452 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
453 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
454 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
455 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
// Diamond search over the sizes 1, 2 and 4 around best[].
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
//
// NOTE(review): the statements executed when the diamond would leave the
// window or when best[] has moved, and the return statement, are not visible
// in this excerpt.
463 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
464 int src_index, int ref_index, int const penalty_factor,
465 int size, int h, int flags)
467 MotionEstContext * const c= &s->me;
468 me_cmp_func cmpf, chroma_cmpf;
472 unsigned map_generation = c->map_generation;
474 cmpf= s->dsp.me_cmp[size];
475 chroma_cmpf= s->dsp.me_cmp[size+1];
477 for(dia_size=1; dia_size<=4; dia_size++){
479 const int x= best[0];
480 const int y= best[1];
// dia_size & (dia_size-1) is non-zero for values that are not a power of two,
// so of the sizes 1..4 only 3 is skipped here.
482 if(dia_size&(dia_size-1)) continue;
// Guard for a diamond that would reach outside the search window.
484 if( x + dia_size > xmax
485 || x - dia_size < xmin
486 || y + dia_size > ymax
487 || y - dia_size < ymin)
// Walk the four edges of the diamond, taking every second point.
490 for(dir= 0; dir<dia_size; dir+=2){
493 CHECK_MV(x + dir , y + dia_size - dir);
494 CHECK_MV(x + dia_size - dir, y - dir );
495 CHECK_MV(x - dir , y - dia_size + dir);
496 CHECK_MV(x - dia_size + dir, y + dir );
499 if(x!=best[0] || y!=best[1])
// Hexagon search: for each dia_size, repeatedly test six points around the
// current best[] until best[] stops moving, then shrink dia_size.
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
// dia_size        initial hexagon size; the loop ends when it reaches 0
//
// NOTE(review): the do-loop head, the assignments of x and y and the return
// statement are not visible in this excerpt.
505 static int hex_search(MpegEncContext * s, int *best, int dmin,
506 int src_index, int ref_index, int const penalty_factor,
507 int size, int h, int flags, int dia_size)
509 MotionEstContext * const c= &s->me;
510 me_cmp_func cmpf, chroma_cmpf;
513 unsigned map_generation = c->map_generation;
// dec is non-zero when the initial dia_size is not a power of two. It is
// computed once: in that case the size shrinks by 1 per pass, otherwise it is
// halved.
515 const int dec= dia_size & (dia_size-1);
517 cmpf= s->dsp.me_cmp[size];
518 chroma_cmpf= s->dsp.me_cmp[size+1];
520 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
525 CHECK_CLIPPED_MV(x -dia_size , y);
526 CHECK_CLIPPED_MV(x+ dia_size , y);
527 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
528 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// NOTE(review): unary minus binds tighter than >>, so the expression below is
// (-dia_size)>>1, a right shift of a negative value; its result is
// implementation-defined in C.
530 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
531 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
533 }while(best[0] != x || best[1] != y);
// Large-to-small diamond search: for each dia_size, test points taken from
// the eight-entry offset table hex[] scaled by dia_size until best[] stops
// moving, shrink dia_size, and finally test the four direct neighbours.
// The initial dia_size is the low 8 bits of c->dia_size.
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
//
// NOTE(review): the do-loop head, the loop over i, the assignments of x and y
// and the return statement are not visible in this excerpt.
539 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
540 int src_index, int ref_index, int const penalty_factor,
541 int size, int h, int flags)
543 MotionEstContext * const c= &s->me;
544 me_cmp_func cmpf, chroma_cmpf;
547 unsigned map_generation = c->map_generation;
549 int dia_size= c->dia_size&0xFF;
// dec is non-zero when the initial dia_size is not a power of two: the size
// then shrinks by 1 per pass, otherwise it is halved.
550 const int dec= dia_size & (dia_size-1);
// Offsets of the eight points of a diamond of radius 2.
551 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
552 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
554 cmpf= s->dsp.me_cmp[size];
555 chroma_cmpf= s->dsp.me_cmp[size+1];
557 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
562 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
564 }while(best[0] != x || best[1] != y);
// Final pass over the four direct neighbours of (x, y).
569 CHECK_CLIPPED_MV(x+1, y);
570 CHECK_CLIPPED_MV(x, y+1);
571 CHECK_CLIPPED_MV(x-1, y);
572 CHECK_CLIPPED_MV(x, y-1);
// Multi-stage search (the name suggests uneven multi-hexagon). Visible stages:
//  1. a horizontal scan over x2 in steps of 2, within dia_size-1 of x;
//  2. a vertical scan over y2 in steps of 2, within dia_size/2-1 of y;
//  3. a scan of the 5x5 square around (x, y);
//  4. the 16-point pattern hex[] scaled by j = 1 .. dia_size/4;
//  5. a final hex_search() with size 2, whose result is returned.
// All scans are clipped to the search window. dia_size is c->dia_size & 0xFE.
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp() and hex_search()
// ref_index       passed through unchanged to cmp() and hex_search()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp() and hex_search()
//
// NOTE(review): the loop bodies of stages 1-3, the loop over i in stage 4 and
// the assignments of x and y are not visible in this excerpt.
577 static int umh_search(MpegEncContext * s, int *best, int dmin,
578 int src_index, int ref_index, int const penalty_factor,
579 int size, int h, int flags)
581 MotionEstContext * const c= &s->me;
582 me_cmp_func cmpf, chroma_cmpf;
585 unsigned map_generation = c->map_generation;
586 int x,y,x2,y2, i, j, d;
587 const int dia_size= c->dia_size&0xFE;
// 16 offsets: five on the column x-4, five on the column x+4 and three each
// above and below.
588 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
589 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
590 {-2, 3}, { 0, 4}, { 2, 3},
591 {-2,-3}, { 0,-4}, { 2,-3},};
593 cmpf= s->dsp.me_cmp[size];
594 chroma_cmpf= s->dsp.me_cmp[size+1];
598 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
601 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
607 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
608 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
613 //FIXME prevent the CLIP stuff
615 for(j=1; j<=dia_size/4; j++){
617 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
621 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
// Exhaustive search over a square window, followed by a test of one point and
// its four direct neighbours.
// dia_size is the low 8 bits of c->dia_size. The loops visit y from
// max(-dia_size, ymin) to min(dia_size, ymax) and x likewise, so the window
// is centred on the zero vector and not on best[].
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
//
// NOTE(review): the loop body, the assignments of x and y before the final
// five checks (presumably from best[]) and the return statement are not
// visible in this excerpt.
624 static int full_search(MpegEncContext * s, int *best, int dmin,
625 int src_index, int ref_index, int const penalty_factor,
626 int size, int h, int flags)
628 MotionEstContext * const c= &s->me;
629 me_cmp_func cmpf, chroma_cmpf;
632 unsigned map_generation = c->map_generation;
634 const int dia_size= c->dia_size&0xFF;
636 cmpf= s->dsp.me_cmp[size];
637 chroma_cmpf= s->dsp.me_cmp[size+1];
639 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
640 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
648 CHECK_CLIPPED_MV(x , y);
649 CHECK_CLIPPED_MV(x+1, y);
650 CHECK_CLIPPED_MV(x, y+1);
651 CHECK_CLIPPED_MV(x-1, y);
652 CHECK_CLIPPED_MV(x, y-1);
// SAB_CHECK_MV(ax,ay): candidate test used by sab_diamond_search.
// A candidate not yet in map for this map_generation is scored with cmp(),
// the raw score is stored in score_map[index] and the MV penalty is added.
// If the resulting d is below the height of the last entry of minima[], the
// candidate is inserted: j advances to the first entry whose height exceeds
// d, the tail is shifted down by one entry with memmove (the last entry is
// dropped), and entry j gets height d with checked reset to 0.
// NOTE(review): the lines that record key in map, initialise j and store the
// coordinates in minima[j] are not visible in this excerpt.
659 #define SAB_CHECK_MV(ax,ay)\
661 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
662 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
663 /*printf("sab check %d %d\n", ax, ay);*/\
664 if(map[index]!=key){\
665 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
667 score_map[index]= d;\
668 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
669 /*printf("score: %d\n", d);*/\
670 if(d < minima[minima_count-1].height){\
673 while(d >= minima[j].height) j++;\
675 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
677 minima[j].checked= 0;\
678 minima[j].height= d;\
// Search that keeps a sorted list of the minima_count best points instead of
// a single best point. minima_count is the absolute value of c->dia_size.
//
// Visible steps:
//  1. collect every map entry of the current map_generation into minima[],
//     decoding x and y from the key and adding the MV penalty to the cached
//     score;
//  2. sort the collected entries with qsort and minima_cmp, then pad the list
//     up to minima_count with entries of very large height at (0, 0);
//  3. for each unchecked entry, test neighbours with SAB_CHECK_MV and mark the
//     entry as checked;
//  4. copy minima[0] into best[] and dmin and, if that point is strictly
//     inside the window, make sure its four neighbours are in the map.
//
// best            out: best[0] is x and best[1] is y of the best point found
// dmin            score on entry; overwritten with minima[0].height
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
//
// NOTE(review): minima[] holds MAX_SAB_SIZE entries on the stack and no
// visible line checks that minima_count does not exceed MAX_SAB_SIZE.
// NOTE(review): the statements after the range tests, the horizontal
// SAB_CHECK_MV calls, the restart logic and the return statement are not
// visible in this excerpt.
688 #define MAX_SAB_SIZE ME_MAP_SIZE
689 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
690 int src_index, int ref_index, int const penalty_factor,
691 int size, int h, int flags)
693 MotionEstContext * const c= &s->me;
694 me_cmp_func cmpf, chroma_cmpf;
695 Minima minima[MAX_SAB_SIZE];
696 const int minima_count= FFABS(c->dia_size);
700 unsigned map_generation = c->map_generation;
702 cmpf= s->dsp.me_cmp[size];
703 chroma_cmpf= s->dsp.me_cmp[size+1];
705 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
706 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
708 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
709 uint32_t key= map[i];
// Bias both coordinate fields of the key by half their range so that the
// fields can be extracted as unsigned values below; the bias is subtracted
// again after extraction.
711 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// Keep only entries whose upper bits match the current generation.
// NOTE(review): (-1)<<n shifts a negative value left, which is undefined
// behaviour in C; an unsigned mask would avoid that.
713 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
715 minima[j].height= score_map[i];
716 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
717 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
718 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
719 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
721 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
722 if( minima[j].x > xmax || minima[j].x < xmin
723 || minima[j].y > ymax || minima[j].y < ymin)
// score_map holds scores without MV penalty, so add it here; the zero vector
// gets none.
727 if(minima[j].x || minima[j].y)
728 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
733 qsort(minima, j, sizeof(Minima), minima_cmp);
// Pad the list: 256*256*256*64 equals 2 to the power 30, a height no real
// candidate is expected to reach.
735 for(; j<minima_count; j++){
736 minima[j].height=256*256*256*64;
738 minima[j].x= minima[j].y=0;
741 for(i=0; i<minima_count; i++){
742 const int x= minima[i].x;
743 const int y= minima[i].y;
746 if(minima[i].checked) continue;
// Points on the border of the window have at least one neighbour outside it.
748 if( x >= xmax || x <= xmin
749 || y >= ymax || y <= ymin)
754 SAB_CHECK_MV(x , y-1)
755 SAB_CHECK_MV(x , y+1)
757 minima[i].checked= 1;
760 best[0]= minima[0].x;
761 best[1]= minima[0].y;
762 dmin= minima[0].height;
764 if( best[0] < xmax && best[0] > xmin
765 && best[1] < ymax && best[1] > ymin){
767 //ensure that the reference samples for hpel refinement are in the map
768 CHECK_MV(best[0]-1, best[1])
769 CHECK_MV(best[0]+1, best[1])
770 CHECK_MV(best[0], best[1]-1)
771 CHECK_MV(best[0], best[1]+1)
// Diamond search with a growing size: for dia_size from 1 up to c->dia_size,
// test the points on the four edges of the diamond around best[].
// For each edge the range of dir is narrowed with start and end so that only
// points inside the search window are passed to CHECK_MV, whose asserts
// require in-range coordinates.
//
// best            in/out: best[0] is x and best[1] is y of the current best
// dmin            score of best[] on entry
// src_index       passed through unchanged to cmp()
// ref_index       passed through unchanged to cmp()
// penalty_factor  multiplier for the MV rate penalty
// size            selects the entry of s->dsp.me_cmp
// h, flags        passed through unchanged to cmp()
//
// NOTE(review): the statement executed when best[] has moved and the return
// statement are not visible in this excerpt.
776 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
777 int src_index, int ref_index, int const penalty_factor,
778 int size, int h, int flags)
780 MotionEstContext * const c= &s->me;
781 me_cmp_func cmpf, chroma_cmpf;
785 unsigned map_generation = c->map_generation;
787 cmpf= s->dsp.me_cmp[size];
788 chroma_cmpf= s->dsp.me_cmp[size+1];
790 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
792 const int x= best[0];
793 const int y= best[1];
// Edge 1: from (x, y+dia_size) towards (x+dia_size, y).
795 start= FFMAX(0, y + dia_size - ymax);
796 end = FFMIN(dia_size, xmax - x + 1);
797 for(dir= start; dir<end; dir++){
800 //check(x + dir,y + dia_size - dir,0, a0)
801 CHECK_MV(x + dir , y + dia_size - dir);
// Edge 2: from (x+dia_size, y) towards (x, y-dia_size).
804 start= FFMAX(0, x + dia_size - xmax);
805 end = FFMIN(dia_size, y - ymin + 1);
806 for(dir= start; dir<end; dir++){
809 //check(x + dia_size - dir, y - dir,0, a1)
810 CHECK_MV(x + dia_size - dir, y - dir );
// Edge 3: from (x, y-dia_size) towards (x-dia_size, y).
813 start= FFMAX(0, -y + dia_size + ymin );
814 end = FFMIN(dia_size, x - xmin + 1);
815 for(dir= start; dir<end; dir++){
818 //check(x - dir,y - dia_size + dir,0, a2)
819 CHECK_MV(x - dir , y - dia_size + dir);
// Edge 4: from (x-dia_size, y) towards (x, y+dia_size).
822 start= FFMAX(0, -x + dia_size + xmin );
823 end = FFMIN(dia_size, ymax - y + 1);
824 for(dir= start; dir<end; dir++){
827 //check(x - dia_size + dir, y + dir,0, a3)
828 CHECK_MV(x - dia_size + dir, y + dir );
831 if(x!=best[0] || y!=best[1])
// Dispatcher: selects the full-pel search routine from c->dia_size and
// forwards all arguments to it. Visible chain, evaluated in this order:
//   (condition not shown)  funny_diamond_search
//   dia_size < -1          sab_diamond_search
//   dia_size < 2           small_diamond_search
//   dia_size > 1024        full_search
//   dia_size > 768         umh_search
//   dia_size > 512         hex_search, with size c->dia_size & 0xFF
//   dia_size > 256         l2s_dia_search
//   otherwise              var_diamond_search
// NOTE(review): the condition guarding the first return is on a line that is
// not visible in this excerpt.
837 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
838 int src_index, int ref_index, int const penalty_factor,
839 int size, int h, int flags){
840 MotionEstContext * const c= &s->me;
842 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
843 else if(c->dia_size<-1)
844 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
845 else if(c->dia_size<2)
846 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
847 else if(c->dia_size>1024)
848 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
849 else if(c->dia_size>768)
850 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
851 else if(c->dia_size>512)
852 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
853 else if(c->dia_size>256)
854 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
856 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
860 @param P a list of candidate mvs to check before starting the
861 iterative search. If one of the candidates is close to the optimal mv, then
862 it takes fewer iterations. And it increases the chance that we find the
// Full-pel predictive search: score the zero vector, test a list of predicted
// candidates, then refine the best one with diamond_search().
//
// Visible candidate sources:
//  - spatial predictors from P (P_LEFT, P_TOP, P_TOPRIGHT, P_MEDIAN and the
//    four neighbours of P_MEDIAN), shifted right by shift to full-pel units;
//  - vectors from last_mv at the current macroblock and at its left, upper,
//    right and lower neighbours, scaled as (v*ref_mv_scale + (1<<15))>>16;
//  - optionally, when avctx->last_predictor_count is non-zero, scaled last_mv
//    vectors from a window of macroblocks around the current one.
//
// mx_ptr, my_ptr  presumably receive the best vector; the stores are not
//                 visible in this excerpt
// P               table of predictor vectors
// src_index       passed through unchanged to cmp() and diamond_search()
// ref_index       passed through unchanged to cmp() and diamond_search()
// last_mv         array of earlier motion vectors, indexed by macroblock
// ref_mv_scale    16.16 fixed-point factor applied to last_mv entries
// flags, size, h  passed through unchanged to cmp() and diamond_search()
//
// NOTE(review): the condition choosing between the pre-pass settings
// (me_pre_cmp, pre_penalty_factor) and the normal ones, several branch heads
// and the return statement are not visible in this excerpt.
865 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
866 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
867 int ref_mv_scale, int flags, int size, int h)
869 MotionEstContext * const c= &s->me;
870 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
871 i.e. the difference between the position of the
872 block currently being encoded and the position of
873 the block chosen to predict it from. */
874 int d; ///< the score (cmp + penalty) of any given mv
875 int dmin; /**< the best value of d, i.e. the score
876 corresponding to the mv stored in best[]. */
877 unsigned map_generation;
879 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
880 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
881 me_cmp_func cmpf, chroma_cmpf;
887 penalty_factor= c->pre_penalty_factor;
888 cmpf= s->dsp.me_pre_cmp[size];
889 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
891 penalty_factor= c->penalty_factor;
892 cmpf= s->dsp.me_cmp[size];
893 chroma_cmpf= s->dsp.me_cmp[size+1];
896 map_generation= update_map_generation(c);
// Seed the search with the zero vector: its key is map_generation alone and
// its map index is 0.
899 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
900 map[0]= map_generation;
903 //FIXME precalc first term below?
904 if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
905 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
// On the first line of a slice only the left predictor and the co-located
// last_mv vector are tested in the lines shown here.
908 if (s->first_slice_line) {
909 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
910 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
911 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// Early-termination test: the zero-vector score is below a threshold derived
// from h*h*mv0_threshold and all the listed spatial predictors are zero.
913 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
914 && ( P_LEFT[0] |P_LEFT[1]
916 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
922 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
923 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
924 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
925 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
926 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
927 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
928 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
929 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
930 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
931 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
// last_mv vectors of the left, upper, right and lower macroblocks.
935 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
936 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
937 if(!s->first_slice_line)
938 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
939 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
941 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
942 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
943 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
944 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
945 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// Optional extra predictors: last_mv vectors of all macroblocks within count
// of the current one, clipped to the picture in macroblock units.
949 if(c->avctx->last_predictor_count){
950 const int count= c->avctx->last_predictor_count;
951 const int xstart= FFMAX(0, s->mb_x - count);
952 const int ystart= FFMAX(0, s->mb_y - count);
953 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
954 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
957 for(mb_y=ystart; mb_y<yend; mb_y++){
959 for(mb_x=xstart; mb_x<xend; mb_x++){
// NOTE(review): this index adds 1 to both mb_x and mb_y, unlike ref_mv_xy
// above -- confirm which layout last_mv uses.
960 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
961 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
962 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
964 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
970 //check(best[0],best[1],0, b0)
971 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
973 //check(best[0],best[1],0, b1)
977 // printf("%d %d %d \n", best[0], best[1], dmin);
// Externally visible entry point for the always-inline worker
// epzs_motion_search_internal().
// When c->flags is 0, h is 16 and size is 0, the worker is called with those
// values as literal constants (flags 0, size 0, h 16); otherwise c->flags,
// size and h are forwarded as they are. Presumably the literal call lets the
// compiler specialise the inlined worker -- confirm.
// NOTE(review): the end of the parameter list (size and h are used below) is
// not visible in this excerpt.
981 //this function is dedicated to the braindamaged gcc
982 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
983 int P[10][2], int src_index, int ref_index,
984 int16_t (*last_mv)[2], int ref_mv_scale,
987 MotionEstContext * const c= &s->me;
988 //FIXME convert other functions in the same way if faster
989 if(c->flags==0 && h==16 && size==0){
990 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
992 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
994 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
// Variant of the predictive full-pel search that additionally tests the
// predictor P_MV1. It uses c->penalty_factor, c->flags and s->dsp.me_cmp.
//
// Visible candidates: P_LEFT, P_MV1, P_MEDIAN, P_TOP, P_TOPRIGHT (shifted to
// full-pel units) and the scaled last_mv vectors of the current, right and
// lower macroblocks, followed by diamond_search().
//
// mx_ptr, my_ptr  presumably receive the best vector; the stores are not
//                 visible in this excerpt
// P               table of predictor vectors
// src_index       passed through unchanged to cmp() and diamond_search()
// ref_index       passed through unchanged to cmp() and diamond_search()
// last_mv         array of earlier motion vectors, indexed by macroblock
//
// NOTE(review): the end of the parameter list, the declarations of best, d,
// dmin, size and h, the else branch head and the return statement are not
// visible in this excerpt.
998 static int epzs_motion_search4(MpegEncContext * s,
999 int *mx_ptr, int *my_ptr, int P[10][2],
1000 int src_index, int ref_index, int16_t (*last_mv)[2],
1003 MotionEstContext * const c= &s->me;
1006 unsigned map_generation;
1007 const int penalty_factor= c->penalty_factor;
1010 const int ref_mv_stride= s->mb_stride;
1011 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1012 me_cmp_func cmpf, chroma_cmpf;
1014 int flags= c->flags;
1017 cmpf= s->dsp.me_cmp[size];
1018 chroma_cmpf= s->dsp.me_cmp[size+1];
1020 map_generation= update_map_generation(c);
1023 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
// First line of a slice: only the left predictor, the co-located last_mv
// vector and P_MV1 are tested.
1025 if (s->first_slice_line) {
1026 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1027 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1028 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1029 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1031 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1032 //FIXME try some early stop
1033 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1034 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1035 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1036 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1037 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1038 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv vectors of the right and lower macroblocks.
1041 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1042 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1043 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1044 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1045 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1048 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1053 // printf("%d %d %d \n", best[0], best[1], dmin);
// Variant of the predictive full-pel search with size fixed to 0. The visible
// candidate list and the final diamond_search() call match those of
// epzs_motion_search4.
//
// Visible candidates: P_LEFT, P_MV1, P_MEDIAN, P_TOP, P_TOPRIGHT (shifted to
// full-pel units) and the scaled last_mv vectors of the current, right and
// lower macroblocks.
//
// mx_ptr, my_ptr  presumably receive the best vector; the stores are not
//                 visible in this excerpt
// P               table of predictor vectors
// src_index       passed through unchanged to cmp() and diamond_search()
// ref_index       passed through unchanged to cmp() and diamond_search()
// last_mv         array of earlier motion vectors, indexed by macroblock
//
// NOTE(review): the end of the parameter list, the declarations of best, d,
// dmin and h, the else branch head and the return statement are not visible
// in this excerpt.
1057 //try to merge with above FIXME (needs PSNR test)
1058 static int epzs_motion_search2(MpegEncContext * s,
1059 int *mx_ptr, int *my_ptr, int P[10][2],
1060 int src_index, int ref_index, int16_t (*last_mv)[2],
1063 MotionEstContext * const c= &s->me;
1066 unsigned map_generation;
1067 const int penalty_factor= c->penalty_factor;
1068 const int size=0; //FIXME pass as arg
1070 const int ref_mv_stride= s->mb_stride;
1071 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1072 me_cmp_func cmpf, chroma_cmpf;
1074 int flags= c->flags;
1077 cmpf= s->dsp.me_cmp[size];
1078 chroma_cmpf= s->dsp.me_cmp[size+1];
1080 map_generation= update_map_generation(c);
1083 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
// First line of a slice: only the left predictor, the co-located last_mv
// vector and P_MV1 are tested.
1085 if (s->first_slice_line) {
1086 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1087 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1088 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1089 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1091 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1092 //FIXME try some early stop
1093 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1094 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1095 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1096 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1097 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1098 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv vectors of the right and lower macroblocks.
1101 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1102 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1103 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1104 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1105 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1108 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1113 // printf("%d %d %d \n", best[0], best[1], dmin);