3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
27 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
/* Local aliases for fields of the motion estimation context `c`: the score
 * map, the limits xmin/ymin/xmax/ymax, the current MV penalty table and the
 * predicted vector (pred_x, pred_y). The first five are tagged av_unused so
 * that expansion sites which do not use them stay warning-free.
 * NOTE(review): every line ends in a continuation backslash, so this is
 * presumably the body of a macro (LOAD_COMMON?) whose #define line is not
 * visible in this view -- confirm against the full file. */
29 uint32_t av_unused * const score_map= c->score_map;\
30 const int av_unused xmin= c->xmin;\
31 const int av_unused ymin= c->ymin;\
32 const int av_unused xmax= c->xmax;\
33 const int av_unused ymax= c->ymax;\
34 uint8_t *mv_penalty= c->current_mv_penalty;\
35 const int pred_x= c->pred_x;\
36 const int pred_y= c->pred_y;\
/* CHECK_HALF_MV(dx, dy, x, y): score one half-pel candidate.
 * (x, y) is a full-pel position and (dx, dy) the half-pel offset; the
 * candidate in half-pel units is hx = 2*x + dx, hy = 2*y + dy.
 * Its score d is cmp_hpel() plus the MV penalty relative to (pred_x, pred_y)
 * multiplied by penalty_factor. COPY3_IF_LT then updates (dmin, bx, by) with
 * (d, hx, hy) -- presumably only when d < dmin, going by the macro name; its
 * definition is not visible here.
 * Expects d, dmin, bx, by, mv_penalty, pred_x, pred_y, penalty_factor and all
 * cmp_hpel() arguments to be in scope at the expansion site. */
38 #define CHECK_HALF_MV(dx, dy, x, y)\
40 const int hx= 2*(x)+(dx);\
41 const int hy= 2*(y)+(dy);\
42 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
43 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
44 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Half-pel refinement of a full-pel motion vector.
 *
 * Reads the full-pel vector from *mx_ptr / *my_ptr, takes its score in dmin
 * and evaluates neighbouring half-pel positions through CHECK_HALF_MV, using
 * the comparison functions s->dsp.me_sub_cmp[size] / [size+1] and
 * c->sub_penalty_factor.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (the remaining parameters, the body of the c->skip branch, the if/else
 * lines that choose between the CHECK_HALF_MV groups, and the final
 * store/return). The comments below describe only what is shown.
 */
47 static int hpel_motion_search(MpegEncContext * s,
48 int *mx_ptr, int *my_ptr, int dmin,
49 int src_index, int ref_index,
52 MotionEstContext * const c= &s->me;
53 const int mx = *mx_ptr;
54 const int my = *my_ptr;
55 const int penalty_factor= c->sub_penalty_factor;
56 me_cmp_func cmp_sub, chroma_cmp_sub;
60 int flags= c->sub_flags;
// sub-pel comparison functions: [size] goes to cmp_sub, [size+1] to chroma_cmp_sub
64 cmp_sub= s->dsp.me_sub_cmp[size];
65 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
67 if(c->skip){ //FIXME move out of hpel?
// When the sub-pel metric differs from me_cmp, re-score the starting point
// with cmp_sub (presumably so that dmin is comparable with the half-pel
// scores computed below). The penalty is skipped for a zero vector at size 0.
73 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
74 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
75 if(mx || my || size>0)
76 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
// Refine only when (mx, my) lies strictly inside the [xmin,xmax]x[ymin,ymax] window.
79 if (mx > xmin && mx < xmax &&
80 my > ymin && my < ymax) {
82 const int index= (my<<ME_MAP_SHIFT) + mx;
// t, l, r, b: cached full-pel scores of the neighbours at my-1, mx-1, mx+1
// and my+1, each with its MV penalty added. bx/by are in half-pel units,
// hence the +-2 steps.
83 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
84 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
85 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
87 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
94 unsigned map_generation= c->map_generation;
96 uint32_t *map= c->map;
// Sanity checks: the map slots of the four neighbours must hold the key of
// the current map_generation, i.e. the score_map values read above are valid.
98 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
99 assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
100 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
101 assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
102 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
103 assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
104 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
105 assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
// Half-pel candidates around (mx, my).
// NOTE(review): the conditionals grouping these calls are not visible;
// presumably the groups are selected by comparing t, l, r and b -- confirm.
108 CHECK_HALF_MV(0, 1, mx ,my-1)
110 CHECK_HALF_MV(1, 1, mx-1, my-1)
112 CHECK_HALF_MV(1, 1, mx , my-1)
114 CHECK_HALF_MV(1, 1, mx-1, my )
116 CHECK_HALF_MV(1, 0, mx-1, my )
118 CHECK_HALF_MV(1, 1, mx , my-1)
120 CHECK_HALF_MV(1, 1, mx-1, my-1)
122 CHECK_HALF_MV(1, 1, mx , my )
124 CHECK_HALF_MV(1, 0, mx , my )
129 CHECK_HALF_MV(1, 1, mx-1, my-1)
131 CHECK_HALF_MV(1, 1, mx , my )
133 CHECK_HALF_MV(1, 0, mx-1, my)
134 CHECK_HALF_MV(1, 1, mx-1, my)
137 CHECK_HALF_MV(1, 1, mx , my-1)
139 CHECK_HALF_MV(1, 1, mx-1, my)
141 CHECK_HALF_MV(1, 0, mx , my)
142 CHECK_HALF_MV(1, 1, mx , my)
144 CHECK_HALF_MV(0, 1, mx , my)
// the winning half-pel vector must still lie inside the window (in half-pel units)
146 assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
/* NOTE(review): only the first lines of the signature are visible in this
 * view; the remaining parameters and the whole body are not shown. The
 * visible parameters match those of the sub-pel search functions in this
 * file. Going by the name, this presumably performs no sub-pel refinement --
 * confirm against the full file. */
155 static int no_sub_motion_search(MpegEncContext * s,
156 int *mx_ptr, int *my_ptr, int dmin,
157 int src_index, int ref_index,
/**
 * Score the motion vector (mx, my) with the macroblock comparison functions
 * s->dsp.mb_cmp[size] (stored in cmp_sub) and s->dsp.mb_cmp[size+1] (stored
 * in chroma_cmp_sub), using c->mb_flags.
 *
 * @param mx,my     vector in sub-pel units; split below into an integer part
 *                  (>> (qpel+1)) and a fractional part (& mask) for cmp()
 * @param add_rate  if nonzero, the MV penalty relative to (pred_x, pred_y),
 *                  multiplied by c->mb_penalty_factor, is added -- except for
 *                  a zero vector with size 0
 *
 * NOTE(review): the declaration of d and the return statement are not visible
 * in this view; presumably the function returns d.
 */
165 inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
166 int ref_index, int size, int h, int add_rate)
168 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
169 MotionEstContext * const c= &s->me;
170 const int penalty_factor= c->mb_penalty_factor;
171 const int flags= c->mb_flags;
172 const int qpel= flags & FLAG_QPEL;
// mask of the fractional bits: 1 when qpel is 0; 3 when qpel is 1
// (assumes FLAG_QPEL == 1 -- TODO confirm)
173 const int mask= 1+2*qpel;
174 me_cmp_func cmp_sub, chroma_cmp_sub;
181 cmp_sub= s->dsp.mb_cmp[size];
182 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
185 // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
187 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
188 //FIXME check cbp before adding penalty for (0,0) vector
189 if(add_rate && (mx || my || size>0))
190 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
/* CHECK_QUARTER_MV(dx, dy, x, y): score one quarter-pel candidate.
 * (x, y) is a full-pel position and (dx, dy) the quarter-pel offset; the
 * candidate in quarter-pel units is hx = 4*x + dx, hy = 4*y + dy.
 * Its score d is cmp_qpel() (called with cmpf / chroma_cmpf) plus the MV
 * penalty relative to (pred_x, pred_y) multiplied by penalty_factor.
 * COPY3_IF_LT then updates (dmin, bx, by) with (d, hx, hy) -- presumably only
 * when d < dmin, going by the macro name; its definition is not visible here.
 * Expects d, dmin, bx, by, cmpf, chroma_cmpf and the other referenced names
 * to be in scope at the expansion site. */
195 #define CHECK_QUARTER_MV(dx, dy, x, y)\
197 const int hx= 4*(x)+(dx);\
198 const int hy= 4*(y)+(dy);\
199 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
200 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
201 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Quarter-pel refinement of a full-pel motion vector.
 *
 * Visible outline:
 *  1. Optionally re-score the start point with the sub-pel metric.
 *  2. If (mx, my) is strictly inside the search window, estimate a score for
 *     every position with offsets nx, ny in [-3, 3] quarter-pels from the
 *     cached full-pel scores of the neighbours, keeping the best estimates in
 *     best[] / best_pos[].
 *  3. Evaluate the first c->avctx->me_subpel_quality entries of that list for
 *     real with CHECK_QUARTER_MV.
 *
 * NOTE(review): several lines are not visible in this view (remaining
 * parameters, declarations of best/best_pos/d/nx/ny/i, the insertion-loop
 * headers, the else lines, and the final store/return). The comments below
 * describe only what is shown.
 */
204 static int qpel_motion_search(MpegEncContext * s,
205 int *mx_ptr, int *my_ptr, int dmin,
206 int src_index, int ref_index,
209 MotionEstContext * const c= &s->me;
210 const int mx = *mx_ptr;
211 const int my = *my_ptr;
212 const int penalty_factor= c->sub_penalty_factor;
213 const unsigned map_generation = c->map_generation;
214 const int subpel_quality= c->avctx->me_subpel_quality;
215 uint32_t *map= c->map;
216 me_cmp_func cmpf, chroma_cmpf;
217 me_cmp_func cmp_sub, chroma_cmp_sub;
220 int flags= c->sub_flags;
222 cmpf= s->dsp.me_cmp[size];
223 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
226 cmp_sub= s->dsp.me_sub_cmp[size];
227 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
229 if(c->skip){ //FIXME somehow move up (benchmark)
// When the sub-pel metric differs from me_cmp, re-score the starting point
// with cmp_sub; the penalty is skipped for a zero vector at size 0.
235 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
236 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
237 if(mx || my || size>0)
238 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
// Refine only when (mx, my) lies strictly inside the search window.
241 if (mx > xmin && mx < xmax &&
242 my > ymin && my < ymax) {
243 int bx=4*mx, by=4*my;
246 const int index= (my<<ME_MAP_SHIFT) + mx;
// cached full-pel scores: t/b at my-1/my+1, l/r at mx-1/mx+1, c at (mx, my).
// Note: this local `c` shadows the MotionEstContext pointer `c` for the rest
// of the scope, which is why the code below reads s->me.dia_size.
247 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
248 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
249 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
250 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
251 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
// fill every byte of the 8-entry best[] list with 64, giving each int a
// large initial value (0x40404040 for a 4-byte int)
255 memset(best, 64, sizeof(int)*8);
// With dia_size >= 2 the four diagonal neighbours are read from score_map too.
// NOTE(review): no check that those map slots are current is visible here.
256 if(s->me.dia_size>=2){
257 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
258 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
259 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
260 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
262 for(ny= -3; ny <= 3; ny++){
263 for(nx= -3; nx <= 3; nx++){
// t2/c2/b2: quadratic in nx through the three scores of the top/centre/bottom
// row, scaled by 32 (nx=0 gives 32*t, nx=4 gives 32*tr, nx=-4 gives 32*tl).
// The same quadratic is then applied in ny; "+512 >> 10" rounds and removes
// the combined 32*32 scale.
264 //FIXME this could overflow (unlikely though)
265 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
266 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
267 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
268 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
// nx, ny only span -3..3, so this skips just the full-pel centre (0,0)
271 if((nx&3)==0 && (ny&3)==0) continue;
273 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
275 // if(nx&1) score-=1024*c->penalty_factor;
276 // if(ny&1) score-=1024*c->penalty_factor;
// insert at slot i: shift entries i..6 of best[] and best_pos[] down by one,
// dropping the last entry, then store the candidate in quarter-pel units
280 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
281 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
283 best_pos[i][0]= nx + 4*mx;
284 best_pos[i][1]= ny + 4*my;
// Alternative estimate (presumably the else branch of the dia_size test):
// a 2-D quadratic with coefficients cx, cx2, cy, cy2 and cxy, built from
// t, l, r, b, c and one diagonal sample tl.
292 //FIXME this could overflow (unlikely though)
293 const int cx = 4*(r - l);
294 const int cx2= r + l - 2*c;
295 const int cy = 4*(b - t);
296 const int cy2= b + t - 2*c;
// the trailing "&& 0" makes this condition always false, so the cached
// value is never used here
299 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
300 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
302 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
305 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
// the model must reproduce the five samples exactly (all values scaled by 32)
307 assert(16*cx2 + 4*cx + 32*c == 32*r);
308 assert(16*cx2 - 4*cx + 32*c == 32*l);
309 assert(16*cy2 + 4*cy + 32*c == 32*b);
310 assert(16*cy2 - 4*cy + 32*c == 32*t);
311 assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
313 for(ny= -3; ny <= 3; ny++){
314 for(nx= -3; nx <= 3; nx++){
315 //FIXME this could overflow (unlikely though)
316 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
319 if((nx&3)==0 && (ny&3)==0) continue;
// score stays scaled by 32 in this branch, so the penalty is scaled by 32 as well
321 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
322 // if(nx&1) score-=32*c->penalty_factor;
323 // if(ny&1) score-=32*c->penalty_factor;
// same insertion into best[] / best_pos[] as in the branch above
327 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
328 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
330 best_pos[i][0]= nx + 4*mx;
331 best_pos[i][1]= ny + 4*my;
// Evaluate the first subpel_quality candidates for real; nx, ny are split
// into a full-pel part (>>2) and a quarter-pel fraction (&3).
// NOTE(review): the lines loading nx/ny (presumably from best_pos[i]) are not visible.
338 for(i=0; i<subpel_quality; i++){
341 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
// the winning quarter-pel vector must still lie inside the window (in quarter-pel units)
344 assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
/* CHECK_MV(x,y): score the full-pel candidate (x, y), which must already lie
 * inside [xmin,xmax] x [ymin,ymax] (asserted).
 * `key` combines the coordinates with map_generation and `index` is the slot
 * in map[] / score_map[]. cmp() is called only when map[index] != key; its
 * result is stored in score_map[index], the MV penalty is added, and
 * COPY3_IF_LT updates (dmin, best[0], best[1]).
 * NOTE(review): the line that records the key in map[index] and the closing
 * lines of the macro are not visible in this view.
 * Expects map, score_map, map_generation, d, dmin, best, shift, cmpf,
 * chroma_cmpf and the other referenced names to be in scope. */
357 #define CHECK_MV(x,y)\
359 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
360 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
361 assert((x) >= xmin);\
362 assert((x) <= xmax);\
363 assert((y) >= ymin);\
364 assert((y) <= ymax);\
365 /*printf("check_mv %d %d\n", x, y);*/\
366 if(map[index]!=key){\
367 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
369 score_map[index]= d;\
370 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
371 /*printf("score:%d\n", d);*/\
372 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
/* CHECK_CLIPPED_MV(ax,ay): the visible lines clamp Lx into [xmin, xmax] and
 * Ly into [ymin, ymax], yielding Lx2 / Ly2.
 * NOTE(review): the lines that define Lx / Ly (presumably from ax / ay) and
 * the line that consumes Lx2 / Ly2 (presumably a CHECK_MV call) are not
 * visible in this view -- confirm against the full file. */
376 #define CHECK_CLIPPED_MV(ax,ay)\
380 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
381 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
/* CHECK_MV_DIR(x,y,new_dir): score the full-pel candidate (x, y) unless its
 * map slot already holds the current key. The cmp() result is stored in
 * score_map[index] and the MV penalty is then added to d. Unlike CHECK_MV,
 * no range asserts are made in the visible lines.
 * NOTE(review): the tail of the macro is not visible; presumably it updates
 * dmin/best and records new_dir when the candidate wins -- confirm against
 * the full file. */
385 #define CHECK_MV_DIR(x,y,new_dir)\
387 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
388 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
389 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
390 if(map[index]!=key){\
391 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
393 score_map[index]= d;\
394 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
395 /*printf("score:%d\n", d);*/\
/* check(x,y,S,v): debugging aid. For each window limit that (x, y) violates
 * (limits shifted left by S), print the limit, x, y, s->mb_x and s->mb_y,
 * followed by the limit's name and the stringified tag v. No newline is
 * printed. The only uses visible in this view are commented out. */
405 #define check(x,y,S,v)\
406 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
407 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
408 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
409 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
/* LOAD_COMMON2: declares the extra locals used by the full-pel search macros:
 * map (alias of c->map), qpel (flags & FLAG_QPEL) and shift = 1 + qpel, the
 * left shift applied to full-pel coordinates before indexing mv_penalty.
 * Requires c and flags to be in scope at the expansion site. */
411 #define LOAD_COMMON2\
412 uint32_t *map= c->map;\
413 const int qpel= flags&FLAG_QPEL;\
414 const int shift= 1+qpel;\
/*
 * Iterative search over the four direct full-pel neighbours of best[].
 *
 * @param best  in/out: best[0] = x, best[1] = y of the current best vector
 * @param dmin  score of best[] on entry
 *
 * NOTE(review): the loop header, the declarations of next_dir/d, the loop
 * exit and the return statement are not visible in this view; presumably the
 * function returns the final dmin.
 */
416 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
417 int src_index, int ref_index, int const penalty_factor,
418 int size, int h, int flags)
420 MotionEstContext * const c= &s->me;
421 me_cmp_func cmpf, chroma_cmpf;
425 unsigned map_generation = c->map_generation;
427 cmpf= s->dsp.me_cmp[size];
428 chroma_cmpf= s->dsp.me_cmp[size+1];
430 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
431 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
432 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
433 if(map[index]!=key){ //this will be executed only very rarely
434 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
441 const int dir= next_dir;
442 const int x= best[0];
443 const int y= best[1];
// Try the left/top/right/bottom neighbours (new_dir 0/1/2/3), each only if it
// stays inside the window. The neighbour in the direction opposite to `dir`
// is skipped (e.g. dir==2, the "x+1" direction, skips x-1); presumably that
// is the position the search just came from.
447 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
448 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
449 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
450 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
/*
 * Diamond search using only power-of-two diamond sizes (1, 2 and 4).
 *
 * @param best  in/out: best[0] = x, best[1] = y of the current best vector
 * @param dmin  score of best[] on entry
 *
 * NOTE(review): the statements executed by the border test and by the final
 * "best moved" test, as well as the return, are not visible in this view.
 */
458 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
459 int src_index, int ref_index, int const penalty_factor,
460 int size, int h, int flags)
462 MotionEstContext * const c= &s->me;
463 me_cmp_func cmpf, chroma_cmpf;
467 unsigned map_generation = c->map_generation;
469 cmpf= s->dsp.me_cmp[size];
470 chroma_cmpf= s->dsp.me_cmp[size+1];
472 for(dia_size=1; dia_size<=4; dia_size++){
474 const int x= best[0];
475 const int y= best[1];
// skip sizes that are not a power of two (i.e. 3)
477 if(dia_size&(dia_size-1)) continue;
// the whole diamond must fit inside the search window
479 if( x + dia_size > xmax
480 || x - dia_size < xmin
481 || y + dia_size > ymax
482 || y - dia_size < ymin)
// walk the four edges of the diamond, visiting every second point
485 for(dir= 0; dir<dia_size; dir+=2){
488 CHECK_MV(x + dir , y + dia_size - dir);
489 CHECK_MV(x + dia_size - dir, y - dir );
490 CHECK_MV(x - dir , y - dia_size + dir);
491 CHECK_MV(x - dia_size + dir, y + dir );
// true when this pass found a better vector than (x, y)
494 if(x!=best[0] || y!=best[1])
/*
 * Hexagon search with a shrinking pattern size.
 *
 * @param best      in/out: best[0] = x, best[1] = y of the current best vector
 * @param dia_size  initial pattern size. If it is a power of two (dec == 0)
 *                  the size is halved after each stage, otherwise it is
 *                  decremented by one.
 *
 * Each stage repeats the six-point check until best[] stops moving.
 * NOTE(review): the "do {" line, the x/y assignments and the return statement
 * are not visible in this view.
 */
500 static int hex_search(MpegEncContext * s, int *best, int dmin,
501 int src_index, int ref_index, int const penalty_factor,
502 int size, int h, int flags, int dia_size)
504 MotionEstContext * const c= &s->me;
505 me_cmp_func cmpf, chroma_cmpf;
508 unsigned map_generation = c->map_generation;
// non-zero iff dia_size is not a power of two
510 const int dec= dia_size & (dia_size-1);
512 cmpf= s->dsp.me_cmp[size];
513 chroma_cmpf= s->dsp.me_cmp[size+1];
515 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
// six points: two on the horizontal axis, four at +-dia_size vertically
520 CHECK_CLIPPED_MV(x -dia_size , y);
521 CHECK_CLIPPED_MV(x+ dia_size , y);
522 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
523 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// NOTE(review): -dia_size>>1 parses as (-dia_size)>>1; right-shifting a
// negative value is implementation-defined in C.
525 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
526 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
528 }while(best[0] != x || best[1] != y);
/*
 * Search with a scaled 8-point pattern, followed by a 4-neighbour check.
 *
 * The pattern size is the low 8 bits of c->dia_size; it is halved per stage
 * when it is a power of two, otherwise decremented by one. Each stage checks
 * the eight hex[] offsets (all with |dx|+|dy| == 2) scaled by dia_size and
 * repeats until best[] stops moving. Afterwards the four direct neighbours of
 * (x, y) are checked once.
 *
 * NOTE(review): the loop over i, the "do {" line, the x/y assignments and the
 * return statement are not visible in this view.
 */
534 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
535 int src_index, int ref_index, int const penalty_factor,
536 int size, int h, int flags)
538 MotionEstContext * const c= &s->me;
539 me_cmp_func cmpf, chroma_cmpf;
542 unsigned map_generation = c->map_generation;
544 int dia_size= c->dia_size&0xFF;
// non-zero iff dia_size is not a power of two
545 const int dec= dia_size & (dia_size-1);
546 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
547 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
549 cmpf= s->dsp.me_cmp[size];
550 chroma_cmpf= s->dsp.me_cmp[size+1];
552 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
557 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
559 }while(best[0] != x || best[1] != y);
// final pass over the four direct neighbours
564 CHECK_CLIPPED_MV(x+1, y);
565 CHECK_CLIPPED_MV(x, y+1);
566 CHECK_CLIPPED_MV(x-1, y);
567 CHECK_CLIPPED_MV(x, y-1);
/*
 * Multi-stage search, finished by hex_search() with pattern size 2.
 *
 * Visible stages, with dia_size = c->dia_size & 0xFE:
 *  - a horizontal scan over x2 in steps of 2, within dia_size-1 of x;
 *  - a vertical scan over y2 in steps of 2, within dia_size/2-1 of y;
 *  - a full scan of the 5x5 window around (x, y);
 *  - the 16-point hex[] pattern at scales j = 1 .. dia_size/4.
 * All scan ranges are clipped to the search window.
 *
 * NOTE(review): the statements inside the scan loops, the loop over i and the
 * x/y assignments are not visible in this view.
 */
572 static int umh_search(MpegEncContext * s, int *best, int dmin,
573 int src_index, int ref_index, int const penalty_factor,
574 int size, int h, int flags)
576 MotionEstContext * const c= &s->me;
577 me_cmp_func cmpf, chroma_cmpf;
580 unsigned map_generation = c->map_generation;
581 int x,y,x2,y2, i, j, d;
// low 8 bits of c->dia_size with bit 0 cleared
582 const int dia_size= c->dia_size&0xFE;
583 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
584 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
585 {-2, 3}, { 0, 4}, { 2, 3},
586 {-2,-3}, { 0,-4}, { 2,-3},};
588 cmpf= s->dsp.me_cmp[size];
589 chroma_cmpf= s->dsp.me_cmp[size+1];
593 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
596 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
602 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
603 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
608 //FIXME prevent the CLIP stuff
610 for(j=1; j<=dia_size/4; j++){
612 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
616 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
/*
 * Exhaustive scan of the square [-dia_size, dia_size]^2 around (0, 0),
 * clipped to the search window, followed by a check of a point and its four
 * direct neighbours. dia_size is the low 8 bits of c->dia_size.
 *
 * NOTE(review): the statement inside the scan loops, the x/y assignments
 * before the final checks (presumably x = best[0], y = best[1]) and the
 * return statement are not visible in this view.
 */
619 static int full_search(MpegEncContext * s, int *best, int dmin,
620 int src_index, int ref_index, int const penalty_factor,
621 int size, int h, int flags)
623 MotionEstContext * const c= &s->me;
624 me_cmp_func cmpf, chroma_cmpf;
627 unsigned map_generation = c->map_generation;
629 const int dia_size= c->dia_size&0xFF;
631 cmpf= s->dsp.me_cmp[size];
632 chroma_cmpf= s->dsp.me_cmp[size+1];
// the scan range is centred on the zero vector, not on best[]
634 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
635 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
643 CHECK_CLIPPED_MV(x , y);
644 CHECK_CLIPPED_MV(x+1, y);
645 CHECK_CLIPPED_MV(x, y+1);
646 CHECK_CLIPPED_MV(x-1, y);
647 CHECK_CLIPPED_MV(x, y-1);
/* SAB_CHECK_MV(ax,ay): score the full-pel candidate (ax, ay) unless its map
 * slot already holds the current key, and add the MV penalty. If the result
 * beats the worst kept entry (minima[minima_count-1].height), find the first
 * slot j whose height is greater than d, shift the later entries down by one
 * (dropping the last) and store the candidate there with checked = 0.
 * NOTE(review): the initialisation of j, the stores of the coordinates into
 * minima[j] and the closing lines of the macro are not visible in this view. */
654 #define SAB_CHECK_MV(ax,ay)\
656 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
657 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
658 /*printf("sab check %d %d\n", ax, ay);*/\
659 if(map[index]!=key){\
660 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
662 score_map[index]= d;\
663 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
664 /*printf("score: %d\n", d);*/\
665 if(d < minima[minima_count-1].height){\
668 while(d >= minima[j].height) j++;\
670 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
672 minima[j].checked= 0;\
673 minima[j].height= d;\
/* capacity of the minima[] list used by sab_diamond_search() */
683 #define MAX_SAB_SIZE ME_MAP_SIZE
/*
 * Search that keeps a sorted list of the FFABS(c->dia_size) best candidates
 * (minima[]) and expands each not-yet-checked entry via SAB_CHECK_MV.
 *
 * Visible outline:
 *  1. Seed minima[] from every map[] slot belonging to the current
 *     map_generation, decoding x/y from the key and adding the MV penalty for
 *     non-zero vectors.
 *  2. Sort with minima_cmp and pad the list up to minima_count with sentinel
 *     entries at (0, 0).
 *  3. For every unchecked entry strictly inside the window, check its
 *     neighbours and mark it checked.
 *  4. Take minima[0] as the result and make sure its four neighbours are in
 *     the map for the later sub-pel refinement.
 *
 * NOTE(review): several lines are not visible in this view (the j++ after
 * seeding, the statements of the range tests, the SAB_CHECK_MV calls for x-1
 * and x+1, the restart of the scan, and the return).
 */
684 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
685 int src_index, int ref_index, int const penalty_factor,
686 int size, int h, int flags)
688 MotionEstContext * const c= &s->me;
689 me_cmp_func cmpf, chroma_cmpf;
690 Minima minima[MAX_SAB_SIZE];
// NOTE(review): no visible check that FFABS(c->dia_size) <= MAX_SAB_SIZE
691 const int minima_count= FFABS(c->dia_size);
695 unsigned map_generation = c->map_generation;
697 cmpf= s->dsp.me_cmp[size];
698 chroma_cmpf= s->dsp.me_cmp[size+1];
700 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
701 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
703 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
704 uint32_t key= map[i];
// bias both coordinate fields by half their range; the bias is subtracted
// again from x and y after they are extracted below
706 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// keep only slots whose upper bits match the current generation
// NOTE(review): (-1)<<n left-shifts a negative value, which is undefined
// behaviour in ISO C -- consider -(1<<n) or an unsigned operand.
708 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
710 minima[j].height= score_map[i];
711 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
712 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
713 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
714 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
716 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
717 if( minima[j].x > xmax || minima[j].x < xmin
718 || minima[j].y > ymax || minima[j].y < ymin)
// score_map holds the raw cmp() result; add the MV penalty for non-zero vectors
722 if(minima[j].x || minima[j].y)
723 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
728 qsort(minima, j, sizeof(Minima), minima_cmp);
// pad the list with sentinel entries (height 2^30) at (0, 0)
730 for(; j<minima_count; j++){
731 minima[j].height=256*256*256*64;
733 minima[j].x= minima[j].y=0;
736 for(i=0; i<minima_count; i++){
737 const int x= minima[i].x;
738 const int y= minima[i].y;
741 if(minima[i].checked) continue;
// entries on the window border are not expanded
743 if( x >= xmax || x <= xmin
744 || y >= ymax || y <= ymin)
749 SAB_CHECK_MV(x , y-1)
750 SAB_CHECK_MV(x , y+1)
752 minima[i].checked= 1;
// the first entry of the list is taken as the result
755 best[0]= minima[0].x;
756 best[1]= minima[0].y;
757 dmin= minima[0].height;
759 if( best[0] < xmax && best[0] > xmin
760 && best[1] < ymax && best[1] > ymin){
762 //ensure that the reference samples for hpel refinement are in the map
763 CHECK_MV(best[0]-1, best[1])
764 CHECK_MV(best[0]+1, best[1])
765 CHECK_MV(best[0], best[1]-1)
766 CHECK_MV(best[0], best[1]+1)
/*
 * Diamond search with diamond sizes growing from 1 up to c->dia_size.
 *
 * For each size, the four edges of the diamond around (x, y) = best[] are
 * walked. The [start, end) range of dir on each edge is chosen so that every
 * candidate passed to CHECK_MV lies inside the search window.
 *
 * NOTE(review): the declarations, the statement executed when best[] moved
 * (last visible line) and the return statement are not visible in this view.
 */
771 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
772 int src_index, int ref_index, int const penalty_factor,
773 int size, int h, int flags)
775 MotionEstContext * const c= &s->me;
776 me_cmp_func cmpf, chroma_cmpf;
780 unsigned map_generation = c->map_generation;
782 cmpf= s->dsp.me_cmp[size];
783 chroma_cmpf= s->dsp.me_cmp[size+1];
785 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
787 const int x= best[0];
788 const int y= best[1];
// edge 1: keeps y + dia_size - dir <= ymax and x + dir <= xmax
790 start= FFMAX(0, y + dia_size - ymax);
791 end = FFMIN(dia_size, xmax - x + 1);
792 for(dir= start; dir<end; dir++){
795 //check(x + dir,y + dia_size - dir,0, a0)
796 CHECK_MV(x + dir , y + dia_size - dir);
// edge 2: keeps x + dia_size - dir <= xmax and y - dir >= ymin
799 start= FFMAX(0, x + dia_size - xmax);
800 end = FFMIN(dia_size, y - ymin + 1);
801 for(dir= start; dir<end; dir++){
804 //check(x + dia_size - dir, y - dir,0, a1)
805 CHECK_MV(x + dia_size - dir, y - dir );
// edge 3: keeps y - dia_size + dir >= ymin and x - dir >= xmin
808 start= FFMAX(0, -y + dia_size + ymin );
809 end = FFMIN(dia_size, x - xmin + 1);
810 for(dir= start; dir<end; dir++){
813 //check(x - dir,y - dia_size + dir,0, a2)
814 CHECK_MV(x - dir , y - dia_size + dir);
// edge 4: keeps x - dia_size + dir >= xmin and y + dir <= ymax
817 start= FFMAX(0, -x + dia_size + xmin );
818 end = FFMIN(dia_size, ymax - y + 1);
819 for(dir= start; dir<end; dir++){
822 //check(x - dia_size + dir, y + dir,0, a3)
823 CHECK_MV(x - dia_size + dir, y + dir );
// true when this pass found a better vector than (x, y)
826 if(x!=best[0] || y!=best[1])
/*
 * Dispatch to one of the full-pel search strategies based on c->dia_size.
 * All arguments are forwarded unchanged. Visible tests, in order:
 *   dia_size < -1    -> sab_diamond_search
 *   dia_size < 2     -> small_diamond_search
 *   dia_size > 1024  -> full_search
 *   dia_size > 768   -> umh_search
 *   dia_size > 512   -> hex_search (pattern size = dia_size & 0xFF)
 *   dia_size > 256   -> l2s_dia_search
 *   otherwise        -> var_diamond_search
 * NOTE(review): the condition guarding the funny_diamond_search call is not
 * visible in this view; it is tested before all of the above.
 */
832 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
833 int src_index, int ref_index, int const penalty_factor,
834 int size, int h, int flags){
835 MotionEstContext * const c= &s->me;
837 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838 else if(c->dia_size<-1)
839 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840 else if(c->dia_size<2)
841 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842 else if(c->dia_size>1024)
843 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844 else if(c->dia_size>768)
845 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
846 else if(c->dia_size>512)
847 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
848 else if(c->dia_size>256)
849 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
851 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
855 @param P a list of candidate mvs to check before starting the
856 iterative search. If one of the candidates is close to the optimal mv, then
857 it takes fewer iterations. And it increases the chance that we find the
/*
 * Full-pel search: score a set of predictor vectors, then refine the best
 * one with diamond_search().
 *
 * Visible outline:
 *  1. Select penalty_factor and the comparison functions from either the
 *     me_pre_cmp set or the me_cmp set.
 *  2. Start a new map generation and score the zero vector.
 *  3. Check the spatial predictors P_LEFT / P_TOP / P_TOPRIGHT / P_MEDIAN and
 *     scaled vectors from last_mv[] at and around the current macroblock.
 *  4. Optionally scan last_mv[] in a window of +-last_predictor_count
 *     macroblocks.
 *  5. Run diamond_search() from the best candidate.
 *
 * @param P             candidate vectors, accessed through the P_* macros
 * @param last_mv       vectors indexed by macroblock position; scaled by
 *                      ref_mv_scale with rounding: (v*scale + (1<<15)) >> 16
 * @param ref_mv_scale  scale factor applied to last_mv entries
 *
 * NOTE(review): many lines are not visible in this view (the condition
 * choosing the pre-pass settings, LOAD_COMMON-style declarations, the else
 * lines, the early-exit body, the body of the last_predictor_count loop, and
 * the final store/return).
 */
860 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
862 int ref_mv_scale, int flags, int size, int h)
864 MotionEstContext * const c= &s->me;
865 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
866 i.e. the difference between the position of the
867 block currently being encoded and the position of
868 the block chosen to predict it from. */
869 int d; ///< the score (cmp + penalty) of any given mv
870 int dmin; /**< the best value of d, i.e. the score
871 corresponding to the mv stored in best[]. */
872 unsigned map_generation;
874 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
875 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
876 me_cmp_func cmpf, chroma_cmpf;
// first alternative: pre-pass penalty factor and comparison functions
882 penalty_factor= c->pre_penalty_factor;
883 cmpf= s->dsp.me_pre_cmp[size];
884 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
// second alternative: regular penalty factor and comparison functions
886 penalty_factor= c->penalty_factor;
887 cmpf= s->dsp.me_cmp[size];
888 chroma_cmpf= s->dsp.me_cmp[size+1];
891 map_generation= update_map_generation(c);
// score the zero vector and mark slot 0 of the map with the new generation
894 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
895 map[0]= map_generation;
// the zero vector is charged an MV penalty only for B pictures without
// FLAG_DIRECT, or when CODEC_FLAG_MV0 is set
898 //FIXME precalc first term below?
899 if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
900 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
// on the first slice line only the left predictor and the co-located
// last_mv entry are checked
903 if (s->first_slice_line) {
904 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
905 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
906 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// early-exit test: dmin is below a threshold scaled by h*h and the visible
// predictor components are all zero (the action taken is not visible here)
908 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
909 && ( P_LEFT[0] |P_LEFT[1]
911 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
// median predictor and its four neighbours, then the remaining predictors
917 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
918 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
919 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
920 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
921 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
922 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
923 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
924 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
925 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
926 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
// last_mv entries of the left, upper, right and lower macroblocks
// NOTE(review): the condition enclosing these checks is not visible here
930 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
931 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
932 if(!s->first_slice_line)
933 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
934 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
937 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
938 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
939 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
940 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// optional scan of last_mv over a window of +-count macroblocks, clipped to
// the picture; vectors outside the search window are skipped
944 if(c->avctx->last_predictor_count){
945 const int count= c->avctx->last_predictor_count;
946 const int xstart= FFMAX(0, s->mb_x - count);
947 const int ystart= FFMAX(0, s->mb_y - count);
948 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
949 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
952 for(mb_y=ystart; mb_y<yend; mb_y++){
954 for(mb_x=xstart; mb_x<xend; mb_x++){
// NOTE(review): this index uses a +1 offset in both directions, unlike
// ref_mv_xy above -- confirm the intended last_mv layout
955 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
956 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
957 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
965 //check(best[0],best[1],0, b0)
966 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968 //check(best[0],best[1],0, b1)
972 // printf("%d %d %d \n", best[0], best[1], dmin);
976 //this function is dedicated to the braindamaged gcc
/*
 * Public wrapper around epzs_motion_search_internal().
 * When c->flags == 0, h == 16 and size == 0 the internal function is called
 * with those values as literal constants (presumably so that the
 * always-inline body can be specialised for that case); otherwise c->flags,
 * size and h are passed through. All other arguments are forwarded unchanged.
 */
977 inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
978 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
979 int ref_mv_scale, int size, int h)
981 MotionEstContext * const c= &s->me;
982 //FIXME convert other functions in the same way if faster
983 if(c->flags==0 && h==16 && size==0){
984 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
/*
 * Predictor-based full-pel search that also checks the P_MV1 predictor.
 *
 * Uses c->penalty_factor, c->flags and the me_cmp comparison functions,
 * starts a new map generation, checks the spatial predictors (P_MV1,
 * P_MEDIAN, P_LEFT, P_TOP, P_TOPRIGHT) and scaled last_mv entries, then
 * refines with diamond_search().
 *
 * NOTE(review): the remaining parameters, the declarations of
 * best/d/dmin/size/h, the else lines, the condition enclosing the last_mv
 * neighbour checks and the final store/return are not visible in this view.
 */
992 static int epzs_motion_search4(MpegEncContext * s,
993 int *mx_ptr, int *my_ptr, int P[10][2],
994 int src_index, int ref_index, int16_t (*last_mv)[2],
997 MotionEstContext * const c= &s->me;
1000 unsigned map_generation;
1001 const int penalty_factor= c->penalty_factor;
1004 const int ref_mv_stride= s->mb_stride;
1005 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1006 me_cmp_func cmpf, chroma_cmpf;
1008 int flags= c->flags;
1011 cmpf= s->dsp.me_cmp[size];
1012 chroma_cmpf= s->dsp.me_cmp[size+1];
1014 map_generation= update_map_generation(c);
1017 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
// first slice line: left predictor, co-located last_mv entry and P_MV1 only
1019 if (s->first_slice_line) {
1020 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// otherwise: the full predictor set
1025 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026 //FIXME try some early stop
1027 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv entries of the right and lower macroblocks
1035 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1038 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1042 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1047 // printf("%d %d %d \n", best[0], best[1], dmin);
1051 //try to merge with above FIXME (needs PSNR test)
/*
 * Predictor-based full-pel search with size fixed to 0.
 *
 * The visible statements match epzs_motion_search4() apart from the local
 * constant size = 0: they use c->penalty_factor, c->flags and the me_cmp
 * comparison functions, start a new map generation, check the spatial
 * predictors (P_MV1, P_MEDIAN, P_LEFT, P_TOP, P_TOPRIGHT) and scaled last_mv
 * entries, then refine with diamond_search().
 *
 * NOTE(review): the remaining parameters, the declarations of best/d/dmin/h,
 * the else lines, the condition enclosing the last_mv neighbour checks and
 * the final store/return are not visible in this view.
 */
1052 static int epzs_motion_search2(MpegEncContext * s,
1053 int *mx_ptr, int *my_ptr, int P[10][2],
1054 int src_index, int ref_index, int16_t (*last_mv)[2],
1057 MotionEstContext * const c= &s->me;
1060 unsigned map_generation;
1061 const int penalty_factor= c->penalty_factor;
1062 const int size=0; //FIXME pass as arg
1064 const int ref_mv_stride= s->mb_stride;
1065 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1066 me_cmp_func cmpf, chroma_cmpf;
1068 int flags= c->flags;
1071 cmpf= s->dsp.me_cmp[size];
1072 chroma_cmpf= s->dsp.me_cmp[size+1];
1074 map_generation= update_map_generation(c);
1077 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
// first slice line: left predictor, co-located last_mv entry and P_MV1 only
1079 if (s->first_slice_line) {
1080 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1081 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1082 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1083 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// otherwise: the full predictor set
1085 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1086 //FIXME try some early stop
1087 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1088 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1089 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1090 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1091 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1092 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv entries of the right and lower macroblocks
1095 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1096 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1097 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1098 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1099 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1102 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1107 // printf("%d %d %d \n", best[0], best[1], dmin);