3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
27 #include "mpegvideo.h"
29 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31 uint32_t av_unused * const score_map= c->score_map;\
32 const int av_unused xmin= c->xmin;\
33 const int av_unused ymin= c->ymin;\
34 const int av_unused xmax= c->xmax;\
35 const int av_unused ymax= c->ymax;\
36 uint8_t *mv_penalty= c->current_mv_penalty;\
37 const int pred_x= c->pred_x;\
38 const int pred_y= c->pred_y;\
// Score one half-pel candidate. (x, y) is the full-pel base and (dx, dy) the
// half-pel offset, so the candidate in half-pel units is (2*x+dx, 2*y+dy).
// d receives the cmp_hpel() distortion plus the mv_penalty[] rate term scaled
// by penalty_factor; COPY3_IF_LT then updates dmin/bx/by (presumably only
// when d < dmin). Expects s, size, h, ref_index, src_index, cmp_sub,
// chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y, penalty_factor, d, dmin,
// bx and by to be in scope where the macro is expanded.
40 #define CHECK_HALF_MV(dx, dy, x, y)\
42 const int hx= 2*(x)+(dx);\
43 const int hy= 2*(y)+(dy);\
44 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
45 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
46 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
// Half-pel refinement of the full-pel vector stored in *mx_ptr / *my_ptr.
// dmin is the score of that full-pel position on entry; src_index and
// ref_index are handed straight to the comparison helpers. Distortion is
// measured with the dsp.me_sub_cmp functions and the rate term is weighted by
// c->sub_penalty_factor.
49 static int hpel_motion_search(MpegEncContext * s,
50 int *mx_ptr, int *my_ptr, int dmin,
51 int src_index, int ref_index,
54 MotionEstContext * const c= &s->me;
55 const int mx = *mx_ptr;
56 const int my = *my_ptr;
57 const int penalty_factor= c->sub_penalty_factor;
58 me_cmp_func cmp_sub, chroma_cmp_sub;
62 int flags= c->sub_flags;
// entry [size] is used for cmp_sub, entry [size+1] for chroma_cmp_sub
66 cmp_sub= s->dsp.me_sub_cmp[size];
67 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
69 if(c->skip){ //FIXME move out of hpel?
// The full-pel search and the sub-pel refinement are configured with different
// metrics: re-score the start point with the sub-pel metric (adding the rate
// term unless this is a zero vector at size 0), presumably so that dmin is
// comparable with the half-pel candidate scores below.
75 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
76 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
77 if(mx || my || size>0)
78 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
// The refinement reads the scores of all four full-pel neighbours, so it is
// only entered when (mx, my) lies strictly inside the search range.
81 if (mx > xmin && mx < xmax &&
82 my > ymin && my < ymax) {
// Cached full-pel scores of the top / left / right / bottom neighbours, each
// with its own rate term added. bx/by are checked against xmin*2..xmax*2 at
// the end of this block, i.e. they are in half-pel units, so a one-pel step
// is +-2 here.
84 const int index= (my<<ME_MAP_SHIFT) + mx;
85 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
87 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
91 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
92 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
// Debug-only sanity check: the four neighbour slots of c->map must hold the
// keys of exactly those four neighbours for the current map generation.
94 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
96 unsigned map_generation= c->map_generation;
97 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98 av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100 av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
102 av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
103 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
104 av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
// Half-pel candidates around (mx, my); each one is written as a full-pel base
// position plus a (dx, dy) half-pel offset.
107 CHECK_HALF_MV(0, 1, mx ,my-1)
109 CHECK_HALF_MV(1, 1, mx-1, my-1)
111 CHECK_HALF_MV(1, 1, mx , my-1)
113 CHECK_HALF_MV(1, 1, mx-1, my )
115 CHECK_HALF_MV(1, 0, mx-1, my )
117 CHECK_HALF_MV(1, 1, mx , my-1)
119 CHECK_HALF_MV(1, 1, mx-1, my-1)
121 CHECK_HALF_MV(1, 1, mx , my )
123 CHECK_HALF_MV(1, 0, mx , my )
128 CHECK_HALF_MV(1, 1, mx-1, my-1)
130 CHECK_HALF_MV(1, 1, mx , my )
132 CHECK_HALF_MV(1, 0, mx-1, my)
133 CHECK_HALF_MV(1, 1, mx-1, my)
136 CHECK_HALF_MV(1, 1, mx , my-1)
138 CHECK_HALF_MV(1, 1, mx-1, my)
140 CHECK_HALF_MV(1, 0, mx , my)
141 CHECK_HALF_MV(1, 1, mx , my)
143 CHECK_HALF_MV(0, 1, mx , my)
// the selected vector must still be inside the legal range (half-pel units)
145 av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
154 static int no_sub_motion_search(MpegEncContext * s,
155 int *mx_ptr, int *my_ptr, int dmin,
156 int src_index, int ref_index,
// Score the vector (mx, my) with the dsp.mb_cmp comparison functions and
// c->mb_penalty_factor. mx/my are in sub-pel units: each is split into an
// integer part (>> (qpel+1)) and a fractional part (& mask) before being
// passed to cmp(). When add_rate is non-zero the mv_penalty[] rate term is
// added, except for a zero vector with size 0.
164 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
165 int src_index, int ref_index, int size,
168 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
169 MotionEstContext * const c= &s->me;
170 const int penalty_factor= c->mb_penalty_factor;
171 const int flags= c->mb_flags;
// NOTE(review): the shift and mask arithmetic below only works if FLAG_QPEL
// is the value 1, so that qpel is 0 or 1 and mask is 1 or 3 - confirm.
172 const int qpel= flags & FLAG_QPEL;
173 const int mask= 1+2*qpel;
174 me_cmp_func cmp_sub, chroma_cmp_sub;
181 cmp_sub= s->dsp.mb_cmp[size];
182 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
184 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
185 //FIXME check cbp before adding penalty for (0,0) vector
186 if(add_rate && (mx || my || size>0))
187 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
// Externally visible entry point that forwards all arguments unchanged to the
// static inline get_mb_score() and returns its result.
192 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
193 int ref_index, int size, int h, int add_rate)
195 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
// Quarter-pel counterpart of CHECK_HALF_MV. (x, y) is the full-pel base and
// (dx, dy) the quarter-pel offset, giving the candidate (4*x+dx, 4*y+dy) in
// quarter-pel units. d receives the cmp_qpel() distortion plus the rate term;
// COPY3_IF_LT then updates dmin/bx/by (presumably only when d < dmin). Uses
// cmpf/chroma_cmpf, not the *_sub comparison functions.
198 #define CHECK_QUARTER_MV(dx, dy, x, y)\
200 const int hx= 4*(x)+(dx);\
201 const int hy= 4*(y)+(dy);\
202 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
203 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
204 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
// Quarter-pel refinement of the full-pel vector stored in *mx_ptr / *my_ptr.
// Sub-pel offsets nx, ny in -3..3 around the start point are first given an
// estimated score interpolated from full-pel scores, and positions are
// inserted into an 8-entry candidate list (best[] / best_pos[]). Up to
// c->avctx->me_subpel_quality entries are then evaluated for real with
// CHECK_QUARTER_MV.
207 static int qpel_motion_search(MpegEncContext * s,
208 int *mx_ptr, int *my_ptr, int dmin,
209 int src_index, int ref_index,
212 MotionEstContext * const c= &s->me;
213 const int mx = *mx_ptr;
214 const int my = *my_ptr;
215 const int penalty_factor= c->sub_penalty_factor;
216 const unsigned map_generation = c->map_generation;
217 const int subpel_quality= c->avctx->me_subpel_quality;
218 uint32_t *map= c->map;
219 me_cmp_func cmpf, chroma_cmpf;
220 me_cmp_func cmp_sub, chroma_cmp_sub;
223 int flags= c->sub_flags;
225 cmpf= s->dsp.me_cmp[size];
226 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
229 cmp_sub= s->dsp.me_sub_cmp[size];
230 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
232 if(c->skip){ //FIXME somehow move up (benchmark)
// Different metrics for full-pel search and sub-pel refinement: re-score the
// start point with the sub-pel metric and add its rate term (vector scaled to
// quarter-pel units), unless it is a zero vector at size 0.
238 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
239 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
240 if(mx || my || size>0)
241 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
// Only refine when all full-pel neighbours are inside the search range.
244 if (mx > xmin && mx < xmax &&
245 my > ymin && my < ymax) {
246 int bx=4*mx, by=4*my;
// cached full-pel scores: top, left, right, bottom and centre
249 const int index= (my<<ME_MAP_SHIFT) + mx;
250 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
251 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
252 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
253 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
// NOTE: this int named c (the centre score) hides the MotionEstContext
// pointer c declared at the top; presumably that is why the context is
// reached through s->me a few lines further down.
254 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
// Byte-wise fill: with 32-bit int every entry of best[] becomes 0x40404040,
// presumably acting as an initial score worse than any real candidate.
258 memset(best, 64, sizeof(int)*8);
// With a search diameter of at least 2 the four diagonal neighbours are read
// from score_map as well (presumably the wider search has already scored
// them), giving a full 3x3 grid of samples.
259 if(s->me.dia_size>=2){
260 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
261 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
262 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
263 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
265 for(ny= -3; ny <= 3; ny++){
266 for(nx= -3; nx <= 3; nx++){
// Quadratic interpolation along x on the top, centre and bottom rows (each
// scaled by 32), then along y over those three results; the combined scale
// of 1024 is removed, with rounding, by the +512 and >>10.
267 //FIXME this could overflow (unlikely though)
268 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
269 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
270 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
271 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
// skip positions whose offsets are both multiples of 4; within -3..3 that is
// only the full-pel start point nx == ny == 0
274 if((nx&3)==0 && (ny&3)==0) continue;
276 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
278 // if(nx&1) score-=1024*c->penalty_factor;
279 // if(ny&1) score-=1024*c->penalty_factor;
// make room at slot i of the 8-entry candidate list by moving entries i..6
// down one place, then store the position in quarter-pel units
283 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
284 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
286 best_pos[i][0]= nx + 4*mx;
287 best_pos[i][1]= ny + 4*my;
// Alternative model: coefficients of a quadratic surface through the centre,
// its four direct neighbours and the top-left sample tl.
295 //FIXME this could overflow (unlikely though)
296 const int cx = 4*(r - l);
297 const int cx2= r + l - 2*c;
298 const int cy = 4*(b - t);
299 const int cy2= b + t - 2*c;
// The trailing "&& 0" makes this condition always false. NOTE(review): the
// key is built from (mx, my) while the index addresses (mx-1, my-1).
302 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
303 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
305 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
308 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
// the fitted surface must reproduce the sampled scores r, l, b, t and tl
// (all scaled by 32) at offsets of one full pel, i.e. 4 quarter-pel steps
310 av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
311 av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
312 av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
313 av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
314 av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
316 for(ny= -3; ny <= 3; ny++){
317 for(nx= -3; nx <= 3; nx++){
318 //FIXME this could overflow (unlikely though)
319 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
322 if((nx&3)==0 && (ny&3)==0) continue;
// the model score above is scaled by 32, hence the factor 32 on the rate term
324 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
325 // if(nx&1) score-=32*c->penalty_factor;
326 // if(ny&1) score-=32*c->penalty_factor;
// same insertion into the 8-entry candidate list as in the branch above
330 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
331 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
333 best_pos[i][0]= nx + 4*mx;
334 best_pos[i][1]= ny + 4*my;
// Evaluate up to subpel_quality candidates for real; nx/ny are quarter-pel
// coordinates (presumably taken from best_pos[i]) split into a full-pel base
// (>>2) and a quarter-pel offset (&3).
341 for(i=0; i<subpel_quality; i++){
344 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
// the selected vector must still be inside the legal range (quarter-pel units)
347 av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
// Evaluate the full-pel candidate (x, y), which must already be inside
// [xmin, xmax] x [ymin, ymax] (asserted). The candidate is identified by a key
// built from y, x and map_generation, and by a slot index into map/score_map.
// Only when the slot does not already hold that key is the candidate scored
// with cmp(): the raw distortion is stored in score_map[index], the rate term
// is added to d, and COPY3_IF_LT updates dmin/best[0]/best[1] (presumably
// only when d < dmin).
360 #define CHECK_MV(x,y)\
362 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
363 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
364 av_assert2((x) >= xmin);\
365 av_assert2((x) <= xmax);\
366 av_assert2((y) >= ymin);\
367 av_assert2((y) <= ymax);\
368 if(map[index]!=key){\
369 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
371 score_map[index]= d;\
372 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
373 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
// Variant for candidates that may lie outside the search range: Lx/Ly are
// clamped to [xmin, xmax] and [ymin, ymax], giving Lx2/Ly2 (presumably the
// position that is then evaluated with CHECK_MV - TODO confirm).
377 #define CHECK_CLIPPED_MV(ax,ay)\
381 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
382 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
// Like CHECK_MV but without the range assertions and with a direction tag:
// (x, y) is scored with cmp() only if its map slot does not already hold its
// key, the raw distortion is cached in score_map[index] and the rate term is
// added to d. new_dir is presumably recorded as the next search direction
// when this candidate becomes the best one - TODO confirm.
386 #define CHECK_MV_DIR(x,y,new_dir)\
388 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
389 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
390 if(map[index]!=key){\
391 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
393 score_map[index]= d;\
394 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
// Debug helper: for each bound of the search range (shifted left by S) that
// the point (x, y) violates, log the bound, the point and the current
// macroblock position through av_log(). v is stringified and appended to the
// format string as a tag identifying the call site.
404 #define check(x,y,S,v)\
405 if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
406 if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
407 if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
408 if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
// Declares the locals shared by the full-pel search routines: map (c->map),
// qpel (flags & FLAG_QPEL) and shift = 1 + qpel. shift is the amount by which
// the CHECK_MV family shifts a full-pel coordinate before indexing
// mv_penalty[]. Requires c and flags to be in scope.
410 #define LOAD_COMMON2\
411 uint32_t *map= c->map;\
412 const int qpel= flags&FLAG_QPEL;\
413 const int shift= 1+qpel;\
// Smallest iterative search: starting from best[], repeatedly probes the
// direct left / top / right / bottom full-pel neighbours with CHECK_MV_DIR.
// best[] holds the current best vector and dmin its score; distortion is
// measured with the dsp.me_cmp functions.
415 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
416 int src_index, int ref_index, int const penalty_factor,
417 int size, int h, int flags)
419 MotionEstContext * const c= &s->me;
420 me_cmp_func cmpf, chroma_cmpf;
424 unsigned map_generation = c->map_generation;
426 cmpf= s->dsp.me_cmp[size];
427 chroma_cmpf= s->dsp.me_cmp[size+1];
429 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
430 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
431 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
432 if(map[index]!=key){ //this will be executed only very rarely
433 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
// One step: dir is the direction tag carried over from the previous step.
// Each neighbour is probed unless it is outside the range or its tag is the
// opposite of dir (0 = left, 1 = up, 2 = right, 3 = down), presumably because
// that neighbour is the position the search just came from.
440 const int dir= next_dir;
441 const int x= best[0];
442 const int y= best[1];
445 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
446 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
447 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
448 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
// Diamond search over a fixed set of sizes: dia_size runs from 1 to 4 but the
// non-powers of two are skipped, so only diamonds of size 1, 2 and 4 around
// the current best[] are probed with CHECK_MV.
456 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
457 int src_index, int ref_index, int const penalty_factor,
458 int size, int h, int flags)
460 MotionEstContext * const c= &s->me;
461 me_cmp_func cmpf, chroma_cmpf;
465 unsigned map_generation = c->map_generation;
467 cmpf= s->dsp.me_cmp[size];
468 chroma_cmpf= s->dsp.me_cmp[size+1];
470 for(dia_size=1; dia_size<=4; dia_size++){
472 const int x= best[0];
473 const int y= best[1];
// dia_size & (dia_size-1) is non-zero exactly when dia_size is not a power
// of two, i.e. for 3 here
475 if(dia_size&(dia_size-1)) continue;
// test whether the diamond would reach outside the search range
477 if( x + dia_size > xmax
478 || x - dia_size < xmin
479 || y + dia_size > ymax
480 || y - dia_size < ymin)
// walk the four edges of the diamond in steps of 2
483 for(dir= 0; dir<dia_size; dir+=2){
486 CHECK_MV(x + dir , y + dia_size - dir);
487 CHECK_MV(x + dia_size - dir, y - dir );
488 CHECK_MV(x - dir , y - dia_size + dir);
489 CHECK_MV(x - dia_size + dir, y + dir );
// true when one of the probes above moved the best vector
492 if(x!=best[0] || y!=best[1])
// Hexagon search. For each size, the six hexagon points around the current
// best vector are probed (clipped to the range) and the probing is repeated
// while the best vector keeps changing. dia_size then shrinks: by one per
// round if the initial dia_size was not a power of two (dec != 0), otherwise
// by halving, until it reaches 0.
498 static int hex_search(MpegEncContext * s, int *best, int dmin,
499 int src_index, int ref_index, int const penalty_factor,
500 int size, int h, int flags, int dia_size)
502 MotionEstContext * const c= &s->me;
503 me_cmp_func cmpf, chroma_cmpf;
506 unsigned map_generation = c->map_generation;
// non-zero exactly when the initial dia_size is not a power of two
508 const int dec= dia_size & (dia_size-1);
510 cmpf= s->dsp.me_cmp[size];
511 chroma_cmpf= s->dsp.me_cmp[size+1];
513 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
518 CHECK_CLIPPED_MV(x -dia_size , y);
519 CHECK_CLIPPED_MV(x+ dia_size , y);
520 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
521 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// NOTE: unary minus binds tighter than >>, so this is (-dia_size)>>1, a right
// shift of a negative value (implementation-defined in C; with an arithmetic
// shift it rounds towards minus infinity, e.g. -3>>1 == -2).
523 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
524 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
526 }while(best[0] != x || best[1] != y);
// Large-to-small diamond search. The 8-point pattern in hex[] is scaled by
// dia_size (low 8 bits of c->dia_size) and probed around the best vector,
// repeating while the best vector changes; dia_size then shrinks (by one if
// it was not a power of two initially, otherwise by halving). A final probe
// of the four direct neighbours follows.
532 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
533 int src_index, int ref_index, int const penalty_factor,
534 int size, int h, int flags)
536 MotionEstContext * const c= &s->me;
537 me_cmp_func cmpf, chroma_cmpf;
540 unsigned map_generation = c->map_generation;
542 int dia_size= c->dia_size&0xFF;
543 const int dec= dia_size & (dia_size-1);
// unit pattern: the 4 axis points at distance 2 and the 4 diagonal points
544 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
545 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
547 cmpf= s->dsp.me_cmp[size];
548 chroma_cmpf= s->dsp.me_cmp[size+1];
550 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
555 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
557 }while(best[0] != x || best[1] != y);
// final refinement on the four direct neighbours
562 CHECK_CLIPPED_MV(x+1, y);
563 CHECK_CLIPPED_MV(x, y+1);
564 CHECK_CLIPPED_MV(x-1, y);
565 CHECK_CLIPPED_MV(x, y-1);
// Multi-stage search built from several scan patterns around (x, y): a
// horizontal and a vertical line scan with step 2, a 5x5 window, rings made
// of the 16-point hex[] pattern scaled by j = 1 .. dia_size/4, and finally a
// hex_search() of size 2 whose result is returned. dia_size is c->dia_size
// masked with 0xFE, i.e. the low byte forced even.
570 static int umh_search(MpegEncContext * s, int *best, int dmin,
571 int src_index, int ref_index, int const penalty_factor,
572 int size, int h, int flags)
574 MotionEstContext * const c= &s->me;
575 me_cmp_func cmpf, chroma_cmpf;
578 unsigned map_generation = c->map_generation;
579 int x,y,x2,y2, i, j, d;
580 const int dia_size= c->dia_size&0xFE;
581 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
582 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
583 {-2, 3}, { 0, 4}, { 2, 3},
584 {-2,-3}, { 0,-4}, { 2,-3},};
586 cmpf= s->dsp.me_cmp[size];
587 chroma_cmpf= s->dsp.me_cmp[size+1];
// horizontal scan, every second column, clipped to [xmin, xmax]
591 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
// vertical scan over half that extent, every second row, clipped to [ymin, ymax]
594 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
// 5x5 window around (x, y), clipped to the range
600 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
601 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
606 //FIXME prevent the CLIP stuff
// growing rings: the hex[] pattern scaled by j
608 for(j=1; j<=dia_size/4; j++){
610 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
614 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
// Exhaustive search: scans every full-pel position of the square
// [-dia_size, dia_size]^2 (dia_size = low 8 bits of c->dia_size) intersected
// with the search range, then probes a position and its four direct
// neighbours with CHECK_CLIPPED_MV.
617 static int full_search(MpegEncContext * s, int *best, int dmin,
618 int src_index, int ref_index, int const penalty_factor,
619 int size, int h, int flags)
621 MotionEstContext * const c= &s->me;
622 me_cmp_func cmpf, chroma_cmpf;
625 unsigned map_generation = c->map_generation;
627 const int dia_size= c->dia_size&0xFF;
629 cmpf= s->dsp.me_cmp[size];
630 chroma_cmpf= s->dsp.me_cmp[size+1];
// the window is centred on the zero vector, not on best[]
632 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
633 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
// (x, y) here is presumably the best position found by the scan - TODO confirm
641 CHECK_CLIPPED_MV(x , y);
642 CHECK_CLIPPED_MV(x+1, y);
643 CHECK_CLIPPED_MV(x, y+1);
644 CHECK_CLIPPED_MV(x-1, y);
645 CHECK_CLIPPED_MV(x, y-1);
// Candidate check for sab_diamond_search(). (ax, ay) is scored with cmp()
// only if its map slot does not already hold its key; the raw distortion is
// cached in score_map[index] and the rate term is added to d. If d is lower
// than the height of the last entry of minima[], the insertion slot j (first
// entry whose height exceeds d) is located, later entries are moved down by
// one with memmove (the last one is dropped) and the new, not yet checked
// minimum is stored at j.
652 #define SAB_CHECK_MV(ax,ay)\
654 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
655 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
656 if(map[index]!=key){\
657 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
659 score_map[index]= d;\
660 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
661 if(d < minima[minima_count-1].height){\
664 while(d >= minima[j].height) j++;\
666 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
668 minima[j].checked= 0;\
669 minima[j].height= d;\
679 #define MAX_SAB_SIZE ME_MAP_SIZE
// Search that tracks several minima at once. The positions already scored in
// the current map generation are collected into minima[], sorted with
// minima_cmp, and padded up to minima_count = |c->dia_size| entries. Each
// unchecked minimum strictly inside the range then has neighbours probed with
// SAB_CHECK_MV, which may insert better positions into the list. The first
// entry of the list becomes best[] / dmin.
680 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
681 int src_index, int ref_index, int const penalty_factor,
682 int size, int h, int flags)
684 MotionEstContext * const c= &s->me;
685 me_cmp_func cmpf, chroma_cmpf;
686 Minima minima[MAX_SAB_SIZE];
687 const int minima_count= FFABS(c->dia_size);
691 unsigned map_generation = c->map_generation;
693 av_assert1(minima_count <= MAX_SAB_SIZE);
695 cmpf= s->dsp.me_cmp[size];
696 chroma_cmpf= s->dsp.me_cmp[size+1];
698 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
699 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
701 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
702 uint32_t key= map[i];
// bias both coordinate fields by half their range so that the x and y fields
// can be extracted as non-negative values; the bias is subtracted again below
704 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// skip slots whose upper bits do not match the current generation.
// NOTE(review): (-1)<<n left-shifts a negative value, which is undefined
// behaviour in ISO C.
706 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
708 minima[j].height= score_map[i];
709 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
710 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
711 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
712 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
714 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
715 if( minima[j].x > xmax || minima[j].x < xmin
716 || minima[j].y > ymax || minima[j].y < ymin)
// score_map holds raw distortion; add the rate term for non-zero vectors
720 if(minima[j].x || minima[j].y)
721 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
726 qsort(minima, j, sizeof(Minima), minima_cmp);
// pad the unused tail of the list with a very large height (2^30) at the
// zero position
728 for(; j<minima_count; j++){
729 minima[j].height=256*256*256*64;
731 minima[j].x= minima[j].y=0;
734 for(i=0; i<minima_count; i++){
735 const int x= minima[i].x;
736 const int y= minima[i].y;
739 if(minima[i].checked) continue;
// test for minima on or outside the border of the search range
741 if( x >= xmax || x <= xmin
742 || y >= ymax || y <= ymin)
747 SAB_CHECK_MV(x , y-1)
748 SAB_CHECK_MV(x , y+1)
750 minima[i].checked= 1;
// the first entry of the list is the result
753 best[0]= minima[0].x;
754 best[1]= minima[0].y;
755 dmin= minima[0].height;
757 if( best[0] < xmax && best[0] > xmin
758 && best[1] < ymax && best[1] > ymin){
760 //ensure that the reference samples for hpel refinement are in the map
761 CHECK_MV(best[0]-1, best[1])
762 CHECK_MV(best[0]+1, best[1])
763 CHECK_MV(best[0], best[1]-1)
764 CHECK_MV(best[0], best[1]+1)
// Diamond search with a growing size: for dia_size = 1 .. c->dia_size the
// four edges of the diamond around the current best[] are probed with
// CHECK_MV. On each edge the start/end of the dir loop are clipped so that
// only points inside [xmin, xmax] x [ymin, ymax] are visited.
769 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
770 int src_index, int ref_index, int const penalty_factor,
771 int size, int h, int flags)
773 MotionEstContext * const c= &s->me;
774 me_cmp_func cmpf, chroma_cmpf;
778 unsigned map_generation = c->map_generation;
780 cmpf= s->dsp.me_cmp[size];
781 chroma_cmpf= s->dsp.me_cmp[size+1];
783 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
785 const int x= best[0];
786 const int y= best[1];
// edge from (x, y+dia_size) towards (x+dia_size, y): keep y+dia_size-dir <= ymax
// and x+dir <= xmax
788 start= FFMAX(0, y + dia_size - ymax);
789 end = FFMIN(dia_size, xmax - x + 1);
790 for(dir= start; dir<end; dir++){
793 //check(x + dir,y + dia_size - dir,0, a0)
794 CHECK_MV(x + dir , y + dia_size - dir);
// edge from (x+dia_size, y) towards (x, y-dia_size): keep x+dia_size-dir <= xmax
// and y-dir >= ymin
797 start= FFMAX(0, x + dia_size - xmax);
798 end = FFMIN(dia_size, y - ymin + 1);
799 for(dir= start; dir<end; dir++){
802 //check(x + dia_size - dir, y - dir,0, a1)
803 CHECK_MV(x + dia_size - dir, y - dir );
// edge from (x, y-dia_size) towards (x-dia_size, y): keep y-dia_size+dir >= ymin
// and x-dir >= xmin
806 start= FFMAX(0, -y + dia_size + ymin );
807 end = FFMIN(dia_size, x - xmin + 1);
808 for(dir= start; dir<end; dir++){
811 //check(x - dir,y - dia_size + dir,0, a2)
812 CHECK_MV(x - dir , y - dia_size + dir);
// edge from (x-dia_size, y) towards (x, y+dia_size): keep x-dia_size+dir >= xmin
// and y+dir <= ymax
815 start= FFMAX(0, -x + dia_size + xmin );
816 end = FFMIN(dia_size, ymax - y + 1);
817 for(dir= start; dir<end; dir++){
820 //check(x - dia_size + dir, y + dir,0, a3)
821 CHECK_MV(x - dia_size + dir, y + dir );
// true when one of the probes above moved the best vector
824 if(x!=best[0] || y!=best[1])
// Dispatcher: selects the full-pel search routine from c->dia_size and
// returns its result. After the first case (funny_diamond_search) the tests
// run in this order: below -1 sab_diamond_search, below 2
// small_diamond_search, above 1024 full_search, above 768 umh_search, above
// 512 hex_search (size = low 8 bits of dia_size), above 256 l2s_dia_search,
// otherwise var_diamond_search.
830 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
831 int src_index, int ref_index, int const penalty_factor,
832 int size, int h, int flags){
833 MotionEstContext * const c= &s->me;
835 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
836 else if(c->dia_size<-1)
837 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838 else if(c->dia_size<2)
839 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840 else if(c->dia_size>1024)
841 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842 else if(c->dia_size>768)
843 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844 else if(c->dia_size>512)
845 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
846 else if(c->dia_size>256)
847 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
849 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
853 @param P a list of candidate mvs to check before starting the
854 iterative search. If one of the candidates is close to the optimal mv, then
855 it takes fewer iterations. And it increases the chance that we find the
// Predictor-based full-pel search. The zero vector is scored first, then the
// spatial predictors in P[] and temporal predictors derived from last_mv are
// probed, and finally diamond_search() iterates from the best of them.
// last_mv entries are scaled by ref_mv_scale in 16.16 fixed point with
// rounding: (v*ref_mv_scale + (1<<15)) >> 16.
858 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
859 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
860 int ref_mv_scale, int flags, int size, int h)
862 MotionEstContext * const c= &s->me;
863 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
864 i.e. the difference between the position of the
865 block currently being encoded and the position of
866 the block chosen to predict it from. */
867 int d; ///< the score (cmp + penalty) of any given mv
868 int dmin; /**< the best value of d, i.e. the score
869 corresponding to the mv stored in best[]. */
870 unsigned map_generation;
872 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
873 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
874 me_cmp_func cmpf, chroma_cmpf;
// one configuration uses the pre_ penalty factor and me_pre_cmp functions,
// the other the regular penalty factor and me_cmp functions
880 penalty_factor= c->pre_penalty_factor;
881 cmpf= s->dsp.me_pre_cmp[size];
882 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
884 penalty_factor= c->penalty_factor;
885 cmpf= s->dsp.me_cmp[size];
886 chroma_cmpf= s->dsp.me_cmp[size+1];
// keys stored in map embed the generation value (see CHECK_MV), so entries
// written under a different value never match
889 map_generation= update_map_generation(c);
// score the zero vector and record its key (0 + 0 + generation) in slot 0
892 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
893 map[0]= map_generation;
// the zero vector is only charged a rate cost for B-pictures outside direct
// mode or when FF_MPV_FLAG_MV0 is set
896 //FIXME precalc first term below?
897 if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
898 s->mpv_flags & FF_MPV_FLAG_MV0)
899 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
// first slice line: probe the left predictor and the co-located last_mv
902 if (s->first_slice_line) {
903 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
904 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
905 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// condition: zero-vector score below a threshold proportional to h*h and
// mv0_threshold, and the spatial predictors listed here all zero
907 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
908 && ( P_LEFT[0] |P_LEFT[1]
910 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
// median predictor, its four direct neighbours, the co-located last_mv and
// the left / top / top-right predictors
916 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
917 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
918 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
919 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
920 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
921 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
922 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
923 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
924 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
925 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
// last_mv of the left, top, right and bottom neighbours of the current
// macroblock (top only when not on the first slice line, bottom only when
// a further macroblock row exists before end_mb_y)
929 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
930 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
931 if(!s->first_slice_line)
932 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
933 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
935 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
936 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
937 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
938 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
939 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// optional wider set of temporal predictors: every macroblock within
// last_predictor_count of the current one, clipped to the picture
943 if(c->avctx->last_predictor_count){
944 const int count= c->avctx->last_predictor_count;
945 const int xstart= FFMAX(0, s->mb_x - count);
946 const int ystart= FFMAX(0, s->mb_y - count);
947 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
948 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
951 for(mb_y=ystart; mb_y<yend; mb_y++){
953 for(mb_x=xstart; mb_x<xend; mb_x++){
// NOTE(review): this index adds one row and one column compared with the
// ref_mv_xy formula above - confirm against the layout of last_mv.
954 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
955 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
956 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
// out-of-range predictors are dropped here rather than clipped
958 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
// iterative refinement starting from the best predictor
964 //check(best[0],best[1],0, b0)
965 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
967 //check(best[0],best[1],0, b1)
974 //this function is dedicated to the braindamaged gcc
// Public entry point around the always-inline epzs_motion_search_internal().
// The common case (c->flags == 0, h == 16, size == 0) is called with literal
// arguments, presumably so the compiler can emit a copy specialised for those
// constants; every other case passes c->flags, size and h through unchanged.
975 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
976 int P[10][2], int src_index, int ref_index,
977 int16_t (*last_mv)[2], int ref_mv_scale,
980 MotionEstContext * const c= &s->me;
981 //FIXME convert other functions in the same way if faster
982 if(c->flags==0 && h==16 && size==0){
983 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
985 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
987 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
// Predictor-based full-pel search variant that additionally probes the P_MV1
// predictor. It always uses c->penalty_factor, c->flags and the dsp.me_cmp
// functions, probes the spatial predictors and the co-located, right and
// bottom last_mv entries (scaled by ref_mv_scale, 16.16 fixed point with
// rounding), then refines with diamond_search().
991 static int epzs_motion_search4(MpegEncContext * s,
992 int *mx_ptr, int *my_ptr, int P[10][2],
993 int src_index, int ref_index, int16_t (*last_mv)[2],
996 MotionEstContext * const c= &s->me;
999 unsigned map_generation;
1000 const int penalty_factor= c->penalty_factor;
1003 const int ref_mv_stride= s->mb_stride;
1004 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1005 me_cmp_func cmpf, chroma_cmpf;
1007 int flags= c->flags;
1010 cmpf= s->dsp.me_cmp[size];
1011 chroma_cmpf= s->dsp.me_cmp[size+1];
// keys stored in map embed the generation value (see CHECK_MV)
1013 map_generation= update_map_generation(c);
// first slice line: left predictor, co-located last_mv and P_MV1
1018 if (s->first_slice_line) {
1019 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1020 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1021 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1022 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// other lines: P_MV1, median, left, top, top-right and co-located last_mv
1024 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1025 //FIXME try some early stop
1026 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1027 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1028 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1029 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1030 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1031 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv of the right neighbour and, when a further macroblock row exists
// before end_mb_y, of the bottom neighbour
1034 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1035 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1036 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1037 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1038 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// iterative refinement starting from the best predictor
1041 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1049 //try to merge with above FIXME (needs PSNR test)
// Same predictor sequence as epzs_motion_search4() but with size fixed to 0:
// probes P_MV1, the spatial predictors and the co-located, right and bottom
// last_mv entries (scaled by ref_mv_scale, 16.16 fixed point with rounding),
// then refines with diamond_search().
1050 static int epzs_motion_search2(MpegEncContext * s,
1051 int *mx_ptr, int *my_ptr, int P[10][2],
1052 int src_index, int ref_index, int16_t (*last_mv)[2],
1055 MotionEstContext * const c= &s->me;
1058 unsigned map_generation;
1059 const int penalty_factor= c->penalty_factor;
1060 const int size=0; //FIXME pass as arg
1062 const int ref_mv_stride= s->mb_stride;
1063 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1064 me_cmp_func cmpf, chroma_cmpf;
1066 int flags= c->flags;
1069 cmpf= s->dsp.me_cmp[size];
1070 chroma_cmpf= s->dsp.me_cmp[size+1];
// keys stored in map embed the generation value (see CHECK_MV)
1072 map_generation= update_map_generation(c);
// first slice line: left predictor, co-located last_mv and P_MV1
1077 if (s->first_slice_line) {
1078 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1079 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1080 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1081 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// other lines: P_MV1, median, left, top, top-right and co-located last_mv
1083 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1084 //FIXME try some early stop
1085 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1086 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1087 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1088 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1089 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1090 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv of the right neighbour and, when a further macroblock row exists
// before end_mb_y, of the bottom neighbour
1093 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1094 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1095 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1096 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1097 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// iterative refinement starting from the best predictor
1100 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);