3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
27 #include "mpegvideo.h"
29 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31 uint32_t av_unused * const score_map= c->score_map;\
32 const int av_unused xmin= c->xmin;\
33 const int av_unused ymin= c->ymin;\
34 const int av_unused xmax= c->xmax;\
35 const int av_unused ymax= c->ymax;\
36 uint8_t *mv_penalty= c->current_mv_penalty;\
37 const int pred_x= c->pred_x;\
38 const int pred_y= c->pred_y;\
/*
 * CHECK_HALF_MV(dx, dy, x, y): score one half-pel candidate.
 * (x, y) is a full-pel position and (dx, dy) the half-pel offset, so the
 * candidate in half-pel units is (hx, hy) = (2*x+dx, 2*y+dy).
 * The score d is cmp_hpel() plus the mv_penalty cost of (hx, hy) relative to
 * (pred_x, pred_y), multiplied by penalty_factor. The result is handed to
 * COPY3_IF_LT (defined elsewhere; presumably updates dmin/bx/by when d is
 * lower - confirm against its definition).
 * Expects s, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags,
 * mv_penalty, pred_x, pred_y, penalty_factor, d, dmin, bx and by in scope.
 * NOTE(review): some lines of this macro are not visible in this view.
 */
40 #define CHECK_HALF_MV(dx, dy, x, y)\
42 const int hx= 2*(x)+(dx);\
43 const int hy= 2*(y)+(dy);\
44 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
45 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
46 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Half-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
 *
 * Uses the sub-pel comparison functions s->mecc.me_sub_cmp[size] and
 * me_sub_cmp[size + 1], together with c->sub_flags and
 * c->sub_penalty_factor. dmin is the score of the incoming vector; it is
 * recomputed with cmp_sub when avctx->me_cmp differs from avctx->me_sub_cmp.
 * Half-pel candidates are only probed when (mx, my) lies strictly inside the
 * window (xmin, xmax) x (ymin, ymax).
 *
 * NOTE(review): parts of the body (some declarations, the branch conditions
 * selecting which CHECK_HALF_MV calls run, and the return) are not visible in
 * this view, so the comments below describe only the visible statements.
 */
49 static int hpel_motion_search(MpegEncContext * s,
50 int *mx_ptr, int *my_ptr, int dmin,
51 int src_index, int ref_index,
54 MotionEstContext * const c= &s->me;
55 const int mx = *mx_ptr;
56 const int my = *my_ptr;
57 const int penalty_factor= c->sub_penalty_factor;
58 me_cmp_func cmp_sub, chroma_cmp_sub;
62 int flags= c->sub_flags;
// comparators are taken from table slots [size] and [size + 1]
66 cmp_sub = s->mecc.me_sub_cmp[size];
67 chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
69 if(c->skip){ //FIXME move out of hpel?
// the incoming dmin was produced with me_cmp; re-score the centre with the
// sub-pel comparator so it is comparable with the half-pel candidates
75 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
76 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
// no rate penalty for the zero vector when size == 0
77 if(mx || my || size>0)
78 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
// refinement needs all four full-pel neighbours, so the centre must not sit
// on the window border
81 if (mx > xmin && mx < xmax &&
82 my > ymin && my < ymax) {
// t/l/r/b: cached scores of the neighbours at (mx, my-1), (mx-1, my),
// (mx+1, my), (mx, my+1) read from score_map, plus their MV penalty
// expressed in half-pel units (bx +/- 2, by +/- 2)
84 const int index= (my<<ME_MAP_SHIFT) + mx;
85 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
87 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
91 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
92 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
// high assert levels only: verify that the four neighbours' map entries carry
// the key of the current map generation, i.e. their cached scores are valid
94 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
96 unsigned map_generation= c->map_generation;
97 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
98 av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
99 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100 av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
102 av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
103 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
104 av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
// half-pel probes; NOTE(review): the conditions choosing between these calls
// are not visible here - presumably they compare t, l, r and b; confirm
107 CHECK_HALF_MV(0, 1, mx ,my-1)
109 CHECK_HALF_MV(1, 1, mx-1, my-1)
111 CHECK_HALF_MV(1, 1, mx , my-1)
113 CHECK_HALF_MV(1, 1, mx-1, my )
115 CHECK_HALF_MV(1, 0, mx-1, my )
117 CHECK_HALF_MV(1, 1, mx , my-1)
119 CHECK_HALF_MV(1, 1, mx-1, my-1)
121 CHECK_HALF_MV(1, 1, mx , my )
123 CHECK_HALF_MV(1, 0, mx , my )
128 CHECK_HALF_MV(1, 1, mx-1, my-1)
130 CHECK_HALF_MV(1, 1, mx , my )
132 CHECK_HALF_MV(1, 0, mx-1, my)
133 CHECK_HALF_MV(1, 1, mx-1, my)
136 CHECK_HALF_MV(1, 1, mx , my-1)
138 CHECK_HALF_MV(1, 1, mx-1, my)
140 CHECK_HALF_MV(1, 0, mx , my)
141 CHECK_HALF_MV(1, 1, mx , my)
143 CHECK_HALF_MV(0, 1, mx , my)
// the chosen vector must lie inside the search window scaled to half-pel units
145 av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
/*
 * Takes the same leading parameters as hpel_motion_search().
 * NOTE(review): only part of the signature is visible in this view; judging
 * by the name this variant performs no sub-pel search - confirm against the
 * full body.
 */
154 static int no_sub_motion_search(MpegEncContext * s,
155 int *mx_ptr, int *my_ptr, int dmin,
156 int src_index, int ref_index,
/*
 * Score the vector (mx, my) with the comparison functions
 * s->mecc.mb_cmp[size] and mb_cmp[size + 1], using c->mb_flags.
 *
 * mx and my are split before calling cmp(): the part above the low (qpel+1)
 * bits is passed as the position and (mx & mask, my & mask), with
 * mask = 1 + 2*qpel, as the sub-pel offset.
 * When add_rate is non-zero and either the vector is non-zero or size > 0,
 * the mv_penalty cost relative to (pred_x, pred_y) multiplied by
 * c->mb_penalty_factor is added to the score.
 * NOTE(review): the declaration of d and the return are not visible here.
 */
164 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
165 int src_index, int ref_index, int size,
168 MotionEstContext * const c= &s->me;
169 const int penalty_factor= c->mb_penalty_factor;
170 const int flags= c->mb_flags;
171 const int qpel= flags & FLAG_QPEL;
172 const int mask= 1+2*qpel;
173 me_cmp_func cmp_sub, chroma_cmp_sub;
180 cmp_sub = s->mecc.mb_cmp[size];
181 chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
183 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
184 //FIXME check cbp before adding penalty for (0,0) vector
185 if(add_rate && (mx || my || size>0))
186 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
/*
 * Non-static entry point that forwards all of its arguments unchanged to
 * get_mb_score() and returns its result.
 */
191 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
192 int ref_index, int size, int h, int add_rate)
194 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
/*
 * CHECK_QUARTER_MV(dx, dy, x, y): score one quarter-pel candidate.
 * (x, y) is a full-pel position and (dx, dy) the quarter-pel offset, so the
 * candidate in quarter-pel units is (hx, hy) = (4*x+dx, 4*y+dy).
 * The score d is cmp_qpel() plus the mv_penalty cost of (hx, hy) relative to
 * (pred_x, pred_y), multiplied by penalty_factor. The result is handed to
 * COPY3_IF_LT (defined elsewhere; presumably updates dmin/bx/by when d is
 * lower - confirm against its definition).
 * Expects s, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
 * mv_penalty, pred_x, pred_y, penalty_factor, d, dmin, bx and by in scope.
 * NOTE(review): some lines of this macro are not visible in this view.
 */
197 #define CHECK_QUARTER_MV(dx, dy, x, y)\
199 const int hx= 4*(x)+(dx);\
200 const int hy= 4*(y)+(dy);\
201 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
202 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
203 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Quarter-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
 *
 * Visible outline:
 *  - optionally re-score the centre with the sub-pel comparator (cmp_sub);
 *  - if the centre lies strictly inside the search window, estimate a score
 *    for every offset (nx, ny) in [-3, 3] x [-3, 3] from the cached full-pel
 *    scores in score_map and keep the best estimates in best[] / best_pos[];
 *  - evaluate the first c->avctx->me_subpel_quality of those positions for
 *    real with CHECK_QUARTER_MV.
 *
 * NOTE(review): several lines (declarations of best/best_pos/nx/ny/i/tl/cxy,
 * the insertion-position loops, else branches, the return) are not visible
 * in this view; comments below describe only the visible statements.
 */
206 static int qpel_motion_search(MpegEncContext * s,
207 int *mx_ptr, int *my_ptr, int dmin,
208 int src_index, int ref_index,
211 MotionEstContext * const c= &s->me;
212 const int mx = *mx_ptr;
213 const int my = *my_ptr;
214 const int penalty_factor= c->sub_penalty_factor;
215 const unsigned map_generation = c->map_generation;
216 const int subpel_quality= c->avctx->me_subpel_quality;
217 uint32_t *map= c->map;
218 me_cmp_func cmpf, chroma_cmpf;
219 me_cmp_func cmp_sub, chroma_cmp_sub;
222 int flags= c->sub_flags;
// cmpf/chroma_cmpf come from me_cmp, cmp_sub/chroma_cmp_sub from me_sub_cmp
224 cmpf = s->mecc.me_cmp[size];
225 chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize
228 cmp_sub = s->mecc.me_sub_cmp[size];
229 chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
231 if(c->skip){ //FIXME somehow move up (benchmark)
// re-score the centre with the sub-pel comparator when it differs from me_cmp
237 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
238 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
// no rate penalty for the zero vector when size == 0
239 if(mx || my || size>0)
240 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
// the interpolation below reads the neighbours' cached scores, so the centre
// must not sit on the window border
243 if (mx > xmin && mx < xmax &&
244 my > ymin && my < ymax) {
// best vector so far, in quarter-pel units
245 int bx=4*mx, by=4*my;
// cached scores of the top/left/right/bottom neighbours and of the centre.
// NOTE: the int named c declared here hides the MotionEstContext pointer c
// declared at function scope for the rest of its enclosing block.
248 const int index= (my<<ME_MAP_SHIFT) + mx;
249 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
250 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
251 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
252 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
253 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
// byte-wise fill: every byte of the 8 ints becomes 64, so each slot starts
// with a large positive value
257 memset(best, 64, sizeof(int)*8);
// with dia_size >= 2 the four diagonal neighbours are read from score_map too
258 if(s->me.dia_size>=2){
259 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
260 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
261 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
262 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
264 for(ny= -3; ny <= 3; ny++){
265 for(nx= -3; nx <= 3; nx++){
266 //FIXME this could overflow (unlikely though)
// t2/c2/b2: estimates along x for the top, centre and bottom rows (carrying
// a factor 32); score combines them along y (another factor 32) and
// +512 >> 10 removes the combined factor 1024 with rounding
267 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
268 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
269 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
270 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
// skip offsets whose low two bits are both zero; within [-3, 3] that is (0, 0)
273 if((nx&3)==0 && (ny&3)==0) continue;
275 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
277 // if(nx&1) score-=1024*c->penalty_factor;
278 // if(ny&1) score-=1024*c->penalty_factor;
// move entries i..6 one slot further and store the candidate at slot i
// (how i is chosen is not visible in this view)
282 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
283 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
285 best_pos[i][0]= nx + 4*mx;
286 best_pos[i][1]= ny + 4*my;
294 //FIXME this could overflow (unlikely though)
// coefficients of a second-order model built from l, r, t, b and c
295 const int cx = 4*(r - l);
296 const int cx2= r + l - 2*c;
297 const int cy = 4*(b - t);
298 const int cy2= b + t - 2*c;
// the trailing "&& 0" makes this condition always false, so the score_map
// lookup on the next line is disabled
301 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
302 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
// the top-left neighbour (mx-1, my-1) is evaluated directly
304 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
// mixed (x*y) coefficient derived from the top-left sample
307 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
// sanity checks: evaluated at offsets of +/-4 the model must reproduce the
// measured r, l, b, t and tl exactly
309 av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
310 av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
311 av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
312 av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
313 av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
315 for(ny= -3; ny <= 3; ny++){
316 for(nx= -3; nx <= 3; nx++){
317 //FIXME this could overflow (unlikely though)
318 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
// skip offsets whose low two bits are both zero; within [-3, 3] that is (0, 0)
321 if((nx&3)==0 && (ny&3)==0) continue;
// score carries a factor 32 (see the 32*c term), so the penalty is scaled
// by 32 as well
323 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
324 // if(nx&1) score-=32*c->penalty_factor;
325 // if(ny&1) score-=32*c->penalty_factor;
// move entries i..6 one slot further and store the candidate at slot i
// (how i is chosen is not visible in this view)
329 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
330 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
332 best_pos[i][0]= nx + 4*mx;
333 best_pos[i][1]= ny + 4*my;
// evaluate the first subpel_quality shortlisted positions; nx/ny are split
// into a quarter-pel fraction (& 3) and a full-pel part (>> 2)
340 for(i=0; i<subpel_quality; i++){
343 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
// the chosen vector must lie inside the search window scaled to quarter-pel units
346 av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
/*
 * CHECK_MV(x, y): evaluate the full-pel candidate (x, y), which must already
 * lie inside [xmin, xmax] x [ymin, ymax] (asserted).
 * key combines the position with map_generation and index is the slot of
 * (x, y) in map/score_map. Only when map[index] differs from key is the
 * position scored with cmp(); the raw score is stored in score_map[index],
 * the mv_penalty cost of the vector (shifted left by shift) relative to
 * (pred_x, pred_y) times penalty_factor is added, and the result is handed to
 * COPY3_IF_LT together with best[0]/best[1] (presumably updating them when d
 * is lower than dmin - confirm against COPY3_IF_LT).
 * Expects map, score_map, map_generation, shift, mv_penalty, pred_x, pred_y,
 * penalty_factor, d, dmin, best and the cmp() arguments in scope.
 * NOTE(review): some lines of this macro are not visible in this view.
 */
359 #define CHECK_MV(x,y)\
361 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
362 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
363 av_assert2((x) >= xmin);\
364 av_assert2((x) <= xmax);\
365 av_assert2((y) >= ymin);\
366 av_assert2((y) <= ymax);\
367 if(map[index]!=key){\
368 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
370 score_map[index]= d;\
371 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
372 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
/*
 * CHECK_CLIPPED_MV(ax, ay): Lx2/Ly2 are Lx/Ly clamped into [xmin, xmax] and
 * [ymin, ymax].
 * NOTE(review): the lines defining Lx/Ly and using Lx2/Ly2 are not visible
 * in this view; presumably Lx/Ly hold ax/ay and the clamped position is
 * passed to CHECK_MV - confirm against the full macro.
 */
376 #define CHECK_CLIPPED_MV(ax,ay)\
380 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
381 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
/*
 * CHECK_MV_DIR(x, y, new_dir): like CHECK_MV but without the range
 * assertions. When map[index] differs from key, the position is scored with
 * cmp(), the raw score is stored in score_map[index] and the mv_penalty cost
 * times penalty_factor is added to d.
 * NOTE(review): the lines that consume d and new_dir are not visible in this
 * view.
 */
385 #define CHECK_MV_DIR(x,y,new_dir)\
387 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
388 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
389 if(map[index]!=key){\
390 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
392 score_map[index]= d;\
393 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
/*
 * check(x, y, S, v): debugging aid. Logs an error through av_log() for each
 * window bound (xmin/xmax/ymin/ymax, each shifted left by S) that (x, y)
 * violates. The message contains the bound, x, y, s->mb_x and s->mb_y; the
 * stringified v is appended to the format string as a tag.
 * The only visible uses in this file are commented out.
 */
403 #define check(x,y,S,v)\
404 if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
405 if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
406 if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
407 if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
/*
 * LOAD_COMMON2: declares the locals map (= c->map), qpel (= flags & FLAG_QPEL)
 * and shift (= 1 + qpel), which the CHECK_MV family of macros reads.
 * Requires c and flags to be in scope.
 */
409 #define LOAD_COMMON2\
410 uint32_t *map= c->map;\
411 const int qpel= flags&FLAG_QPEL;\
412 const int shift= 1+qpel;\
/*
 * Search that probes the four direct neighbours (x-1, x+1, y-1, y+1) of the
 * current best position best[0], best[1] with CHECK_MV_DIR.
 *
 * First makes sure the starting point has an entry in score_map, then
 * repeatedly probes the neighbours, skipping one direction depending on dir
 * and any neighbour outside the window.
 * NOTE(review): the loop construct, some declarations and the return are not
 * visible in this view.
 */
414 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
415 int src_index, int ref_index, int const penalty_factor,
416 int size, int h, int flags)
418 MotionEstContext * const c= &s->me;
419 me_cmp_func cmpf, chroma_cmpf;
423 unsigned map_generation = c->map_generation;
425 cmpf = s->mecc.me_cmp[size];
426 chroma_cmpf = s->mecc.me_cmp[size + 1];
428 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
429 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
430 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
431 if(map[index]!=key){ //this will be executed only very rarely
432 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
439 const int dir= next_dir;
440 const int x= best[0];
441 const int y= best[1];
// direction codes passed to CHECK_MV_DIR: 0 = x-1, 1 = y-1, 2 = x+1, 3 = y+1.
// Each probe is skipped when dir holds the opposite code (presumably the
// position the search just came from - confirm) or when the neighbour would
// fall outside the window.
444 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
445 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
446 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
447 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
/*
 * Diamond search over sizes 1..4 around the current best position.
 * Sizes that are not a power of two are skipped, so only 1, 2 and 4 are
 * used. For each size, points on the diamond outline are probed with
 * CHECK_MV, stepping dir by 2.
 * NOTE(review): the statements executed when the diamond crosses the window
 * border and when the best position changed are not visible in this view,
 * nor is the return.
 */
455 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
456 int src_index, int ref_index, int const penalty_factor,
457 int size, int h, int flags)
459 MotionEstContext * const c= &s->me;
460 me_cmp_func cmpf, chroma_cmpf;
464 unsigned map_generation = c->map_generation;
466 cmpf = s->mecc.me_cmp[size];
467 chroma_cmpf = s->mecc.me_cmp[size + 1];
469 for(dia_size=1; dia_size<=4; dia_size++){
471 const int x= best[0];
472 const int y= best[1];
// dia_size & (dia_size-1) is non-zero unless dia_size is a power of two
474 if(dia_size&(dia_size-1)) continue;
// true when the diamond would reach outside the search window
476 if( x + dia_size > xmax
477 || x - dia_size < xmin
478 || y + dia_size > ymax
479 || y - dia_size < ymin)
482 for(dir= 0; dir<dia_size; dir+=2){
// one point on each of the four edges of the diamond
485 CHECK_MV(x + dir , y + dia_size - dir);
486 CHECK_MV(x + dia_size - dir, y - dir );
487 CHECK_MV(x - dir , y - dia_size + dir);
488 CHECK_MV(x - dia_size + dir, y + dir );
// a better position than the starting point (x, y) was found
491 if(x!=best[0] || y!=best[1])
/*
 * Hexagon search. For the current dia_size, six points around the best
 * position are probed with CHECK_CLIPPED_MV and this repeats until the best
 * position stops changing; then dia_size is reduced.
 * dec is non-zero when the initial dia_size is not a power of two: in that
 * case dia_size decreases by 1 per outer iteration, otherwise it is halved.
 * NOTE(review): the declarations of x/y, the opening of the do-loop and the
 * return are not visible in this view.
 */
497 static int hex_search(MpegEncContext * s, int *best, int dmin,
498 int src_index, int ref_index, int const penalty_factor,
499 int size, int h, int flags, int dia_size)
501 MotionEstContext * const c= &s->me;
502 me_cmp_func cmpf, chroma_cmpf;
505 unsigned map_generation = c->map_generation;
507 const int dec= dia_size & (dia_size-1);
509 cmpf = s->mecc.me_cmp[size];
510 chroma_cmpf = s->mecc.me_cmp[size + 1];
512 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
// left and right points, then the upper/lower points at +dia_size/2
517 CHECK_CLIPPED_MV(x -dia_size , y);
518 CHECK_CLIPPED_MV(x+ dia_size , y);
519 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
520 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// unary minus binds tighter than >>, so this is (-dia_size)>>1; right-shifting
// a negative value is implementation-defined in C
522 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
523 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
// repeat while the best position keeps moving away from (x, y)
525 }while(best[0] != x || best[1] != y);
/*
 * Search that starts with dia_size = c->dia_size & 0xFF and shrinks it.
 * For each dia_size the eight offsets of the hex[] table, scaled by dia_size,
 * are probed around the best position until it stops changing. dia_size is
 * then decreased by 1 (initial value not a power of two) or halved.
 * Afterwards the four direct neighbours at distance 1 are probed.
 * NOTE(review): the declarations of x/y/i, the inner loop headers and the
 * return are not visible in this view.
 */
531 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
532 int src_index, int ref_index, int const penalty_factor,
533 int size, int h, int flags)
535 MotionEstContext * const c= &s->me;
536 me_cmp_func cmpf, chroma_cmpf;
539 unsigned map_generation = c->map_generation;
541 int dia_size= c->dia_size&0xFF;
542 const int dec= dia_size & (dia_size-1);
// eight unit offsets: the four axis points at distance 2 and the four
// diagonal points at distance 1
543 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
544 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
546 cmpf = s->mecc.me_cmp[size];
547 chroma_cmpf = s->mecc.me_cmp[size + 1];
549 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
554 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
// repeat while the best position keeps moving away from (x, y)
556 }while(best[0] != x || best[1] != y);
// final step: the four direct neighbours
561 CHECK_CLIPPED_MV(x+1, y);
562 CHECK_CLIPPED_MV(x, y+1);
563 CHECK_CLIPPED_MV(x-1, y);
564 CHECK_CLIPPED_MV(x, y-1);
/*
 * Multi-stage search using dia_size = c->dia_size & 0xFE. Visible stages:
 *  1. a horizontal scan over x2 with step 2 within dia_size-1 of x,
 *  2. a vertical scan over y2 with step 2 within dia_size/2-1 of y,
 *  3. a scan of the 5x5 square around (x, y),
 *  4. the 16 offsets of hex[] scaled by j = 1 .. dia_size/4,
 *  5. a final hex_search() with dia_size 2, whose result is returned.
 * All scan ranges are clipped to the search window.
 * NOTE(review): the loop bodies of stages 1-3, the loop over i and the
 * assignments of x/y are not visible in this view.
 */
569 static int umh_search(MpegEncContext * s, int *best, int dmin,
570 int src_index, int ref_index, int const penalty_factor,
571 int size, int h, int flags)
573 MotionEstContext * const c= &s->me;
574 me_cmp_func cmpf, chroma_cmpf;
577 unsigned map_generation = c->map_generation;
578 int x,y,x2,y2, i, j, d;
579 const int dia_size= c->dia_size&0xFE;
580 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
581 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
582 {-2, 3}, { 0, 4}, { 2, 3},
583 {-2,-3}, { 0,-4}, { 2,-3},};
585 cmpf = s->mecc.me_cmp[size];
586 chroma_cmpf = s->mecc.me_cmp[size + 1];
// stage 1: every second column of the horizontal line
590 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
// stage 2: every second row of the vertical line (half the horizontal range)
593 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
// stage 3: the 5x5 square
599 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
600 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
605 //FIXME prevent the CLIP stuff
// stage 4: the hex[] pattern at growing scales
607 for(j=1; j<=dia_size/4; j++){
609 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
// stage 5: refine with a hexagon search of size 2
613 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
/*
 * Exhaustive search over the square [-dia_size, dia_size]^2 (with
 * dia_size = c->dia_size & 0xFF), clipped to the search window. Note that the
 * square is centred on (0, 0), not on the incoming best position.
 * Afterwards a position (x, y) and its four direct neighbours are probed
 * with CHECK_CLIPPED_MV.
 * NOTE(review): the body of the scan loops, the assignments to x/y before
 * the final probes (presumably the best position - confirm) and the return
 * are not visible in this view.
 */
616 static int full_search(MpegEncContext * s, int *best, int dmin,
617 int src_index, int ref_index, int const penalty_factor,
618 int size, int h, int flags)
620 MotionEstContext * const c= &s->me;
621 me_cmp_func cmpf, chroma_cmpf;
624 unsigned map_generation = c->map_generation;
626 const int dia_size= c->dia_size&0xFF;
628 cmpf = s->mecc.me_cmp[size];
629 chroma_cmpf = s->mecc.me_cmp[size + 1];
631 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
632 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
// centre plus its four direct neighbours
640 CHECK_CLIPPED_MV(x , y);
641 CHECK_CLIPPED_MV(x+1, y);
642 CHECK_CLIPPED_MV(x, y+1);
643 CHECK_CLIPPED_MV(x-1, y);
644 CHECK_CLIPPED_MV(x, y-1);
/*
 * SAB_CHECK_MV(ax, ay): evaluate (ax, ay) for sab_diamond_search().
 * When map[index] differs from key, the position is scored with cmp(), the
 * raw score is stored in score_map[index] and the mv_penalty cost times
 * penalty_factor is added. If the resulting d is lower than the height of
 * the last minima[] entry, the insertion slot j is advanced past all entries
 * whose height is <= d, the tail is moved one slot further (the last entry
 * is dropped) and slot j receives height d with checked = 0.
 * Expects minima, minima_count, j, d, map, score_map and the cmp() arguments
 * in scope.
 * NOTE(review): some lines (e.g. the initialisation of j and the stores of
 * the position) are not visible in this view.
 */
651 #define SAB_CHECK_MV(ax,ay)\
653 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
654 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
655 if(map[index]!=key){\
656 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
658 score_map[index]= d;\
659 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
660 if(d < minima[minima_count-1].height){\
663 while(d >= minima[j].height) j++;\
665 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
667 minima[j].checked= 0;\
668 minima[j].height= d;\
/*
 * Search that keeps a list of the FFABS(c->dia_size) best positions
 * (minima[]) instead of a single best point. Visible outline:
 *  1. collect every map entry of the current generation into minima[],
 *     decoding its x/y from the key and adding the MV penalty;
 *  2. sort the collected entries with minima_cmp and pad the list up to
 *     minima_count with very high placeholder entries at (0, 0);
 *  3. for each unchecked entry strictly inside the window, probe neighbours
 *     with SAB_CHECK_MV and mark the entry as checked;
 *  4. take minima[0] as the result and, if it is strictly inside the window,
 *     make sure its four direct neighbours are in the map via CHECK_MV.
 * NOTE(review): several lines (declarations, continue statements, some
 * SAB_CHECK_MV calls, the return) are not visible in this view.
 */
678 #define MAX_SAB_SIZE ME_MAP_SIZE
679 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
680 int src_index, int ref_index, int const penalty_factor,
681 int size, int h, int flags)
683 MotionEstContext * const c= &s->me;
684 me_cmp_func cmpf, chroma_cmpf;
685 Minima minima[MAX_SAB_SIZE];
686 const int minima_count= FFABS(c->dia_size);
690 unsigned map_generation = c->map_generation;
692 av_assert1(minima_count <= MAX_SAB_SIZE);
694 cmpf = s->mecc.me_cmp[size];
695 chroma_cmpf = s->mecc.me_cmp[size + 1];
697 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
698 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
700 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
701 uint32_t key= map[i];
// bias both packed coordinates so they can be extracted as unsigned fields
703 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// keep only entries whose upper bits match the current generation.
// NOTE(review): (-1)<<n left-shifts a negative value, which is undefined
// behaviour in C; consider an unsigned operand here.
705 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
707 minima[j].height= score_map[i];
// unpack x (low ME_MAP_MV_BITS bits) and y (next ME_MAP_MV_BITS bits), then
// remove the bias again
708 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
709 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
710 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
711 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
713 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
714 if( minima[j].x > xmax || minima[j].x < xmin
715 || minima[j].y > ymax || minima[j].y < ymin)
// no rate penalty for the zero vector
719 if(minima[j].x || minima[j].y)
720 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
725 qsort(minima, j, sizeof(Minima), minima_cmp);
// pad the remaining slots with a very large height at position (0, 0)
727 for(; j<minima_count; j++){
728 minima[j].height=256*256*256*64;
730 minima[j].x= minima[j].y=0;
733 for(i=0; i<minima_count; i++){
734 const int x= minima[i].x;
735 const int y= minima[i].y;
738 if(minima[i].checked) continue;
// entries on or outside the window border are tested here
740 if( x >= xmax || x <= xmin
741 || y >= ymax || y <= ymin)
746 SAB_CHECK_MV(x , y-1)
747 SAB_CHECK_MV(x , y+1)
749 minima[i].checked= 1;
// the head of the list is the result
752 best[0]= minima[0].x;
753 best[1]= minima[0].y;
754 dmin= minima[0].height;
756 if( best[0] < xmax && best[0] > xmin
757 && best[1] < ymax && best[1] > ymin){
759 //ensure that the reference samples for hpel refinement are in the map
760 CHECK_MV(best[0]-1, best[1])
761 CHECK_MV(best[0]+1, best[1])
762 CHECK_MV(best[0], best[1]-1)
763 CHECK_MV(best[0], best[1]+1)
/*
 * Diamond search with growing size: for dia_size = 1 .. c->dia_size the four
 * edges of the diamond around the current best position are probed with
 * CHECK_MV. For each edge, start/end restrict dir so that only points inside
 * the search window are visited (CHECK_MV asserts the range).
 * NOTE(review): the statement executed when the best position changed, some
 * declarations and the return are not visible in this view.
 */
768 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
769 int src_index, int ref_index, int const penalty_factor,
770 int size, int h, int flags)
772 MotionEstContext * const c= &s->me;
773 me_cmp_func cmpf, chroma_cmpf;
777 unsigned map_generation = c->map_generation;
779 cmpf = s->mecc.me_cmp[size];
780 chroma_cmpf = s->mecc.me_cmp[size + 1];
782 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
784 const int x= best[0];
785 const int y= best[1];
// edge 1: points (x + dir, y + dia_size - dir)
787 start= FFMAX(0, y + dia_size - ymax);
788 end = FFMIN(dia_size, xmax - x + 1);
789 for(dir= start; dir<end; dir++){
792 //check(x + dir,y + dia_size - dir,0, a0)
793 CHECK_MV(x + dir , y + dia_size - dir);
// edge 2: points (x + dia_size - dir, y - dir)
796 start= FFMAX(0, x + dia_size - xmax);
797 end = FFMIN(dia_size, y - ymin + 1);
798 for(dir= start; dir<end; dir++){
801 //check(x + dia_size - dir, y - dir,0, a1)
802 CHECK_MV(x + dia_size - dir, y - dir );
// edge 3: points (x - dir, y - dia_size + dir)
805 start= FFMAX(0, -y + dia_size + ymin );
806 end = FFMIN(dia_size, x - xmin + 1);
807 for(dir= start; dir<end; dir++){
810 //check(x - dir,y - dia_size + dir,0, a2)
811 CHECK_MV(x - dir , y - dia_size + dir);
// edge 4: points (x - dia_size + dir, y + dir)
814 start= FFMAX(0, -x + dia_size + xmin );
815 end = FFMIN(dia_size, ymax - y + 1);
816 for(dir= start; dir<end; dir++){
819 //check(x - dia_size + dir, y + dir,0, a3)
820 CHECK_MV(x - dia_size + dir, y + dir );
// a better position than the starting point (x, y) was found
823 if(x!=best[0] || y!=best[1])
/*
 * Dispatch to one of the search routines according to c->dia_size and return
 * its result. Visible mapping, tested in this order:
 *   (condition not visible)  -> funny_diamond_search
 *   dia_size < -1            -> sab_diamond_search
 *   dia_size < 2             -> small_diamond_search
 *   dia_size > 1024          -> full_search
 *   dia_size > 768           -> umh_search
 *   dia_size > 512           -> hex_search with size dia_size & 0xFF
 *   dia_size > 256           -> l2s_dia_search
 *   otherwise                -> var_diamond_search
 * NOTE(review): the first condition is not visible in this view.
 */
829 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
830 int src_index, int ref_index, int const penalty_factor,
831 int size, int h, int flags){
832 MotionEstContext * const c= &s->me;
834 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
835 else if(c->dia_size<-1)
836 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
837 else if(c->dia_size<2)
838 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
839 else if(c->dia_size>1024)
840 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
841 else if(c->dia_size>768)
842 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
843 else if(c->dia_size>512)
844 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
845 else if(c->dia_size>256)
846 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
848 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
852 @param P a list of candidate mvs to check before starting the
853 iterative search. If one of the candidates is close to the optimal mv, then
854 it takes fewer iterations. And it increases the chance that we find the
/*
 * Core predictor-based full-pel search. Visible outline:
 *  1. pick the penalty factor and comparison functions (either the me_pre_cmp
 *     set with c->pre_penalty_factor or the me_cmp set with
 *     c->penalty_factor; the selecting condition is not visible);
 *  2. start a new map generation and score the zero vector;
 *  3. probe candidate vectors: spatial predictors from P (P_LEFT, P_TOP,
 *     P_TOPRIGHT, P_MEDIAN) and vectors taken from last_mv at the current
 *     position and its neighbours, scaled by ref_mv_scale;
 *  4. optionally probe last_mv entries of surrounding macroblocks when
 *     c->avctx->last_predictor_count is non-zero;
 *  5. refine the best candidate with diamond_search().
 * last_mv values are scaled as (v*ref_mv_scale + (1<<15)) >> 16, i.e.
 * ref_mv_scale acts as a 16.16 fixed-point factor with rounding.
 * NOTE(review): several lines (declarations, else branches, early exits, the
 * stores to *mx_ptr/*my_ptr and the return) are not visible in this view.
 */
857 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
858 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
859 int ref_mv_scale, int flags, int size, int h)
861 MotionEstContext * const c= &s->me;
862 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
863 i.e. the difference between the position of the
864 block currently being encoded and the position of
865 the block chosen to predict it from. */
866 int d; ///< the score (cmp + penalty) of any given mv
867 int dmin; /**< the best value of d, i.e. the score
868 corresponding to the mv stored in best[]. */
869 unsigned map_generation;
871 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
872 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
873 me_cmp_func cmpf, chroma_cmpf;
// first alternative: the me_pre_cmp comparators with the pre penalty factor
879 penalty_factor= c->pre_penalty_factor;
880 cmpf = s->mecc.me_pre_cmp[size];
881 chroma_cmpf = s->mecc.me_pre_cmp[size + 1];
// second alternative: the me_cmp comparators with the regular penalty factor
883 penalty_factor= c->penalty_factor;
884 cmpf = s->mecc.me_cmp[size];
885 chroma_cmpf = s->mecc.me_cmp[size + 1];
888 map_generation= update_map_generation(c);
// score the zero vector and record it in map slot 0 (position (0, 0))
891 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
892 map[0]= map_generation;
895 //FIXME precalc first term below?
// the zero vector's rate penalty is only added for B-pictures without
// FLAG_DIRECT, or when FF_MPV_FLAG_MV0 is set
896 if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
897 s->mpv_flags & FF_MPV_FLAG_MV0)
898 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
// first slice line: only the left predictor and the co-located last_mv are
// probed in the visible lines
901 if (s->first_slice_line) {
902 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
903 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
904 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// zero-vector score below the mv0_threshold-based limit while the visible
// spatial predictors are all zero
906 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
907 && ( P_LEFT[0] |P_LEFT[1]
909 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
// median predictor and its four direct neighbours, then the co-located
// last_mv and the individual spatial predictors
915 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
916 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
917 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
918 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
919 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
920 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
921 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
922 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
923 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
924 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
// last_mv of the left, top, right and bottom neighbours of the current
// macroblock (top only when not on the first slice line, bottom only when a
// row below exists before end_mb_y)
928 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
929 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
930 if(!s->first_slice_line)
931 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
932 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
934 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
935 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
936 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
937 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
938 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// optional: scan last_mv of all macroblocks within `count` of the current
// one, clipped to the picture's macroblock grid
942 if(c->avctx->last_predictor_count){
943 const int count= c->avctx->last_predictor_count;
944 const int xstart= FFMAX(0, s->mb_x - count);
945 const int ystart= FFMAX(0, s->mb_y - count);
946 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
947 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
950 for(mb_y=ystart; mb_y<yend; mb_y++){
952 for(mb_x=xstart; mb_x<xend; mb_x++){
// note the +1 offsets in both directions when indexing last_mv here
953 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
954 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
955 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
// candidates outside the search window are skipped rather than clipped
957 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
963 //check(best[0],best[1],0, b0)
// iterative refinement starting from the best candidate found so far
964 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
966 //check(best[0],best[1],0, b1)
/*
 * Entry point wrapping epzs_motion_search_internal().
 * When c->flags == 0, h == 16 and size == 0, the internal function is called
 * with those values spelled as literal constants (presumably so the
 * always-inline body can be specialised by the compiler - confirm);
 * otherwise c->flags, size and h are passed through as they are.
 * NOTE(review): the tail of the parameter list is not visible in this view.
 */
973 //this function is dedicated to the braindamaged gcc
974 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
975 int P[10][2], int src_index, int ref_index,
976 int16_t (*last_mv)[2], int ref_mv_scale,
979 MotionEstContext * const c= &s->me;
980 //FIXME convert other functions in the same way if faster
981 if(c->flags==0 && h==16 && size==0){
982 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
984 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
986 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
/*
 * Predictor-based full-pel search variant that uses c->penalty_factor,
 * c->flags and the me_cmp comparison functions. It starts a new map
 * generation, probes candidates from P (P_LEFT, P_MV1, P_MEDIAN, P_TOP,
 * P_TOPRIGHT) and from last_mv (co-located, right and bottom neighbours,
 * scaled by ref_mv_scale with rounding), then refines the best one with
 * diamond_search().
 * NOTE(review): parts of the signature, some declarations (best, dmin, size,
 * h), the else branch and the return are not visible in this view.
 */
990 static int epzs_motion_search4(MpegEncContext * s,
991 int *mx_ptr, int *my_ptr, int P[10][2],
992 int src_index, int ref_index, int16_t (*last_mv)[2],
995 MotionEstContext * const c= &s->me;
998 unsigned map_generation;
999 const int penalty_factor= c->penalty_factor;
1002 const int ref_mv_stride= s->mb_stride;
1003 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1004 me_cmp_func cmpf, chroma_cmpf;
1006 int flags= c->flags;
1009 cmpf = s->mecc.me_cmp[size];
1010 chroma_cmpf = s->mecc.me_cmp[size + 1];
1012 map_generation= update_map_generation(c);
// first slice line: left predictor, co-located last_mv and P_MV1
1017 if (s->first_slice_line) {
1018 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1019 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1020 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1021 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// other lines: P_MV1 first, then median/left/top/top-right predictors and
// the co-located last_mv
1023 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1024 //FIXME try some early stop
1025 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1026 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1027 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1028 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1029 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1030 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv of the right neighbour, and of the bottom neighbour when a row
// below exists before end_mb_y
1033 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1034 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1035 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1036 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1037 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// iterative refinement starting from the best candidate found so far
1040 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
/*
 * Predictor-based full-pel search variant with size fixed to 0. The visible
 * statements otherwise match epzs_motion_search4(): it uses
 * c->penalty_factor, c->flags and the me_cmp comparison functions, starts a
 * new map generation, probes candidates from P and last_mv, then refines the
 * best one with diamond_search().
 * NOTE(review): parts of the signature, some declarations (best, dmin, h),
 * the else branch and the return are not visible in this view.
 */
1048 //try to merge with above FIXME (needs PSNR test)
1049 static int epzs_motion_search2(MpegEncContext * s,
1050 int *mx_ptr, int *my_ptr, int P[10][2],
1051 int src_index, int ref_index, int16_t (*last_mv)[2],
1054 MotionEstContext * const c= &s->me;
1057 unsigned map_generation;
1058 const int penalty_factor= c->penalty_factor;
1059 const int size=0; //FIXME pass as arg
1061 const int ref_mv_stride= s->mb_stride;
1062 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1063 me_cmp_func cmpf, chroma_cmpf;
1065 int flags= c->flags;
1068 cmpf = s->mecc.me_cmp[size];
1069 chroma_cmpf = s->mecc.me_cmp[size + 1];
1071 map_generation= update_map_generation(c);
// first slice line: left predictor, co-located last_mv and P_MV1
1076 if (s->first_slice_line) {
1077 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1078 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1079 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1080 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
// other lines: P_MV1 first, then median/left/top/top-right predictors and
// the co-located last_mv
1082 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1083 //FIXME try some early stop
1084 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1085 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1086 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1087 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1088 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1089 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
// last_mv of the right neighbour, and of the bottom neighbour when a row
// below exists before end_mb_y
1092 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1093 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1094 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1095 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1096 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
// iterative refinement starting from the best candidate found so far
1099 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);