3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
27 #include "mpegvideo.h"
29 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31 uint32_t av_unused * const score_map= c->score_map;\
32 const int av_unused xmin= c->xmin;\
33 const int av_unused ymin= c->ymin;\
34 const int av_unused xmax= c->xmax;\
35 const int av_unused ymax= c->ymax;\
36 uint8_t *mv_penalty= c->current_mv_penalty;\
37 const int pred_x= c->pred_x;\
38 const int pred_y= c->pred_y;\
40 #define CHECK_HALF_MV(dx, dy, x, y)\
42 const int hx= 2*(x)+(dx);\
43 const int hy= 2*(y)+(dy);\
44 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
45 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
46 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
49 static int hpel_motion_search(MpegEncContext * s,
50 int *mx_ptr, int *my_ptr, int dmin,
51 int src_index, int ref_index,
54 MotionEstContext * const c= &s->me;
55 const int mx = *mx_ptr;
56 const int my = *my_ptr;
57 const int penalty_factor= c->sub_penalty_factor;
58 me_cmp_func cmp_sub, chroma_cmp_sub;
62 int flags= c->sub_flags;
66 cmp_sub = s->mecc.me_sub_cmp[size];
67 chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
69 if(c->skip){ //FIXME move out of hpel?
75 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
76 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
77 if(mx || my || size>0)
78 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
81 if (mx > xmin && mx < xmax &&
82 my > ymin && my < ymax) {
84 const int index= (my<<ME_MAP_SHIFT) + mx;
85 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
87 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
91 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
92 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
95 unsigned map_generation= c->map_generation;
97 uint32_t *map= c->map;
99 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100 assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
102 assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
103 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
104 assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
105 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
106 assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
108 CHECK_HALF_MV(0, 1, mx ,my-1)
110 CHECK_HALF_MV(1, 1, mx-1, my-1)
112 CHECK_HALF_MV(1, 1, mx , my-1)
114 CHECK_HALF_MV(1, 1, mx-1, my )
116 CHECK_HALF_MV(1, 0, mx-1, my )
118 CHECK_HALF_MV(1, 1, mx , my-1)
120 CHECK_HALF_MV(1, 1, mx-1, my-1)
122 CHECK_HALF_MV(1, 1, mx , my )
124 CHECK_HALF_MV(1, 0, mx , my )
129 CHECK_HALF_MV(1, 1, mx-1, my-1)
131 CHECK_HALF_MV(1, 1, mx , my )
133 CHECK_HALF_MV(1, 0, mx-1, my)
134 CHECK_HALF_MV(1, 1, mx-1, my)
137 CHECK_HALF_MV(1, 1, mx , my-1)
139 CHECK_HALF_MV(1, 1, mx-1, my)
141 CHECK_HALF_MV(1, 0, mx , my)
142 CHECK_HALF_MV(1, 1, mx , my)
144 CHECK_HALF_MV(0, 1, mx , my)
146 assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
155 static int no_sub_motion_search(MpegEncContext * s,
156 int *mx_ptr, int *my_ptr, int dmin,
157 int src_index, int ref_index,
165 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
166 int src_index, int ref_index, int size,
169 MotionEstContext * const c= &s->me;
170 const int penalty_factor= c->mb_penalty_factor;
171 const int flags= c->mb_flags;
172 const int qpel= flags & FLAG_QPEL;
173 const int mask= 1+2*qpel;
174 me_cmp_func cmp_sub, chroma_cmp_sub;
181 cmp_sub = s->mecc.mb_cmp[size];
182 chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
185 // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
187 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
188 //FIXME check cbp before adding penalty for (0,0) vector
189 if(add_rate && (mx || my || size>0))
190 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
195 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
196 int ref_index, int size, int h, int add_rate)
198 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
201 #define CHECK_QUARTER_MV(dx, dy, x, y)\
203 const int hx= 4*(x)+(dx);\
204 const int hy= 4*(y)+(dy);\
205 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
206 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
207 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
210 static int qpel_motion_search(MpegEncContext * s,
211 int *mx_ptr, int *my_ptr, int dmin,
212 int src_index, int ref_index,
215 MotionEstContext * const c= &s->me;
216 const int mx = *mx_ptr;
217 const int my = *my_ptr;
218 const int penalty_factor= c->sub_penalty_factor;
219 const unsigned map_generation = c->map_generation;
220 const int subpel_quality= c->avctx->me_subpel_quality;
221 uint32_t *map= c->map;
222 me_cmp_func cmpf, chroma_cmpf;
223 me_cmp_func cmp_sub, chroma_cmp_sub;
226 int flags= c->sub_flags;
228 cmpf = s->mecc.me_cmp[size];
229 chroma_cmpf = s->mecc.me_cmp[size + 1]; // FIXME: factorize
232 cmp_sub = s->mecc.me_sub_cmp[size];
233 chroma_cmp_sub = s->mecc.me_sub_cmp[size + 1];
235 if(c->skip){ //FIXME somehow move up (benchmark)
241 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
242 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
243 if(mx || my || size>0)
244 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
247 if (mx > xmin && mx < xmax &&
248 my > ymin && my < ymax) {
249 int bx=4*mx, by=4*my;
252 const int index= (my<<ME_MAP_SHIFT) + mx;
253 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
254 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
255 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
256 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
257 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
261 memset(best, 64, sizeof(int)*8);
262 if(s->me.dia_size>=2){
263 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
264 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
265 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
266 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
268 for(ny= -3; ny <= 3; ny++){
269 for(nx= -3; nx <= 3; nx++){
270 //FIXME this could overflow (unlikely though)
271 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
272 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
273 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
274 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
277 if((nx&3)==0 && (ny&3)==0) continue;
279 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
281 // if(nx&1) score-=1024*c->penalty_factor;
282 // if(ny&1) score-=1024*c->penalty_factor;
286 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
287 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
289 best_pos[i][0]= nx + 4*mx;
290 best_pos[i][1]= ny + 4*my;
298 //FIXME this could overflow (unlikely though)
299 const int cx = 4*(r - l);
300 const int cx2= r + l - 2*c;
301 const int cy = 4*(b - t);
302 const int cy2= b + t - 2*c;
305 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
306 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
308 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
311 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
313 assert(16*cx2 + 4*cx + 32*c == 32*r);
314 assert(16*cx2 - 4*cx + 32*c == 32*l);
315 assert(16*cy2 + 4*cy + 32*c == 32*b);
316 assert(16*cy2 - 4*cy + 32*c == 32*t);
317 assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
319 for(ny= -3; ny <= 3; ny++){
320 for(nx= -3; nx <= 3; nx++){
321 //FIXME this could overflow (unlikely though)
322 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
325 if((nx&3)==0 && (ny&3)==0) continue;
327 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
328 // if(nx&1) score-=32*c->penalty_factor;
329 // if(ny&1) score-=32*c->penalty_factor;
333 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
334 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
336 best_pos[i][0]= nx + 4*mx;
337 best_pos[i][1]= ny + 4*my;
344 for(i=0; i<subpel_quality; i++){
347 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
350 assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
363 #define CHECK_MV(x,y)\
365 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
366 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
367 assert((x) >= xmin);\
368 assert((x) <= xmax);\
369 assert((y) >= ymin);\
370 assert((y) <= ymax);\
371 if(map[index]!=key){\
372 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
374 score_map[index]= d;\
375 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
376 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
380 #define CHECK_CLIPPED_MV(ax,ay)\
384 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
385 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
389 #define CHECK_MV_DIR(x,y,new_dir)\
391 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
392 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
393 if(map[index]!=key){\
394 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
396 score_map[index]= d;\
397 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
407 #define check(x,y,S,v)\
408 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
409 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
410 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
411 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
413 #define LOAD_COMMON2\
414 uint32_t *map= c->map;\
415 const int qpel= flags&FLAG_QPEL;\
416 const int shift= 1+qpel;\
418 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
419 int src_index, int ref_index, int const penalty_factor,
420 int size, int h, int flags)
422 MotionEstContext * const c= &s->me;
423 me_cmp_func cmpf, chroma_cmpf;
427 unsigned map_generation = c->map_generation;
429 cmpf = s->mecc.me_cmp[size];
430 chroma_cmpf = s->mecc.me_cmp[size + 1];
432 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
433 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
434 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
435 if(map[index]!=key){ //this will be executed only very rarey
436 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
443 const int dir= next_dir;
444 const int x= best[0];
445 const int y= best[1];
448 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
449 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
450 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
451 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
459 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
460 int src_index, int ref_index, int const penalty_factor,
461 int size, int h, int flags)
463 MotionEstContext * const c= &s->me;
464 me_cmp_func cmpf, chroma_cmpf;
468 unsigned map_generation = c->map_generation;
470 cmpf = s->mecc.me_cmp[size];
471 chroma_cmpf = s->mecc.me_cmp[size + 1];
473 for(dia_size=1; dia_size<=4; dia_size++){
475 const int x= best[0];
476 const int y= best[1];
478 if(dia_size&(dia_size-1)) continue;
480 if( x + dia_size > xmax
481 || x - dia_size < xmin
482 || y + dia_size > ymax
483 || y - dia_size < ymin)
486 for(dir= 0; dir<dia_size; dir+=2){
489 CHECK_MV(x + dir , y + dia_size - dir);
490 CHECK_MV(x + dia_size - dir, y - dir );
491 CHECK_MV(x - dir , y - dia_size + dir);
492 CHECK_MV(x - dia_size + dir, y + dir );
495 if(x!=best[0] || y!=best[1])
501 static int hex_search(MpegEncContext * s, int *best, int dmin,
502 int src_index, int ref_index, int const penalty_factor,
503 int size, int h, int flags, int dia_size)
505 MotionEstContext * const c= &s->me;
506 me_cmp_func cmpf, chroma_cmpf;
509 unsigned map_generation = c->map_generation;
511 const int dec= dia_size & (dia_size-1);
513 cmpf = s->mecc.me_cmp[size];
514 chroma_cmpf = s->mecc.me_cmp[size + 1];
516 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
521 CHECK_CLIPPED_MV(x -dia_size , y);
522 CHECK_CLIPPED_MV(x+ dia_size , y);
523 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
524 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
526 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
527 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
529 }while(best[0] != x || best[1] != y);
535 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
536 int src_index, int ref_index, int const penalty_factor,
537 int size, int h, int flags)
539 MotionEstContext * const c= &s->me;
540 me_cmp_func cmpf, chroma_cmpf;
543 unsigned map_generation = c->map_generation;
545 int dia_size= c->dia_size&0xFF;
546 const int dec= dia_size & (dia_size-1);
547 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
548 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
550 cmpf = s->mecc.me_cmp[size];
551 chroma_cmpf = s->mecc.me_cmp[size + 1];
553 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
558 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
560 }while(best[0] != x || best[1] != y);
565 CHECK_CLIPPED_MV(x+1, y);
566 CHECK_CLIPPED_MV(x, y+1);
567 CHECK_CLIPPED_MV(x-1, y);
568 CHECK_CLIPPED_MV(x, y-1);
573 static int umh_search(MpegEncContext * s, int *best, int dmin,
574 int src_index, int ref_index, int const penalty_factor,
575 int size, int h, int flags)
577 MotionEstContext * const c= &s->me;
578 me_cmp_func cmpf, chroma_cmpf;
581 unsigned map_generation = c->map_generation;
582 int x,y,x2,y2, i, j, d;
583 const int dia_size= c->dia_size&0xFE;
584 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
585 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
586 {-2, 3}, { 0, 4}, { 2, 3},
587 {-2,-3}, { 0,-4}, { 2,-3},};
589 cmpf = s->mecc.me_cmp[size];
590 chroma_cmpf = s->mecc.me_cmp[size + 1];
594 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
597 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
603 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
604 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
609 //FIXME prevent the CLIP stuff
611 for(j=1; j<=dia_size/4; j++){
613 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
617 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
620 static int full_search(MpegEncContext * s, int *best, int dmin,
621 int src_index, int ref_index, int const penalty_factor,
622 int size, int h, int flags)
624 MotionEstContext * const c= &s->me;
625 me_cmp_func cmpf, chroma_cmpf;
628 unsigned map_generation = c->map_generation;
630 const int dia_size= c->dia_size&0xFF;
632 cmpf = s->mecc.me_cmp[size];
633 chroma_cmpf = s->mecc.me_cmp[size + 1];
635 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
636 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
644 CHECK_CLIPPED_MV(x , y);
645 CHECK_CLIPPED_MV(x+1, y);
646 CHECK_CLIPPED_MV(x, y+1);
647 CHECK_CLIPPED_MV(x-1, y);
648 CHECK_CLIPPED_MV(x, y-1);
655 #define SAB_CHECK_MV(ax,ay)\
657 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
658 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
659 if(map[index]!=key){\
660 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
662 score_map[index]= d;\
663 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
664 if(d < minima[minima_count-1].height){\
667 while(d >= minima[j].height) j++;\
669 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
671 minima[j].checked= 0;\
672 minima[j].height= d;\
682 #define MAX_SAB_SIZE ME_MAP_SIZE
683 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
684 int src_index, int ref_index, int const penalty_factor,
685 int size, int h, int flags)
687 MotionEstContext * const c= &s->me;
688 me_cmp_func cmpf, chroma_cmpf;
689 Minima minima[MAX_SAB_SIZE];
690 const int minima_count= FFABS(c->dia_size);
694 unsigned map_generation = c->map_generation;
696 cmpf = s->mecc.me_cmp[size];
697 chroma_cmpf = s->mecc.me_cmp[size + 1];
699 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
700 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
702 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
703 uint32_t key= map[i];
705 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
707 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
709 minima[j].height= score_map[i];
710 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
711 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
712 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
713 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
715 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
716 if( minima[j].x > xmax || minima[j].x < xmin
717 || minima[j].y > ymax || minima[j].y < ymin)
721 if(minima[j].x || minima[j].y)
722 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
727 qsort(minima, j, sizeof(Minima), minima_cmp);
729 for(; j<minima_count; j++){
730 minima[j].height=256*256*256*64;
732 minima[j].x= minima[j].y=0;
735 for(i=0; i<minima_count; i++){
736 const int x= minima[i].x;
737 const int y= minima[i].y;
740 if(minima[i].checked) continue;
742 if( x >= xmax || x <= xmin
743 || y >= ymax || y <= ymin)
748 SAB_CHECK_MV(x , y-1)
749 SAB_CHECK_MV(x , y+1)
751 minima[i].checked= 1;
754 best[0]= minima[0].x;
755 best[1]= minima[0].y;
756 dmin= minima[0].height;
758 if( best[0] < xmax && best[0] > xmin
759 && best[1] < ymax && best[1] > ymin){
761 //ensure that the refernece samples for hpel refinement are in the map
762 CHECK_MV(best[0]-1, best[1])
763 CHECK_MV(best[0]+1, best[1])
764 CHECK_MV(best[0], best[1]-1)
765 CHECK_MV(best[0], best[1]+1)
770 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
771 int src_index, int ref_index, int const penalty_factor,
772 int size, int h, int flags)
774 MotionEstContext * const c= &s->me;
775 me_cmp_func cmpf, chroma_cmpf;
779 unsigned map_generation = c->map_generation;
781 cmpf = s->mecc.me_cmp[size];
782 chroma_cmpf = s->mecc.me_cmp[size + 1];
784 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
786 const int x= best[0];
787 const int y= best[1];
789 start= FFMAX(0, y + dia_size - ymax);
790 end = FFMIN(dia_size, xmax - x + 1);
791 for(dir= start; dir<end; dir++){
794 //check(x + dir,y + dia_size - dir,0, a0)
795 CHECK_MV(x + dir , y + dia_size - dir);
798 start= FFMAX(0, x + dia_size - xmax);
799 end = FFMIN(dia_size, y - ymin + 1);
800 for(dir= start; dir<end; dir++){
803 //check(x + dia_size - dir, y - dir,0, a1)
804 CHECK_MV(x + dia_size - dir, y - dir );
807 start= FFMAX(0, -y + dia_size + ymin );
808 end = FFMIN(dia_size, x - xmin + 1);
809 for(dir= start; dir<end; dir++){
812 //check(x - dir,y - dia_size + dir,0, a2)
813 CHECK_MV(x - dir , y - dia_size + dir);
816 start= FFMAX(0, -x + dia_size + xmin );
817 end = FFMIN(dia_size, ymax - y + 1);
818 for(dir= start; dir<end; dir++){
821 //check(x - dia_size + dir, y + dir,0, a3)
822 CHECK_MV(x - dia_size + dir, y + dir );
825 if(x!=best[0] || y!=best[1])
831 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
832 int src_index, int ref_index, int const penalty_factor,
833 int size, int h, int flags){
834 MotionEstContext * const c= &s->me;
836 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
837 else if(c->dia_size<-1)
838 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
839 else if(c->dia_size<2)
840 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
841 else if(c->dia_size>1024)
842 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
843 else if(c->dia_size>768)
844 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
845 else if(c->dia_size>512)
846 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
847 else if(c->dia_size>256)
848 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
850 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
854 @param P a list of candidate mvs to check before starting the
855 iterative search. If one of the candidates is close to the optimal mv, then
856 it takes fewer iterations. And it increases the chance that we find the
859 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
860 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
861 int ref_mv_scale, int flags, int size, int h)
863 MotionEstContext * const c= &s->me;
864 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
865 i.e. the difference between the position of the
866 block currently being encoded and the position of
867 the block chosen to predict it from. */
868 int d; ///< the score (cmp + penalty) of any given mv
869 int dmin; /**< the best value of d, i.e. the score
870 corresponding to the mv stored in best[]. */
871 unsigned map_generation;
873 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
874 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
875 me_cmp_func cmpf, chroma_cmpf;
881 penalty_factor= c->pre_penalty_factor;
882 cmpf = s->mecc.me_pre_cmp[size];
883 chroma_cmpf = s->mecc.me_pre_cmp[size + 1];
885 penalty_factor= c->penalty_factor;
886 cmpf = s->mecc.me_cmp[size];
887 chroma_cmpf = s->mecc.me_cmp[size + 1];
890 map_generation= update_map_generation(c);
893 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
894 map[0]= map_generation;
897 //FIXME precalc first term below?
898 if ((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) ||
899 s->mpv_flags & FF_MPV_FLAG_MV0)
900 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
903 if (s->first_slice_line) {
904 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
905 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
906 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
908 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
909 && ( P_LEFT[0] |P_LEFT[1]
911 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
917 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
918 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
919 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
920 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
921 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
922 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
923 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
924 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
925 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
926 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
930 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
931 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
932 if(!s->first_slice_line)
933 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
934 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
937 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
938 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
939 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
940 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
944 if(c->avctx->last_predictor_count){
945 const int count= c->avctx->last_predictor_count;
946 const int xstart= FFMAX(0, s->mb_x - count);
947 const int ystart= FFMAX(0, s->mb_y - count);
948 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
949 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
952 for(mb_y=ystart; mb_y<yend; mb_y++){
954 for(mb_x=xstart; mb_x<xend; mb_x++){
955 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
956 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
957 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
965 //check(best[0],best[1],0, b0)
966 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968 //check(best[0],best[1],0, b1)
975 //this function is dedicated to the braindamaged gcc
976 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
977 int P[10][2], int src_index, int ref_index,
978 int16_t (*last_mv)[2], int ref_mv_scale,
981 MotionEstContext * const c= &s->me;
982 //FIXME convert other functions in the same way if faster
983 if(c->flags==0 && h==16 && size==0){
984 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
992 static int epzs_motion_search4(MpegEncContext * s,
993 int *mx_ptr, int *my_ptr, int P[10][2],
994 int src_index, int ref_index, int16_t (*last_mv)[2],
997 MotionEstContext * const c= &s->me;
1000 unsigned map_generation;
1001 const int penalty_factor= c->penalty_factor;
1004 const int ref_mv_stride= s->mb_stride;
1005 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1006 me_cmp_func cmpf, chroma_cmpf;
1008 int flags= c->flags;
1011 cmpf = s->mecc.me_cmp[size];
1012 chroma_cmpf = s->mecc.me_cmp[size + 1];
1014 map_generation= update_map_generation(c);
1019 if (s->first_slice_line) {
1020 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1025 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026 //FIXME try some early stop
1027 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1035 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1038 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1042 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1050 //try to merge with above FIXME (needs PSNR test)
1051 static int epzs_motion_search2(MpegEncContext * s,
1052 int *mx_ptr, int *my_ptr, int P[10][2],
1053 int src_index, int ref_index, int16_t (*last_mv)[2],
1056 MotionEstContext * const c= &s->me;
1059 unsigned map_generation;
1060 const int penalty_factor= c->penalty_factor;
1061 const int size=0; //FIXME pass as arg
1063 const int ref_mv_stride= s->mb_stride;
1064 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1065 me_cmp_func cmpf, chroma_cmpf;
1067 int flags= c->flags;
1070 cmpf = s->mecc.me_cmp[size];
1071 chroma_cmpf = s->mecc.me_cmp[size + 1];
1073 map_generation= update_map_generation(c);
1078 if (s->first_slice_line) {
1079 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1080 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1081 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1082 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1084 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1085 //FIXME try some early stop
1086 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1087 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1088 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1089 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1090 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1091 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1094 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1095 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1096 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1097 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1098 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1101 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);