3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Motion estimation template.
27 #include "mpegvideo.h"
29 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
31 uint32_t av_unused * const score_map= c->score_map;\
32 const int av_unused xmin= c->xmin;\
33 const int av_unused ymin= c->ymin;\
34 const int av_unused xmax= c->xmax;\
35 const int av_unused ymax= c->ymax;\
36 uint8_t *mv_penalty= c->current_mv_penalty;\
37 const int pred_x= c->pred_x;\
38 const int pred_y= c->pred_y;\
40 #define CHECK_HALF_MV(dx, dy, x, y)\
42 const int hx= 2*(x)+(dx);\
43 const int hy= 2*(y)+(dy);\
44 d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
45 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
46 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
49 static int hpel_motion_search(MpegEncContext * s,
50 int *mx_ptr, int *my_ptr, int dmin,
51 int src_index, int ref_index,
54 MotionEstContext * const c= &s->me;
55 const int mx = *mx_ptr;
56 const int my = *my_ptr;
57 const int penalty_factor= c->sub_penalty_factor;
58 me_cmp_func cmp_sub, chroma_cmp_sub;
62 int flags= c->sub_flags;
66 cmp_sub= s->dsp.me_sub_cmp[size];
67 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
69 if(c->skip){ //FIXME move out of hpel?
75 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
76 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
77 if(mx || my || size>0)
78 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
81 if (mx > xmin && mx < xmax &&
82 my > ymin && my < ymax) {
84 const int index= (my<<ME_MAP_SHIFT) + mx;
85 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
86 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
87 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
88 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
89 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
90 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
91 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
92 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
95 unsigned map_generation= c->map_generation;
97 uint32_t *map= c->map;
99 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
100 assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
101 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
102 assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
103 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
104 assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
105 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
106 assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
108 CHECK_HALF_MV(0, 1, mx ,my-1)
110 CHECK_HALF_MV(1, 1, mx-1, my-1)
112 CHECK_HALF_MV(1, 1, mx , my-1)
114 CHECK_HALF_MV(1, 1, mx-1, my )
116 CHECK_HALF_MV(1, 0, mx-1, my )
118 CHECK_HALF_MV(1, 1, mx , my-1)
120 CHECK_HALF_MV(1, 1, mx-1, my-1)
122 CHECK_HALF_MV(1, 1, mx , my )
124 CHECK_HALF_MV(1, 0, mx , my )
129 CHECK_HALF_MV(1, 1, mx-1, my-1)
131 CHECK_HALF_MV(1, 1, mx , my )
133 CHECK_HALF_MV(1, 0, mx-1, my)
134 CHECK_HALF_MV(1, 1, mx-1, my)
137 CHECK_HALF_MV(1, 1, mx , my-1)
139 CHECK_HALF_MV(1, 1, mx-1, my)
141 CHECK_HALF_MV(1, 0, mx , my)
142 CHECK_HALF_MV(1, 1, mx , my)
144 CHECK_HALF_MV(0, 1, mx , my)
146 assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
155 static int no_sub_motion_search(MpegEncContext * s,
156 int *mx_ptr, int *my_ptr, int dmin,
157 int src_index, int ref_index,
165 static inline int get_mb_score(MpegEncContext *s, int mx, int my,
166 int src_index, int ref_index, int size,
169 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
170 MotionEstContext * const c= &s->me;
171 const int penalty_factor= c->mb_penalty_factor;
172 const int flags= c->mb_flags;
173 const int qpel= flags & FLAG_QPEL;
174 const int mask= 1+2*qpel;
175 me_cmp_func cmp_sub, chroma_cmp_sub;
182 cmp_sub= s->dsp.mb_cmp[size];
183 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
186 // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
188 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
189 //FIXME check cbp before adding penalty for (0,0) vector
190 if(add_rate && (mx || my || size>0))
191 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
196 int ff_get_mb_score(MpegEncContext *s, int mx, int my, int src_index,
197 int ref_index, int size, int h, int add_rate)
199 return get_mb_score(s, mx, my, src_index, ref_index, size, h, add_rate);
202 #define CHECK_QUARTER_MV(dx, dy, x, y)\
204 const int hx= 4*(x)+(dx);\
205 const int hy= 4*(y)+(dy);\
206 d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
207 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
208 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
211 static int qpel_motion_search(MpegEncContext * s,
212 int *mx_ptr, int *my_ptr, int dmin,
213 int src_index, int ref_index,
216 MotionEstContext * const c= &s->me;
217 const int mx = *mx_ptr;
218 const int my = *my_ptr;
219 const int penalty_factor= c->sub_penalty_factor;
220 const unsigned map_generation = c->map_generation;
221 const int subpel_quality= c->avctx->me_subpel_quality;
222 uint32_t *map= c->map;
223 me_cmp_func cmpf, chroma_cmpf;
224 me_cmp_func cmp_sub, chroma_cmp_sub;
227 int flags= c->sub_flags;
229 cmpf= s->dsp.me_cmp[size];
230 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
233 cmp_sub= s->dsp.me_sub_cmp[size];
234 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
236 if(c->skip){ //FIXME somehow move up (benchmark)
242 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
243 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
244 if(mx || my || size>0)
245 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
248 if (mx > xmin && mx < xmax &&
249 my > ymin && my < ymax) {
250 int bx=4*mx, by=4*my;
253 const int index= (my<<ME_MAP_SHIFT) + mx;
254 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
255 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
256 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
257 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
258 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
262 memset(best, 64, sizeof(int)*8);
263 if(s->me.dia_size>=2){
264 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
265 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
266 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
267 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
269 for(ny= -3; ny <= 3; ny++){
270 for(nx= -3; nx <= 3; nx++){
271 //FIXME this could overflow (unlikely though)
272 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
273 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
274 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
275 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
278 if((nx&3)==0 && (ny&3)==0) continue;
280 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
282 // if(nx&1) score-=1024*c->penalty_factor;
283 // if(ny&1) score-=1024*c->penalty_factor;
287 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
288 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
290 best_pos[i][0]= nx + 4*mx;
291 best_pos[i][1]= ny + 4*my;
299 //FIXME this could overflow (unlikely though)
300 const int cx = 4*(r - l);
301 const int cx2= r + l - 2*c;
302 const int cy = 4*(b - t);
303 const int cy2= b + t - 2*c;
306 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
307 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
309 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
312 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
314 assert(16*cx2 + 4*cx + 32*c == 32*r);
315 assert(16*cx2 - 4*cx + 32*c == 32*l);
316 assert(16*cy2 + 4*cy + 32*c == 32*b);
317 assert(16*cy2 - 4*cy + 32*c == 32*t);
318 assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
320 for(ny= -3; ny <= 3; ny++){
321 for(nx= -3; nx <= 3; nx++){
322 //FIXME this could overflow (unlikely though)
323 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
326 if((nx&3)==0 && (ny&3)==0) continue;
328 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
329 // if(nx&1) score-=32*c->penalty_factor;
330 // if(ny&1) score-=32*c->penalty_factor;
334 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
335 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
337 best_pos[i][0]= nx + 4*mx;
338 best_pos[i][1]= ny + 4*my;
345 for(i=0; i<subpel_quality; i++){
348 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
351 assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
364 #define CHECK_MV(x,y)\
366 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
367 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
368 assert((x) >= xmin);\
369 assert((x) <= xmax);\
370 assert((y) >= ymin);\
371 assert((y) <= ymax);\
372 if(map[index]!=key){\
373 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
375 score_map[index]= d;\
376 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
377 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
381 #define CHECK_CLIPPED_MV(ax,ay)\
385 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
386 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
390 #define CHECK_MV_DIR(x,y,new_dir)\
392 const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
393 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
394 if(map[index]!=key){\
395 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
397 score_map[index]= d;\
398 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
408 #define check(x,y,S,v)\
409 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
410 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
411 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
412 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
414 #define LOAD_COMMON2\
415 uint32_t *map= c->map;\
416 const int qpel= flags&FLAG_QPEL;\
417 const int shift= 1+qpel;\
419 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
420 int src_index, int ref_index, int const penalty_factor,
421 int size, int h, int flags)
423 MotionEstContext * const c= &s->me;
424 me_cmp_func cmpf, chroma_cmpf;
428 unsigned map_generation = c->map_generation;
430 cmpf= s->dsp.me_cmp[size];
431 chroma_cmpf= s->dsp.me_cmp[size+1];
433 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
434 const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
435 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
436 if(map[index]!=key){ //this will be executed only very rarey
437 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
444 const int dir= next_dir;
445 const int x= best[0];
446 const int y= best[1];
449 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
450 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
451 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
452 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
460 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
461 int src_index, int ref_index, int const penalty_factor,
462 int size, int h, int flags)
464 MotionEstContext * const c= &s->me;
465 me_cmp_func cmpf, chroma_cmpf;
469 unsigned map_generation = c->map_generation;
471 cmpf= s->dsp.me_cmp[size];
472 chroma_cmpf= s->dsp.me_cmp[size+1];
474 for(dia_size=1; dia_size<=4; dia_size++){
476 const int x= best[0];
477 const int y= best[1];
479 if(dia_size&(dia_size-1)) continue;
481 if( x + dia_size > xmax
482 || x - dia_size < xmin
483 || y + dia_size > ymax
484 || y - dia_size < ymin)
487 for(dir= 0; dir<dia_size; dir+=2){
490 CHECK_MV(x + dir , y + dia_size - dir);
491 CHECK_MV(x + dia_size - dir, y - dir );
492 CHECK_MV(x - dir , y - dia_size + dir);
493 CHECK_MV(x - dia_size + dir, y + dir );
496 if(x!=best[0] || y!=best[1])
502 static int hex_search(MpegEncContext * s, int *best, int dmin,
503 int src_index, int ref_index, int const penalty_factor,
504 int size, int h, int flags, int dia_size)
506 MotionEstContext * const c= &s->me;
507 me_cmp_func cmpf, chroma_cmpf;
510 unsigned map_generation = c->map_generation;
512 const int dec= dia_size & (dia_size-1);
514 cmpf= s->dsp.me_cmp[size];
515 chroma_cmpf= s->dsp.me_cmp[size+1];
517 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
522 CHECK_CLIPPED_MV(x -dia_size , y);
523 CHECK_CLIPPED_MV(x+ dia_size , y);
524 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
525 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
527 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
528 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
530 }while(best[0] != x || best[1] != y);
536 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
537 int src_index, int ref_index, int const penalty_factor,
538 int size, int h, int flags)
540 MotionEstContext * const c= &s->me;
541 me_cmp_func cmpf, chroma_cmpf;
544 unsigned map_generation = c->map_generation;
546 int dia_size= c->dia_size&0xFF;
547 const int dec= dia_size & (dia_size-1);
548 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
549 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
551 cmpf= s->dsp.me_cmp[size];
552 chroma_cmpf= s->dsp.me_cmp[size+1];
554 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
559 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
561 }while(best[0] != x || best[1] != y);
566 CHECK_CLIPPED_MV(x+1, y);
567 CHECK_CLIPPED_MV(x, y+1);
568 CHECK_CLIPPED_MV(x-1, y);
569 CHECK_CLIPPED_MV(x, y-1);
574 static int umh_search(MpegEncContext * s, int *best, int dmin,
575 int src_index, int ref_index, int const penalty_factor,
576 int size, int h, int flags)
578 MotionEstContext * const c= &s->me;
579 me_cmp_func cmpf, chroma_cmpf;
582 unsigned map_generation = c->map_generation;
583 int x,y,x2,y2, i, j, d;
584 const int dia_size= c->dia_size&0xFE;
585 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
586 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
587 {-2, 3}, { 0, 4}, { 2, 3},
588 {-2,-3}, { 0,-4}, { 2,-3},};
590 cmpf= s->dsp.me_cmp[size];
591 chroma_cmpf= s->dsp.me_cmp[size+1];
595 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
598 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
604 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
605 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
610 //FIXME prevent the CLIP stuff
612 for(j=1; j<=dia_size/4; j++){
614 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
618 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
621 static int full_search(MpegEncContext * s, int *best, int dmin,
622 int src_index, int ref_index, int const penalty_factor,
623 int size, int h, int flags)
625 MotionEstContext * const c= &s->me;
626 me_cmp_func cmpf, chroma_cmpf;
629 unsigned map_generation = c->map_generation;
631 const int dia_size= c->dia_size&0xFF;
633 cmpf= s->dsp.me_cmp[size];
634 chroma_cmpf= s->dsp.me_cmp[size+1];
636 for(y=FFMAX(-dia_size, ymin); y<=FFMIN(dia_size,ymax); y++){
637 for(x=FFMAX(-dia_size, xmin); x<=FFMIN(dia_size,xmax); x++){
645 CHECK_CLIPPED_MV(x , y);
646 CHECK_CLIPPED_MV(x+1, y);
647 CHECK_CLIPPED_MV(x, y+1);
648 CHECK_CLIPPED_MV(x-1, y);
649 CHECK_CLIPPED_MV(x, y-1);
656 #define SAB_CHECK_MV(ax,ay)\
658 const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
659 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
660 if(map[index]!=key){\
661 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
663 score_map[index]= d;\
664 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
665 if(d < minima[minima_count-1].height){\
668 while(d >= minima[j].height) j++;\
670 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
672 minima[j].checked= 0;\
673 minima[j].height= d;\
683 #define MAX_SAB_SIZE ME_MAP_SIZE
684 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
685 int src_index, int ref_index, int const penalty_factor,
686 int size, int h, int flags)
688 MotionEstContext * const c= &s->me;
689 me_cmp_func cmpf, chroma_cmpf;
690 Minima minima[MAX_SAB_SIZE];
691 const int minima_count= FFABS(c->dia_size);
695 unsigned map_generation = c->map_generation;
697 cmpf= s->dsp.me_cmp[size];
698 chroma_cmpf= s->dsp.me_cmp[size+1];
700 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
701 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
703 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
704 uint32_t key= map[i];
706 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
708 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
710 minima[j].height= score_map[i];
711 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
712 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
713 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
714 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
716 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
717 if( minima[j].x > xmax || minima[j].x < xmin
718 || minima[j].y > ymax || minima[j].y < ymin)
722 if(minima[j].x || minima[j].y)
723 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
728 qsort(minima, j, sizeof(Minima), minima_cmp);
730 for(; j<minima_count; j++){
731 minima[j].height=256*256*256*64;
733 minima[j].x= minima[j].y=0;
736 for(i=0; i<minima_count; i++){
737 const int x= minima[i].x;
738 const int y= minima[i].y;
741 if(minima[i].checked) continue;
743 if( x >= xmax || x <= xmin
744 || y >= ymax || y <= ymin)
749 SAB_CHECK_MV(x , y-1)
750 SAB_CHECK_MV(x , y+1)
752 minima[i].checked= 1;
755 best[0]= minima[0].x;
756 best[1]= minima[0].y;
757 dmin= minima[0].height;
759 if( best[0] < xmax && best[0] > xmin
760 && best[1] < ymax && best[1] > ymin){
762 //ensure that the refernece samples for hpel refinement are in the map
763 CHECK_MV(best[0]-1, best[1])
764 CHECK_MV(best[0]+1, best[1])
765 CHECK_MV(best[0], best[1]-1)
766 CHECK_MV(best[0], best[1]+1)
771 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
772 int src_index, int ref_index, int const penalty_factor,
773 int size, int h, int flags)
775 MotionEstContext * const c= &s->me;
776 me_cmp_func cmpf, chroma_cmpf;
780 unsigned map_generation = c->map_generation;
782 cmpf= s->dsp.me_cmp[size];
783 chroma_cmpf= s->dsp.me_cmp[size+1];
785 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
787 const int x= best[0];
788 const int y= best[1];
790 start= FFMAX(0, y + dia_size - ymax);
791 end = FFMIN(dia_size, xmax - x + 1);
792 for(dir= start; dir<end; dir++){
795 //check(x + dir,y + dia_size - dir,0, a0)
796 CHECK_MV(x + dir , y + dia_size - dir);
799 start= FFMAX(0, x + dia_size - xmax);
800 end = FFMIN(dia_size, y - ymin + 1);
801 for(dir= start; dir<end; dir++){
804 //check(x + dia_size - dir, y - dir,0, a1)
805 CHECK_MV(x + dia_size - dir, y - dir );
808 start= FFMAX(0, -y + dia_size + ymin );
809 end = FFMIN(dia_size, x - xmin + 1);
810 for(dir= start; dir<end; dir++){
813 //check(x - dir,y - dia_size + dir,0, a2)
814 CHECK_MV(x - dir , y - dia_size + dir);
817 start= FFMAX(0, -x + dia_size + xmin );
818 end = FFMIN(dia_size, ymax - y + 1);
819 for(dir= start; dir<end; dir++){
822 //check(x - dia_size + dir, y + dir,0, a3)
823 CHECK_MV(x - dia_size + dir, y + dir );
826 if(x!=best[0] || y!=best[1])
832 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
833 int src_index, int ref_index, int const penalty_factor,
834 int size, int h, int flags){
835 MotionEstContext * const c= &s->me;
837 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
838 else if(c->dia_size<-1)
839 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
840 else if(c->dia_size<2)
841 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
842 else if(c->dia_size>1024)
843 return full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
844 else if(c->dia_size>768)
845 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
846 else if(c->dia_size>512)
847 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
848 else if(c->dia_size>256)
849 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
851 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
855 @param P a list of candidate mvs to check before starting the
856 iterative search. If one of the candidates is close to the optimal mv, then
857 it takes fewer iterations. And it increases the chance that we find the
860 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
862 int ref_mv_scale, int flags, int size, int h)
864 MotionEstContext * const c= &s->me;
865 int best[2]={0, 0}; /**< x and y coordinates of the best motion vector.
866 i.e. the difference between the position of the
867 block currently being encoded and the position of
868 the block chosen to predict it from. */
869 int d; ///< the score (cmp + penalty) of any given mv
870 int dmin; /**< the best value of d, i.e. the score
871 corresponding to the mv stored in best[]. */
872 unsigned map_generation;
874 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
875 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
876 me_cmp_func cmpf, chroma_cmpf;
882 penalty_factor= c->pre_penalty_factor;
883 cmpf= s->dsp.me_pre_cmp[size];
884 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
886 penalty_factor= c->penalty_factor;
887 cmpf= s->dsp.me_cmp[size];
888 chroma_cmpf= s->dsp.me_cmp[size+1];
891 map_generation= update_map_generation(c);
894 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
895 map[0]= map_generation;
898 //FIXME precalc first term below?
899 if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
900 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
903 if (s->first_slice_line) {
904 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
905 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
906 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
908 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
909 && ( P_LEFT[0] |P_LEFT[1]
911 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
917 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
918 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
919 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
920 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
921 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
922 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
923 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
924 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
925 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
926 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
930 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
931 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
932 if(!s->first_slice_line)
933 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
934 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
936 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
937 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
938 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
939 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
940 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
944 if(c->avctx->last_predictor_count){
945 const int count= c->avctx->last_predictor_count;
946 const int xstart= FFMAX(0, s->mb_x - count);
947 const int ystart= FFMAX(0, s->mb_y - count);
948 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
949 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
952 for(mb_y=ystart; mb_y<yend; mb_y++){
954 for(mb_x=xstart; mb_x<xend; mb_x++){
955 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
956 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
957 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
959 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
965 //check(best[0],best[1],0, b0)
966 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
968 //check(best[0],best[1],0, b1)
975 //this function is dedicated to the braindamaged gcc
976 int ff_epzs_motion_search(MpegEncContext *s, int *mx_ptr, int *my_ptr,
977 int P[10][2], int src_index, int ref_index,
978 int16_t (*last_mv)[2], int ref_mv_scale,
981 MotionEstContext * const c= &s->me;
982 //FIXME convert other functions in the same way if faster
983 if(c->flags==0 && h==16 && size==0){
984 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
986 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
988 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
992 static int epzs_motion_search4(MpegEncContext * s,
993 int *mx_ptr, int *my_ptr, int P[10][2],
994 int src_index, int ref_index, int16_t (*last_mv)[2],
997 MotionEstContext * const c= &s->me;
1000 unsigned map_generation;
1001 const int penalty_factor= c->penalty_factor;
1004 const int ref_mv_stride= s->mb_stride;
1005 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1006 me_cmp_func cmpf, chroma_cmpf;
1008 int flags= c->flags;
1011 cmpf= s->dsp.me_cmp[size];
1012 chroma_cmpf= s->dsp.me_cmp[size+1];
1014 map_generation= update_map_generation(c);
1019 if (s->first_slice_line) {
1020 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1021 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1022 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1023 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1025 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1026 //FIXME try some early stop
1027 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1028 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1029 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1030 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1031 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1032 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1035 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1036 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1037 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1038 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1039 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1042 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1050 //try to merge with above FIXME (needs PSNR test)
1051 static int epzs_motion_search2(MpegEncContext * s,
1052 int *mx_ptr, int *my_ptr, int P[10][2],
1053 int src_index, int ref_index, int16_t (*last_mv)[2],
1056 MotionEstContext * const c= &s->me;
1059 unsigned map_generation;
1060 const int penalty_factor= c->penalty_factor;
1061 const int size=0; //FIXME pass as arg
1063 const int ref_mv_stride= s->mb_stride;
1064 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1065 me_cmp_func cmpf, chroma_cmpf;
1067 int flags= c->flags;
1070 cmpf= s->dsp.me_cmp[size];
1071 chroma_cmpf= s->dsp.me_cmp[size+1];
1073 map_generation= update_map_generation(c);
1078 if (s->first_slice_line) {
1079 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1080 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1081 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1082 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1084 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1085 //FIXME try some early stop
1086 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1087 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1088 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1089 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1090 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1091 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1094 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1095 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1096 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1097 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1098 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1101 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);