3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file motion_est_template.c
25 * Motion estimation template.
28 //let's hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
30 uint32_t av_unused * const score_map= c->score_map;\
31 const int av_unused xmin= c->xmin;\
32 const int av_unused ymin= c->ymin;\
33 const int av_unused xmax= c->xmax;\
34 const int av_unused ymax= c->ymax;\
35 uint8_t *mv_penalty= c->current_mv_penalty;\
36 const int pred_x= c->pred_x;\
37 const int pred_y= c->pred_y;\
// Score one half-pel candidate.
// (x, y) is the full-pel position and (dx, dy) the sub-pel offset; the
// candidate in half-pel units is hx = 2*x + dx, hy = 2*y + dy.
// The score d is cmp() evaluated with the sub-pel comparison functions
// (cmp_sub, chroma_cmp_sub) plus the mv_penalty of (hx, hy) relative to
// (pred_x, pred_y), scaled by penalty_factor.
// COPY3_IF_LT then receives (dmin, d), (bx, hx), (by, hy); presumably it keeps
// the candidate when d < dmin -- confirm at its definition.
// Expects d, dmin, bx, by, s, size, h, ref_index, src_index and flags to be
// visible in the expanding scope. x and y are expanded more than once, so
// arguments must be free of side effects.
39 #define CHECK_HALF_MV(dx, dy, x, y)\
41 const int hx= 2*(x)+(dx);\
42 const int hy= 2*(y)+(dy);\
43 d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
44 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
45 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
49 static int hpel_motion_search)(MpegEncContext * s,
50 int *mx_ptr, int *my_ptr, int dmin,
54 const int xx = 16 * s->mb_x + 8*(n&1);
55 const int yy = 16 * s->mb_y + 8*(n>>1);
56 const int mx = *mx_ptr;
57 const int my = *my_ptr;
58 const int penalty_factor= c->sub_penalty_factor;
64 me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
66 if(s->no_rounding /*FIXME b_type*/){
67 hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
68 chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
70 hpel_put=& s->dsp.put_pixels_tab[size];
71 chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
73 cmpf= s->dsp.me_cmp[size];
74 chroma_cmpf= s->dsp.me_cmp[size+1];
75 cmp_sub= s->dsp.me_sub_cmp[size];
76 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
78 if(c->skip){ //FIXME somehow move up (benchmark)
84 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
85 CMP_HPEL(dmin, 0, 0, mx, my, size);
87 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
90 if (mx > xmin && mx < xmax &&
91 my > ymin && my < ymax) {
95 CHECK_HALF_MV(1, 1, mx-1, my-1)
96 CHECK_HALF_MV(0, 1, mx , my-1)
97 CHECK_HALF_MV(1, 1, mx , my-1)
98 CHECK_HALF_MV(1, 0, mx-1, my )
99 CHECK_HALF_MV(1, 0, mx , my )
100 CHECK_HALF_MV(1, 1, mx-1, my )
101 CHECK_HALF_MV(0, 1, mx , my )
102 CHECK_HALF_MV(1, 1, mx , my )
104 assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
117 static int hpel_motion_search(MpegEncContext * s,
118 int *mx_ptr, int *my_ptr, int dmin,
119 int src_index, int ref_index,
122 MotionEstContext * const c= &s->me;
123 const int mx = *mx_ptr;
124 const int my = *my_ptr;
125 const int penalty_factor= c->sub_penalty_factor;
126 me_cmp_func cmp_sub, chroma_cmp_sub;
127 int bx=2*mx, by=2*my;
130 int flags= c->sub_flags;
134 cmp_sub= s->dsp.me_sub_cmp[size];
135 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
137 if(c->skip){ //FIXME move out of hpel?
143 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
144 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
145 if(mx || my || size>0)
146 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
149 if (mx > xmin && mx < xmax &&
150 my > ymin && my < ymax) {
152 const int index= (my<<ME_MAP_SHIFT) + mx;
153 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
154 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
155 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
156 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
157 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
158 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*c->penalty_factor;
159 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
160 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
164 int map_generation= c->map_generation;
166 uint32_t *map= c->map;
168 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
169 assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
170 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
171 assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
172 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
173 assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
174 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
175 assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
178 CHECK_HALF_MV(0, 1, mx ,my-1)
180 CHECK_HALF_MV(1, 1, mx-1, my-1)
182 CHECK_HALF_MV(1, 1, mx , my-1)
184 CHECK_HALF_MV(1, 1, mx-1, my )
186 CHECK_HALF_MV(1, 0, mx-1, my )
188 CHECK_HALF_MV(1, 1, mx , my-1)
190 CHECK_HALF_MV(1, 1, mx-1, my-1)
192 CHECK_HALF_MV(1, 1, mx , my )
194 CHECK_HALF_MV(1, 0, mx , my )
199 CHECK_HALF_MV(1, 1, mx-1, my-1)
201 CHECK_HALF_MV(1, 1, mx , my )
203 CHECK_HALF_MV(1, 0, mx-1, my)
204 CHECK_HALF_MV(1, 1, mx-1, my)
207 CHECK_HALF_MV(1, 1, mx , my-1)
209 CHECK_HALF_MV(1, 1, mx-1, my)
211 CHECK_HALF_MV(1, 0, mx , my)
212 CHECK_HALF_MV(1, 1, mx , my)
214 CHECK_HALF_MV(0, 1, mx , my)
216 assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
226 static int no_sub_motion_search(MpegEncContext * s,
227 int *mx_ptr, int *my_ptr, int dmin,
228 int src_index, int ref_index,
// Score the motion vector (mx, my) with the macroblock comparison functions
// (s->dsp.mb_cmp[size] for luma, s->dsp.mb_cmp[size+1] for chroma).
// mx and my are given in sub-pel units: the integer part passed to cmp() is
// mx>>(qpel+1) and the fractional part is mx&mask (same for my).
// When add_rate is nonzero and the vector is nonzero or size>0, the mv_penalty
// of the vector relative to (pred_x, pred_y), scaled by c->mb_penalty_factor,
// is added to the score.
// NOTE(review): qpel is flags & FLAG_QPEL, so mask = 1+2*qpel and the shift
// qpel+1 only give 3 and 2 in quarter-pel mode if FLAG_QPEL has the value 1 --
// confirm against its definition.
236 inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
237 int ref_index, int size, int h, int add_rate)
239 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
240 MotionEstContext * const c= &s->me;
241 const int penalty_factor= c->mb_penalty_factor;
242 const int flags= c->mb_flags;
243 const int qpel= flags & FLAG_QPEL;
// mask selects the fractional bits of a vector component
244 const int mask= 1+2*qpel;
245 me_cmp_func cmp_sub, chroma_cmp_sub;
// the names say cmp_sub, but these hold the mb_cmp functions here
252 cmp_sub= s->dsp.mb_cmp[size];
253 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
256 // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
258 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
259 //FIXME check cbp before adding penalty for (0,0) vector
260 if(add_rate && (mx || my || size>0))
261 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
// Score one quarter-pel candidate.
// (x, y) is the full-pel position and (dx, dy) the sub-pel offset; the
// candidate in quarter-pel units is hx = 4*x + dx, hy = 4*y + dy.
// The score d is cmp() plus the mv_penalty of (hx, hy) relative to
// (pred_x, pred_y), scaled by penalty_factor; COPY3_IF_LT then receives
// (dmin, d), (bx, hx), (by, hy).
// Unlike CHECK_HALF_MV, the comparison uses cmpf/chroma_cmpf rather than
// cmp_sub/chroma_cmp_sub.
// Expects d, dmin, bx, by and the cmp() arguments in the expanding scope;
// x and y are expanded more than once, so avoid side effects in arguments.
266 #define CHECK_QUARTER_MV(dx, dy, x, y)\
268 const int hx= 4*(x)+(dx);\
269 const int hy= 4*(y)+(dy);\
270 d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
271 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
272 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
275 static int qpel_motion_search(MpegEncContext * s,
276 int *mx_ptr, int *my_ptr, int dmin,
277 int src_index, int ref_index,
280 MotionEstContext * const c= &s->me;
281 const int mx = *mx_ptr;
282 const int my = *my_ptr;
283 const int penalty_factor= c->sub_penalty_factor;
284 const int map_generation= c->map_generation;
285 const int subpel_quality= c->avctx->me_subpel_quality;
286 uint32_t *map= c->map;
287 me_cmp_func cmpf, chroma_cmpf;
288 me_cmp_func cmp_sub, chroma_cmp_sub;
291 int flags= c->sub_flags;
293 cmpf= s->dsp.me_cmp[size];
294 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
297 cmp_sub= s->dsp.me_sub_cmp[size];
298 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
300 if(c->skip){ //FIXME somehow move up (benchmark)
306 if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
307 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
308 if(mx || my || size>0)
309 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
312 if (mx > xmin && mx < xmax &&
313 my > ymin && my < ymax) {
314 int bx=4*mx, by=4*my;
317 const int index= (my<<ME_MAP_SHIFT) + mx;
318 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
319 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
320 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
321 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
322 const int c= score_map[(index )&(ME_MAP_SIZE-1)];
326 memset(best, 64, sizeof(int)*8);
328 if(s->me.dia_size>=2){
329 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
330 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
331 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
332 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
334 for(ny= -3; ny <= 3; ny++){
335 for(nx= -3; nx <= 3; nx++){
336 //FIXME this could overflow (unlikely though)
337 const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
338 const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
339 const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
340 int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
343 if((nx&3)==0 && (ny&3)==0) continue;
345 score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
347 // if(nx&1) score-=1024*c->penalty_factor;
348 // if(ny&1) score-=1024*c->penalty_factor;
352 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
353 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
355 best_pos[i][0]= nx + 4*mx;
356 best_pos[i][1]= ny + 4*my;
364 //FIXME this could overflow (unlikely though)
365 const int cx = 4*(r - l);
366 const int cx2= r + l - 2*c;
367 const int cy = 4*(b - t);
368 const int cy2= b + t - 2*c;
371 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
372 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
374 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
377 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
379 assert(16*cx2 + 4*cx + 32*c == 32*r);
380 assert(16*cx2 - 4*cx + 32*c == 32*l);
381 assert(16*cy2 + 4*cy + 32*c == 32*b);
382 assert(16*cy2 - 4*cy + 32*c == 32*t);
383 assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
385 for(ny= -3; ny <= 3; ny++){
386 for(nx= -3; nx <= 3; nx++){
387 //FIXME this could overflow (unlikely though)
388 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
391 if((nx&3)==0 && (ny&3)==0) continue;
393 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
394 // if(nx&1) score-=32*c->penalty_factor;
395 // if(ny&1) score-=32*c->penalty_factor;
399 memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
400 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
402 best_pos[i][0]= nx + 4*mx;
403 best_pos[i][1]= ny + 4*my;
410 for(i=0; i<subpel_quality; i++){
413 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
417 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
418 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
419 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
420 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
421 // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
424 // nx= FFMAX(4*mx - bx, bx - 4*mx);
425 // ny= FFMAX(4*my - by, by - 4*my);
427 static int stats[7][7], count;
429 stats[4*mx - bx + 3][4*my - by + 3]++;
430 if(256*256*256*64 % count ==0){
432 if((i%7)==0) printf("\n");
433 printf("%6d ", stats[0][i]);
441 CHECK_QUARTER_MV(2, 2, mx-1, my-1)
442 CHECK_QUARTER_MV(0, 2, mx , my-1)
443 CHECK_QUARTER_MV(2, 2, mx , my-1)
444 CHECK_QUARTER_MV(2, 0, mx , my )
445 CHECK_QUARTER_MV(2, 2, mx , my )
446 CHECK_QUARTER_MV(0, 2, mx , my )
447 CHECK_QUARTER_MV(2, 2, mx-1, my )
448 CHECK_QUARTER_MV(2, 0, mx-1, my )
454 int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
455 int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
456 CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
461 CHECK_QUARTER_MV(1, 3, mx-1, my-1)
462 CHECK_QUARTER_MV(1, 2, mx-1, my-1)
463 CHECK_QUARTER_MV(1, 1, mx-1, my-1)
464 CHECK_QUARTER_MV(2, 1, mx-1, my-1)
465 CHECK_QUARTER_MV(3, 1, mx-1, my-1)
466 CHECK_QUARTER_MV(0, 1, mx , my-1)
467 CHECK_QUARTER_MV(1, 1, mx , my-1)
468 CHECK_QUARTER_MV(2, 1, mx , my-1)
469 CHECK_QUARTER_MV(3, 1, mx , my-1)
470 CHECK_QUARTER_MV(3, 2, mx , my-1)
471 CHECK_QUARTER_MV(3, 3, mx , my-1)
472 CHECK_QUARTER_MV(3, 0, mx , my )
473 CHECK_QUARTER_MV(3, 1, mx , my )
474 CHECK_QUARTER_MV(3, 2, mx , my )
475 CHECK_QUARTER_MV(3, 3, mx , my )
476 CHECK_QUARTER_MV(2, 3, mx , my )
477 CHECK_QUARTER_MV(1, 3, mx , my )
478 CHECK_QUARTER_MV(0, 3, mx , my )
479 CHECK_QUARTER_MV(3, 3, mx-1, my )
480 CHECK_QUARTER_MV(2, 3, mx-1, my )
481 CHECK_QUARTER_MV(1, 3, mx-1, my )
482 CHECK_QUARTER_MV(1, 2, mx-1, my )
483 CHECK_QUARTER_MV(1, 1, mx-1, my )
484 CHECK_QUARTER_MV(1, 0, mx-1, my )
486 assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
// Evaluate the full-pel candidate (x, y) unless the map already holds it.
// key combines y, x and map_generation; index is (y<<ME_MAP_SHIFT)+x reduced
// modulo ME_MAP_SIZE. The candidate must lie inside [xmin, xmax] x [ymin, ymax]
// (asserted). When map[index] differs from key, the candidate is scored with
// cmp() at zero sub-pel offset, the raw score is stored in score_map[index],
// the mv_penalty of the vector (converted with <<shift) relative to
// (pred_x, pred_y) times penalty_factor is added, and COPY3_IF_LT receives
// (dmin, d), (best[0], x), (best[1], y).
// x and y are expanded many times, so arguments must be free of side effects.
499 #define CHECK_MV(x,y)\
501 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
502 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
503 assert((x) >= xmin);\
504 assert((x) <= xmax);\
505 assert((y) >= ymin);\
506 assert((y) <= ymax);\
507 /*printf("check_mv %d %d\n", x, y);*/\
508 if(map[index]!=key){\
509 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
511 score_map[index]= d;\
512 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
513 /*printf("score:%d\n", d);*/\
514 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
// Clamp a candidate to the search window before it is evaluated:
// Lx2 is Lx limited to [xmin, xmax] and Ly2 is Ly limited to [ymin, ymax].
// Presumably Lx/Ly are copies of ax/ay and the clamped point is then handed
// to CHECK_MV -- confirm against the full macro body.
518 #define CHECK_CLIPPED_MV(ax,ay)\
522 const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
523 const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
// Evaluate the full-pel candidate (x, y) unless the map already holds it.
// Uses the same key/index lookup and the same scoring as CHECK_MV (cmp() at
// zero sub-pel offset, raw score stored in score_map[index], then the
// mv_penalty term times penalty_factor added), but contains no range asserts:
// callers are expected to bounds-check first, as small_diamond_search does.
// Presumably new_dir is recorded as the search direction when the candidate
// becomes the new best -- confirm against the full macro body.
// x and y are expanded many times, so arguments must be free of side effects.
527 #define CHECK_MV_DIR(x,y,new_dir)\
529 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
530 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
531 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
532 if(map[index]!=key){\
533 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
535 score_map[index]= d;\
536 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
537 /*printf("score:%d\n", d);*/\
// Debug helper: report a vector (x, y) that lies outside the search window.
// The window limits xmin/xmax/ymin/ymax are scaled by <<S before comparing, so
// S selects the precision of x and y. For each violated limit one printf is
// issued with the limit, x, y and the current macroblock position
// (s->mb_x, s->mb_y); v is stringized and appended to the message as a tag.
// The format strings contain no newline.
547 #define check(x,y,S,v)\
548 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
549 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
550 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
551 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
// Declares the locals map, qpel and shift in the expanding scope.
// Requires c (the MotionEstContext pointer) and flags to be visible.
// shift is the left shift applied to full-pel coordinates before they index
// mv_penalty (see CHECK_MV).
// NOTE(review): shift = 1 + (flags & FLAG_QPEL) is 1 or 2 only if FLAG_QPEL
// has the value 1 -- confirm against its definition.
553 #define LOAD_COMMON2\
554 uint32_t *map= c->map;\
555 const int qpel= flags&FLAG_QPEL;\
556 const int shift= 1+qpel;\
// Diamond search of radius 1 around best[].
// best[0]/best[1] hold the current best full-pel vector on entry and dmin its
// score. The comparison functions are s->dsp.me_cmp[size] (luma) and
// s->dsp.me_cmp[size+1] (chroma).
// Each step examines the four direct neighbours of the current best point.
// The neighbours are tagged 0 (x-1), 1 (y-1), 2 (x+1), 3 (y+1); a neighbour is
// skipped when dir equals the opposite tag or when it would leave the window
// [xmin, xmax] x [ymin, ymax].
558 static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
559 int src_index, int ref_index, int const penalty_factor,
560 int size, int h, int flags)
562 MotionEstContext * const c= &s->me;
563 me_cmp_func cmpf, chroma_cmpf;
567 int map_generation= c->map_generation;
569 cmpf= s->dsp.me_cmp[size];
570 chroma_cmpf= s->dsp.me_cmp[size+1];
572 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
573 const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
574 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
575 if(map[index]!=key){ //this will be executed only very rarely
576 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
// dir is the direction tag carried over from the previous step
583 const int dir= next_dir;
584 const int x= best[0];
585 const int y= best[1];
589 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
590 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
591 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
592 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
// Diamond search that probes diamonds of size 1, 2 and 4 around best[].
// dia_size runs from 1 to 4 and sizes that are not a power of two are skipped.
// A size is tested against the window [xmin, xmax] x [ymin, ymax] first, so
// the CHECK_MV calls inside the loop never see an out-of-range point.
// For each size the inner loop walks dir in steps of 2 and evaluates one point
// on each of the four edges of the diamond.
// The comparison functions are s->dsp.me_cmp[size] and s->dsp.me_cmp[size+1].
600 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
601 int src_index, int ref_index, int const penalty_factor,
602 int size, int h, int flags)
604 MotionEstContext * const c= &s->me;
605 me_cmp_func cmpf, chroma_cmpf;
609 int map_generation= c->map_generation;
611 cmpf= s->dsp.me_cmp[size];
612 chroma_cmpf= s->dsp.me_cmp[size+1];
614 for(dia_size=1; dia_size<=4; dia_size++){
616 const int x= best[0];
617 const int y= best[1];
// nonzero when dia_size is not a power of two
619 if(dia_size&(dia_size-1)) continue;
621 if( x + dia_size > xmax
622 || x - dia_size < xmin
623 || y + dia_size > ymax
624 || y - dia_size < ymin)
627 for(dir= 0; dir<dia_size; dir+=2){
630 CHECK_MV(x + dir , y + dia_size - dir);
631 CHECK_MV(x + dia_size - dir, y - dir );
632 CHECK_MV(x - dir , y - dia_size + dir);
633 CHECK_MV(x - dia_size + dir, y + dir );
// true when one of the points above became the new best
636 if(x!=best[0] || y!=best[1])
// the remaining lines collect and print a histogram of the step sizes taken
641 static int stats[8*8];
642 dx= FFABS(x-best[0]);
643 dy= FFABS(y-best[1]);
645 dx^=dy; dy^=dx; dx^=dy;
648 if(256*256*256*64 % (stats[0]+1)==0){
650 if((i&7)==0) printf("\n");
651 printf("%8d ", stats[i]);
// Hexagon search around best[] starting with radius dia_size.
// For each radius six points are evaluated with CHECK_CLIPPED_MV: two on the
// horizontal axis at distance dia_size and four at vertical distance dia_size
// with a horizontal offset of about half the radius. The pattern is repeated
// at the same radius for as long as best[] keeps moving.
// The radius then shrinks: by 1 per pass when the initial dia_size is not a
// power of two (dec != 0), otherwise it is halved, until it reaches 0.
// The comparison functions are s->dsp.me_cmp[size] and s->dsp.me_cmp[size+1].
661 static int hex_search(MpegEncContext * s, int *best, int dmin,
662 int src_index, int ref_index, int const penalty_factor,
663 int size, int h, int flags, int dia_size)
665 MotionEstContext * const c= &s->me;
666 me_cmp_func cmpf, chroma_cmpf;
669 int map_generation= c->map_generation;
// dec is nonzero when the initial dia_size is not a power of two
671 const int dec= dia_size & (dia_size-1);
673 cmpf= s->dsp.me_cmp[size];
674 chroma_cmpf= s->dsp.me_cmp[size+1];
676 for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
681 CHECK_CLIPPED_MV(x -dia_size , y);
682 CHECK_CLIPPED_MV(x+ dia_size , y);
683 CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
684 CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
// NOTE(review): -dia_size>>1 right-shifts a negative value, which is
// implementation-defined in C; an arithmetic shift is assumed here.
686 CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
687 CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
689 }while(best[0] != x || best[1] != y);
// Search with a scaled eight-point pattern followed by a radius-1 diamond.
// The starting scale is the low 8 bits of c->dia_size. For each scale the
// eight offsets of the hex table (a diamond of radius 2) are multiplied by
// dia_size and evaluated with CHECK_CLIPPED_MV, repeating at the same scale
// for as long as best[] keeps moving. The scale shrinks by 1 per pass when the
// starting value is not a power of two (dec != 0), otherwise it is halved.
// Finally the four direct neighbours are evaluated.
// The comparison functions are s->dsp.me_cmp[size] and s->dsp.me_cmp[size+1].
695 static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
696 int src_index, int ref_index, int const penalty_factor,
697 int size, int h, int flags)
699 MotionEstContext * const c= &s->me;
700 me_cmp_func cmpf, chroma_cmpf;
703 int map_generation= c->map_generation;
705 int dia_size= c->dia_size&0xFF;
706 const int dec= dia_size & (dia_size-1);
// unit pattern, listed as {dx, dy} pairs
707 static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
708 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
710 cmpf= s->dsp.me_cmp[size];
711 chroma_cmpf= s->dsp.me_cmp[size+1];
713 for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
718 CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
720 }while(best[0] != x || best[1] != y);
// final refinement on the four direct neighbours
725 CHECK_CLIPPED_MV(x+1, y);
726 CHECK_CLIPPED_MV(x, y+1);
727 CHECK_CLIPPED_MV(x-1, y);
728 CHECK_CLIPPED_MV(x, y-1);
// Multi-stage search around best[]; the range is c->dia_size with bit 0
// cleared. The visible stages are, in order:
//  - a horizontal scan over x2 in steps of 2 within dia_size-1 of x,
//  - a vertical scan over y2 in steps of 2 within dia_size/2-1 of y,
//  - a full scan of the 5x5 window around (x, y),
//  - the 16-point hex pattern scaled by j = 1 .. dia_size/4,
//  - a final hex_search with radius 2, whose result is returned.
// All scan ranges are clamped to [xmin, xmax] / [ymin, ymax]. Presumably each
// scanned position is scored with one of the CHECK macros -- confirm.
// The comparison functions are s->dsp.me_cmp[size] and s->dsp.me_cmp[size+1].
733 static int umh_search(MpegEncContext * s, int *best, int dmin,
734 int src_index, int ref_index, int const penalty_factor,
735 int size, int h, int flags)
737 MotionEstContext * const c= &s->me;
738 me_cmp_func cmpf, chroma_cmpf;
741 int map_generation= c->map_generation;
742 int x,y,x2,y2, i, j, d;
743 const int dia_size= c->dia_size&0xFE;
// unit pattern of 16 points, listed as {dx, dy} pairs
744 static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
745 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
746 {-2, 3}, { 0, 4}, { 2, 3},
747 {-2,-3}, { 0,-4}, { 2,-3},};
749 cmpf= s->dsp.me_cmp[size];
750 chroma_cmpf= s->dsp.me_cmp[size+1];
754 for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
757 for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
763 for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
764 for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
769 //FIXME prevent the CLIP stuff
771 for(j=1; j<=dia_size/4; j++){
773 CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
777 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
// Evaluate the full-pel candidate (ax, ay) for sab_diamond_search.
// The map lookup and scoring match CHECK_MV: when map[index] differs from key
// the candidate is scored with cmp(), the raw score is stored in
// score_map[index] and the mv_penalty term times penalty_factor is added.
// If the result beats the last (worst) entry of minima[], the insertion slot j
// is found by skipping entries whose height is <= d, the entries from j on are
// moved down by one (the last one is dropped) and slot j receives the new
// height with checked cleared.
// Relies on minima[] being sorted by ascending height.
// ax and ay are expanded many times, so arguments must be free of side effects.
780 #define SAB_CHECK_MV(ax,ay)\
782 const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
783 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
784 /*printf("sab check %d %d\n", ax, ay);*/\
785 if(map[index]!=key){\
786 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
788 score_map[index]= d;\
789 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
790 /*printf("score: %d\n", d);*/\
791 if(d < minima[minima_count-1].height){\
794 while(d >= minima[j].height) j++;\
796 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
798 minima[j].checked= 0;\
799 minima[j].height= d;\
// Search that keeps a sorted list of the best candidates found so far.
// minima_count = |c->dia_size| entries are tracked in minima[].
// Steps visible here:
//  1. Collect every map entry of the current map_generation, decode its x/y
//     from the key, take its score from score_map and add the mv_penalty term
//     for nonzero vectors.
//  2. qsort the collected entries with minima_cmp and pad the list up to
//     minima_count with entries of height 256*256*256*64 at (0, 0).
//  3. Visit each entry that is not yet checked and lies strictly inside the
//     window, evaluate neighbours with SAB_CHECK_MV and mark it checked.
//  4. Take minima[0] as the result in best[] and dmin and, when it lies
//     strictly inside the window, evaluate its four direct neighbours so that
//     their scores are in the map.
// NOTE(review): minima[] has MAX_SAB_SIZE elements; nothing visible here
// bounds minima_count by MAX_SAB_SIZE -- confirm that c->dia_size is limited
// by the caller.
809 #define MAX_SAB_SIZE ME_MAP_SIZE
810 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
811 int src_index, int ref_index, int const penalty_factor,
812 int size, int h, int flags)
814 MotionEstContext * const c= &s->me;
815 me_cmp_func cmpf, chroma_cmpf;
816 Minima minima[MAX_SAB_SIZE];
817 const int minima_count= FFABS(c->dia_size);
821 int map_generation= c->map_generation;
823 cmpf= s->dsp.me_cmp[size];
824 chroma_cmpf= s->dsp.me_cmp[size+1];
826 /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
827 become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
829 for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
830 uint32_t key= map[i];
832 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
// NOTE(review): (-1)<<n left-shifts a negative value, which is undefined
// behaviour in C; an unsigned operand such as ~0U would avoid it.
834 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
836 minima[j].height= score_map[i];
837 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
838 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
839 minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
840 minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
842 // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
843 if( minima[j].x > xmax || minima[j].x < xmin
844 || minima[j].y > ymax || minima[j].y < ymin)
848 if(minima[j].x || minima[j].y)
849 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
854 qsort(minima, j, sizeof(Minima), minima_cmp);
856 for(; j<minima_count; j++){
857 minima[j].height=256*256*256*64;
859 minima[j].x= minima[j].y=0;
862 for(i=0; i<minima_count; i++){
863 const int x= minima[i].x;
864 const int y= minima[i].y;
867 if(minima[i].checked) continue;
869 if( x >= xmax || x <= xmin
870 || y >= ymax || y <= ymin)
875 SAB_CHECK_MV(x , y-1)
876 SAB_CHECK_MV(x , y+1)
878 minima[i].checked= 1;
881 best[0]= minima[0].x;
882 best[1]= minima[0].y;
883 dmin= minima[0].height;
885 if( best[0] < xmax && best[0] > xmin
886 && best[1] < ymax && best[1] > ymin){
888 //ensure that the reference samples for hpel refinement are in the map
889 CHECK_MV(best[0]-1, best[1])
890 CHECK_MV(best[0]+1, best[1])
891 CHECK_MV(best[0], best[1]-1)
892 CHECK_MV(best[0], best[1]+1)
// Diamond search with growing size, from 1 up to c->dia_size.
// For each size the four edges of the diamond around (x, y) = best[] are
// walked. For every edge, start and end restrict dir so that only points
// inside the window [xmin, xmax] x [ymin, ymax] are passed to CHECK_MV:
//   edge 0: (x + dir,            y + dia_size - dir)
//   edge 1: (x + dia_size - dir, y - dir)
//   edge 2: (x - dir,            y - dia_size + dir)
//   edge 3: (x - dia_size + dir, y + dir)
// The comparison functions are s->dsp.me_cmp[size] and s->dsp.me_cmp[size+1].
897 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
898 int src_index, int ref_index, int const penalty_factor,
899 int size, int h, int flags)
901 MotionEstContext * const c= &s->me;
902 me_cmp_func cmpf, chroma_cmpf;
906 int map_generation= c->map_generation;
908 cmpf= s->dsp.me_cmp[size];
909 chroma_cmpf= s->dsp.me_cmp[size+1];
911 for(dia_size=1; dia_size<=c->dia_size; dia_size++){
913 const int x= best[0];
914 const int y= best[1];
// edge 0: start keeps y within ymax, end keeps x within xmax
916 start= FFMAX(0, y + dia_size - ymax);
917 end = FFMIN(dia_size, xmax - x + 1);
918 for(dir= start; dir<end; dir++){
921 //check(x + dir,y + dia_size - dir,0, a0)
922 CHECK_MV(x + dir , y + dia_size - dir);
// edge 1: start keeps x within xmax, end keeps y within ymin
925 start= FFMAX(0, x + dia_size - xmax);
926 end = FFMIN(dia_size, y - ymin + 1);
927 for(dir= start; dir<end; dir++){
930 //check(x + dia_size - dir, y - dir,0, a1)
931 CHECK_MV(x + dia_size - dir, y - dir );
// edge 2: start keeps y within ymin, end keeps x within xmin
934 start= FFMAX(0, -y + dia_size + ymin );
935 end = FFMIN(dia_size, x - xmin + 1);
936 for(dir= start; dir<end; dir++){
939 //check(x - dir,y - dia_size + dir,0, a2)
940 CHECK_MV(x - dir , y - dia_size + dir);
// edge 3: start keeps x within xmin, end keeps y within ymax
943 start= FFMAX(0, -x + dia_size + xmin );
944 end = FFMIN(dia_size, ymax - y + 1);
945 for(dir= start; dir<end; dir++){
948 //check(x - dia_size + dir, y + dir,0, a3)
949 CHECK_MV(x - dia_size + dir, y + dir );
// true when one of the points above became the new best
952 if(x!=best[0] || y!=best[1])
// the remaining lines collect and print a histogram of the step sizes taken
957 static int stats[8*8];
958 dx= FFABS(x-best[0]);
959 dy= FFABS(y-best[1]);
961 if(256*256*256*64 % (stats[0]+1)==0){
963 if((i&7)==0) printf("\n");
964 printf("%6d ", stats[i]);
// Dispatch to one of the search patterns according to c->dia_size and return
// its result; all arguments are forwarded unchanged.
// The tests are evaluated in this order:
//   first test of the chain -> funny_diamond_search
//   dia_size < -1           -> sab_diamond_search
//   dia_size < 2            -> small_diamond_search
//   dia_size > 768          -> umh_search
//   dia_size > 512          -> hex_search, radius = low 8 bits of dia_size
//   dia_size > 256          -> l2s_dia_search
//   otherwise               -> var_diamond_search
974 static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
975 int src_index, int ref_index, int const penalty_factor,
976 int size, int h, int flags){
977 MotionEstContext * const c= &s->me;
979 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
980 else if(c->dia_size<-1)
981 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
982 else if(c->dia_size<2)
983 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
984 else if(c->dia_size>768)
985 return umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
986 else if(c->dia_size>512)
987 return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, c->dia_size&0xFF);
988 else if(c->dia_size>256)
989 return l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
991 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
995 \param P[10][2] a list of candidate mvs to check before starting the
996 iterative search. If one of the candidates is close to the optimal mv, then
997 it takes fewer iterations. And it increases the chance that we find the
1000 static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1001 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1002 int ref_mv_scale, int flags, int size, int h)
1004 MotionEstContext * const c= &s->me;
1005 int best[2]={0, 0}; /*!< x and y coordinates of the best motion vector.
1006 i.e. the difference between the position of the
1007 block currently being encoded and the position of
1008 the block chosen to predict it from. */
1009 int d; ///< the score (cmp + penalty) of any given mv
1010 int dmin; /*!< the best value of d, i.e. the score
1011 corresponding to the mv stored in best[]. */
1014 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
1015 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
1016 me_cmp_func cmpf, chroma_cmpf;
1022 penalty_factor= c->pre_penalty_factor;
1023 cmpf= s->dsp.me_pre_cmp[size];
1024 chroma_cmpf= s->dsp.me_pre_cmp[size+1];
1026 penalty_factor= c->penalty_factor;
1027 cmpf= s->dsp.me_cmp[size];
1028 chroma_cmpf= s->dsp.me_cmp[size+1];
1031 map_generation= update_map_generation(c);
1034 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
1035 map[0]= map_generation;
1038 //FIXME precalc first term below?
1039 if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
1040 dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
1043 if (s->first_slice_line) {
1044 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1045 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1046 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1048 if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
1049 && ( P_LEFT[0] |P_LEFT[1]
1051 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
1057 CHECK_MV( P_MEDIAN[0] >>shift , P_MEDIAN[1] >>shift)
1058 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)-1)
1059 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift) , (P_MEDIAN[1]>>shift)+1)
1060 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift) )
1061 CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift) )
1062 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1063 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1064 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
1065 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
1066 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1070 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
1071 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
1072 if(!s->first_slice_line)
1073 CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1074 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1076 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1077 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1078 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1079 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1080 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1084 if(c->avctx->last_predictor_count){
1085 const int count= c->avctx->last_predictor_count;
1086 const int xstart= FFMAX(0, s->mb_x - count);
1087 const int ystart= FFMAX(0, s->mb_y - count);
1088 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
1089 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
1092 for(mb_y=ystart; mb_y<yend; mb_y++){
1094 for(mb_x=xstart; mb_x<xend; mb_x++){
1095 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
1096 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
1097 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
1099 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
1105 //check(best[0],best[1],0, b0)
1106 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1108 //check(best[0],best[1],0, b1)
1112 // printf("%d %d %d \n", best[0], best[1], dmin);
1116 //this function is dedicated to the braindamaged gcc
// Public entry point for the EPZS search; forwards to
// epzs_motion_search_internal and returns its result.
// When c->flags is 0, h is 16 and size is 0, the call passes these three
// values as literal constants; otherwise c->flags, size and h are passed as
// they are. The internal function is declared av_always_inline, so the
// constant call presumably lets the compiler specialise that copy.
1117 inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1118 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1119 int ref_mv_scale, int size, int h)
1121 MotionEstContext * const c= &s->me;
1122 //FIXME convert other functions in the same way if faster
1123 if(c->flags==0 && h==16 && size==0){
1124 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
1126 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
1128 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
1132 static int epzs_motion_search4(MpegEncContext * s,
1133 int *mx_ptr, int *my_ptr, int P[10][2],
1134 int src_index, int ref_index, int16_t (*last_mv)[2],
1137 MotionEstContext * const c= &s->me;
1141 const int penalty_factor= c->penalty_factor;
1144 const int ref_mv_stride= s->mb_stride;
1145 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1146 me_cmp_func cmpf, chroma_cmpf;
1148 int flags= c->flags;
1151 cmpf= s->dsp.me_cmp[size];
1152 chroma_cmpf= s->dsp.me_cmp[size+1];
1154 map_generation= update_map_generation(c);
1157 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1159 if (s->first_slice_line) {
1160 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1161 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1162 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1163 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1165 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1166 //FIXME try some early stop
1167 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1168 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1169 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1170 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1171 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1172 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1175 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1176 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1177 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1178 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1179 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1182 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1187 // printf("%d %d %d \n", best[0], best[1], dmin);
1191 //try to merge with above FIXME (needs PSNR test)
1192 static int epzs_motion_search2(MpegEncContext * s,
1193 int *mx_ptr, int *my_ptr, int P[10][2],
1194 int src_index, int ref_index, int16_t (*last_mv)[2],
1197 MotionEstContext * const c= &s->me;
1201 const int penalty_factor= c->penalty_factor;
1202 const int size=0; //FIXME pass as arg
1204 const int ref_mv_stride= s->mb_stride;
1205 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1206 me_cmp_func cmpf, chroma_cmpf;
1208 int flags= c->flags;
1211 cmpf= s->dsp.me_cmp[size];
1212 chroma_cmpf= s->dsp.me_cmp[size+1];
1214 map_generation= update_map_generation(c);
1217 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1219 if (s->first_slice_line) {
1220 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1221 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1222 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1223 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1225 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1226 //FIXME try some early stop
1227 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1228 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1229 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1230 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1231 CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1232 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1235 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1236 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1237 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1238 CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1239 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1242 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1247 // printf("%d %d %d \n", best[0], best[1], dmin);