]> git.sesse.net Git - mlt/blob - src/modules/motion_est/filter_motion_est.c
Massive refactoring of image conversion.
[mlt] / src / modules / motion_est / filter_motion_est.c
1 /*
2  *      /brief fast motion estimation filter
3  *      /author Zachary Drew, Copyright 2005
4  *
5  *      Currently only uses Gamma data for comparisonon (bug or feature?)
6  *      SSE optimized where available.
7  *
8  *      Vector orientation: The vector data that is generated for the current frame specifies
9  *      the motion from the previous frame to the current frame. To know how a macroblock
10  *      in the current frame will move in the future, the next frame is needed.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software Foundation,
24  * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25  */
26
27
28 #include "filter_motion_est.h"
29 #include <framework/mlt.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <math.h>
33 #include <string.h>
34 #include <sys/time.h>
35 #include <unistd.h>
36
37 #ifdef USE_SSE
38 #include "sad_sse.h"
39 #endif
40
41 #define NDEBUG
42 #include <assert.h>
43
44 #undef DEBUG
45 #undef DEBUG_ASM
46 #undef BENCHMARK
47 #undef COUNT_COMPARES
48
49 #define DIAMOND_SEARCH 0x0
50 #define FULL_SEARCH 0x1
51 #define SHIFT 8
52 #define MIN(a,b) ((a) > (b) ? (b) : (a))
53 #define ABS(a) ((a) >= 0 ? (a) : (-(a)))
54
55
56 struct motion_est_context_s
57 {
58         int initialized;                                // true if filter has been initialized
59
60 #ifdef COUNT_COMPARES
61         int compares;
62 #endif
63
64         /* same as mlt_frame's parameters */
65         int width, height;
66
67         /* Operational details */
68         int mb_w, mb_h;
69         int xstride, ystride;
70         uint8_t *cache_image;                   // Copy of current frame
71         uint8_t *former_image;                  // Copy of former frame
72         int search_method;
73         int skip_prediction;
74         int shot_change;
75         int limit_x, limit_y;                   // max x and y of a motion vector
76         int initial_thresh;
77         int check_chroma;                       // if check_chroma == 1 then compare chroma
78         int denoise;
79         int previous_msad;
80         int show_reconstruction;
81         int toggle_when_paused;
82         int show_residual;
83
84         /* bounds */
85         struct mlt_geometry_item_s bounds;      // Current bounds (from filters crop_detect, autotrack rectangle, or other)
86
87         /* bounds in macroblock units; macroblocks are completely contained within the boundry */
88         int left_mb, prev_left_mb, right_mb, prev_right_mb;
89         int top_mb, prev_top_mb, bottom_mb, prev_bottom_mb;
90
91         /* size of our vector buffers */
92         int mv_buffer_height, mv_buffer_width, mv_size;
93
94         /* vector buffers */
95         int former_vectors_valid;               //<! true if the previous frame's buffered motion vector data is valid
96         motion_vector *former_vectors;
97         motion_vector *current_vectors;
98         motion_vector *denoise_vectors;
99         mlt_position former_frame_position, current_frame_position;
100
101         /* diagnostic metrics */
102         float predictive_misses;                // How often do the prediction motion vectors fail?
103         int comparison_average;                 // How far does the best estimation deviate from a perfect comparison?
104         int bad_comparisons;
105         int average_length;
106         int average_x, average_y;
107
108         /* run-time configurable comparison functions */
109         int (*compare_reference)(uint8_t *, uint8_t *, int, int, int, int);
110         int (*compare_optimized)(uint8_t *, uint8_t *, int, int, int, int);
111
112 };
113
114 // This is used to constrains pixel operations between two blocks to be within the image boundry
115 inline static int constrain(    int *x, int *y, int *w, int *h,
116                                 const int dx, const int dy,
117                                 const int left, const int right,
118                                 const int top, const int bottom)
119 {
120         uint32_t penalty = 1 << SHIFT;                  // Retain a few extra bits of precision
121         int x2 = *x + dx;
122         int y2 = *y + dy;
123         int w_remains = *w;
124         int h_remains = *h;
125
126         // Origin of macroblock moves left of image boundy
127         if( *x < left || x2 < left ) {
128                 w_remains = *w - left + ((*x < x2) ?  *x : x2);
129                 *x += *w - w_remains;
130         }
131         // Portion of macroblock moves right of image boundry
132         else if( *x + *w > right || x2 + *w > right )
133                 w_remains = right - ((*x > x2) ? *x : x2);
134
135         // Origin of macroblock moves above image boundy
136         if( *y < top || y2 < top ) {
137                 h_remains = *h - top + ((*y < y2) ? *y : y2);
138                 *y += *h - h_remains;
139         }
140         // Portion of macroblock moves bellow image boundry
141         else if( *y + *h > bottom || y2 + *h > bottom )
142                 h_remains = bottom - ((*y > y2) ?  *y : y2);
143
144         if( w_remains == *w && h_remains == *h ) return penalty;
145         if( w_remains <= 0 || h_remains <= 0) return 0; // Block is clipped out of existance
146         penalty = (*w * *h * penalty)
147                 / ( w_remains * h_remains);             // Recipricol of the fraction of the block that remains
148
149         assert(*x >= left); assert(x2 + *w - w_remains >= left);
150         assert(*y >= top); assert(y2 + *h - h_remains >= top);
151         assert(*x + w_remains <= right); assert(x2 + w_remains <= right);
152         assert(*y + h_remains <= bottom); assert(y2 + h_remains <= bottom);
153
154         *w = w_remains;                                 // Update the width and height
155         *h = h_remains;
156
157         return penalty;
158 }
159
160 /** /brief Reference Sum of Absolute Differences comparison function
161 *
162 */
163 static int sad_reference( uint8_t *block1, uint8_t *block2, const int xstride, const int ystride, const int w, const int h )
164 {
165         int i, j, score = 0;
166         for ( j = 0; j < h; j++ ){
167                 for ( i = 0; i < w; i++ ){
168                         score += ABS( block1[i*xstride] - block2[i*xstride] );
169                 }
170                 block1 += ystride;
171                 block2 += ystride;
172         }
173
174         return score;
175 }
176
177
178 /** /brief Abstracted block comparison function
179 */
180 inline static int block_compare( uint8_t *block1,
181                            uint8_t *block2,
182                            int x,
183                            int y,
184                            int dx,
185                            int dy,
186                            struct motion_est_context_s *c)
187 {
188
189 #ifdef COUNT_COMPARES
190         c->compares++;
191 #endif
192
193         int score;
194
195         // Default comparison may be overridden by the slower, more capable reference comparison
196         int (*cmp)(uint8_t *, uint8_t *, int, int, int, int) = c->compare_optimized;
197
198         // vector displacement limited has been exceeded
199         if( ABS( dx ) >= c->limit_x || ABS( dy ) >= c->limit_y )
200                 return MAX_MSAD;
201
202         int mb_w = c->mb_w;     // Some writeable local copies
203         int mb_h = c->mb_h;
204
205         // Determine if either macroblock got clipped
206         int penalty = constrain( &x, &y, &mb_w, &mb_h, dx, dy, 0, c->width, 0, c->height);
207
208         // Some gotchas
209         if( penalty == 0 )                      // Clipped out of existance: Return worst score
210                 return MAX_MSAD;
211         else if( penalty != 1<<SHIFT )          // Nonstandard macroblock dimensions: Disable SIMD optimizizations.
212                 cmp = c->compare_reference;
213
214         // Calculate the memory locations of the macroblocks
215         block1 += x      * c->xstride + y       * c->ystride;
216         block2 += (x+dx) * c->xstride + (y+dy)  * c->ystride;
217
218         #ifdef DEBUG_ASM
219         if( penalty == 1<<SHIFT ){
220                 score = c->compare_reference( block1, block2, c->xstride, c->ystride, mb_w, mb_h );
221                 int score2 = c->compare_optimized( block1, block2, c->xstride, c->ystride, mb_w, mb_h );
222                 if ( score != score2 )
223                         fprintf(stderr, "Your assembly doesn't work! Reference: %d Asm: %d\n", score, score2);
224         }
225         else
226         #endif
227
228         score = cmp( block1, block2, c->xstride, c->ystride, mb_w, mb_h );
229
230         return ( score * penalty ) >> SHIFT;                    // Ditch the extra precision
231 }
232
233 static inline void check_candidates (   uint8_t *ref,
234                                         uint8_t *candidate_base,
235                                         const int x,
236                                         const int y,
237                                         const motion_vector *candidates,// Contains to_x & to_y
238                                         const int count,                // Number of candidates
239                                         const int unique,               // Sometimes we know the candidates are unique
240                                         motion_vector *result,
241                                         struct motion_est_context_s *c )
242 {
243                 int score, i, j;
244                 /* Scan for the best candidate */
245                 for ( i = 0; i < count; i++ )
246                 {
247                         // this little dohicky ignores duplicate candidates, if they are possible
248                         if ( unique == 0 ) {
249                                 j = 0;
250                                 while ( j < i )
251                                 {
252                                         if ( candidates[j].dx == candidates[i].dx &&
253                                              candidates[j].dy == candidates[i].dy )
254                                                         goto next_for_loop;
255
256                                         j++;
257                                 }
258                         }
259
260                         // Luma
261                         score = block_compare( ref, candidate_base,
262                                                 x, y,
263                                                 candidates[i].dx,       // from
264                                                 candidates[i].dy,
265                                                 c);
266
267                         if ( score < result->msad ) {   // New minimum
268                                 result->dx = candidates[i].dx;
269                                 result->dy = candidates[i].dy;
270                                 result->msad = score;
271                         }
272                         next_for_loop:;
273                 }
274 }
275
276 /* /brief Diamond search
277 * Operates on a single macroblock
278 */
279 static inline void diamond_search(
280                         uint8_t *ref,                           //<! Image data from previous frame
281                         uint8_t *candidate_base,                //<! Image data in current frame
282                         const int x,                            //<! X upper left corner of macroblock
283                         const int y,                            //<! U upper left corner of macroblock
284                         struct motion_vector_s *result,         //<! Best predicted mv and eventual result
285                         struct motion_est_context_s *c)         //<! motion estimation context
286 {
287
288         // diamond search pattern
289         motion_vector candidates[4];
290
291         // Keep track of best and former best candidates
292         motion_vector best, former;
293         best.dx = 0;
294         best.dy = 0;
295         former.dx = 0;
296         former.dy = 0;
297
298         // The direction of the refinement needs to be known
299         motion_vector current;
300
301         int i, first = 1;
302
303         // Loop through the search pattern
304         while( 1 ) {
305
306                 current.dx = result->dx;
307                 current.dy = result->dy;
308
309                 if ( first == 1 )       // Set the initial pattern
310                 {
311                         candidates[0].dx = result->dx + 1; candidates[0].dy = result->dy + 0;
312                         candidates[1].dx = result->dx + 0; candidates[1].dy = result->dy + 1;
313                         candidates[2].dx = result->dx - 1; candidates[2].dy = result->dy + 0;
314                         candidates[3].dx = result->dx + 0; candidates[3].dy = result->dy - 1;
315                         i = 4;
316                 }
317                 else     // Construct the next portion of the search pattern
318                 {
319                         candidates[0].dx = result->dx + best.dx;
320                         candidates[0].dy = result->dy + best.dy;
321                         if (best.dx == former.dx && best.dy == former.dy) {
322                                 candidates[1].dx = result->dx + best.dy;
323                                 candidates[1].dy = result->dy + best.dx;                //      Yes, the wires
324                                 candidates[2].dx = result->dx - best.dy;                //      are crossed
325                                 candidates[2].dy = result->dy - best.dx;
326                                 i = 3;
327                         } else {
328                                 candidates[1].dx = result->dx + former.dx;
329                                 candidates[1].dy = result->dy + former.dy;
330                                 i = 2;
331                         }
332
333                         former.dx = best.dx; former.dy = best.dy;       // Keep track of new former best
334                 }
335
336                 check_candidates ( ref, candidate_base, x, y, candidates, i, 1, result, c );
337
338                 // Which candidate was the best?
339                 best.dx = result->dx - current.dx;
340                 best.dy = result->dy - current.dy;
341
342                 // A better canidate was not found
343                 if ( best.dx == 0 && best.dy == 0 )
344                         return;
345
346                 if ( first == 1 ){
347                         first = 0;
348                         former.dx = best.dx; former.dy = best.dy; // First iteration, sensible value for former.d*
349                 }
350         }
351 }
352
353 /* /brief Full (brute) search
354 * Operates on a single macroblock
355 */
356 __attribute__((used))
357 static void full_search(
358                         uint8_t *ref,                           //<! Image data from previous frame
359                         uint8_t *candidate_base,                //<! Image data in current frame
360                         int x,                                  //<! X upper left corner of macroblock
361                         int y,                                  //<! U upper left corner of macroblock
362                         struct motion_vector_s *result,         //<! Best predicted mv and eventual result
363                         struct motion_est_context_s *c)         //<! motion estimation context
364 {
365         // Keep track of best candidate
366         int i,j,score;
367
368         // Go loopy
369         for( i = -c->mb_w; i <= c->mb_w; i++ ){
370                 for( j = -c->mb_h; j <=  c->mb_h; j++ ){
371
372                         score = block_compare( ref, candidate_base,
373                                                 x,
374                                                 y,
375                                                 x + i,
376                                                 y + j,
377                                                 c);
378
379                         if ( score < result->msad ) {
380                                 result->dx = i;
381                                 result->dy = j;
382                                 result->msad = score;
383                         }
384                 }
385         }
386 }
387
388 // Macros for pointer calculations
389 #define CURRENT(i,j)    ( c->current_vectors + (j)*c->mv_buffer_width + (i) )
390 #define FORMER(i,j)     ( c->former_vectors + (j)*c->mv_buffer_width + (i) )
391 #define DENOISE(i,j)    ( c->denoise_vectors + (j)*c->mv_buffer_width + (i) )
392
393 int ncompare (const void * a, const void * b)
394 {
395         return ( *(const int*)a - *(const int*)b );
396 }
397
398 // motion vector denoising
399 // for x and y components seperately,
400 // change the vector to be the median value of the 9 adjacent vectors
401 static void median_denoise( motion_vector *v, struct motion_est_context_s *c )
402 {
403         int xvalues[9], yvalues[9];
404
405         int i,j,n;
406         for( j = c->top_mb; j <= c->bottom_mb; j++ )
407                 for( i = c->left_mb; i <= c->right_mb; i++ ){
408                 {
409                         n = 0;
410
411                         xvalues[n  ] = CURRENT(i,j)->dx; // Center
412                         yvalues[n++] = CURRENT(i,j)->dy;
413
414                         if( i > c->left_mb ) // Not in First Column
415                         {
416                                 xvalues[n  ] = CURRENT(i-1,j)->dx; // Left
417                                 yvalues[n++] = CURRENT(i-1,j)->dy;
418
419                                 if( j > c->top_mb ) {
420                                         xvalues[n  ] = CURRENT(i-1,j-1)->dx; // Upper Left
421                                         yvalues[n++] = CURRENT(i-1,j-1)->dy;
422                                 }
423
424                                 if( j < c->bottom_mb ) {
425                                         xvalues[n  ] = CURRENT(i-1,j+1)->dx; // Bottom Left
426                                         yvalues[n++] = CURRENT(i-1,j+1)->dy;
427                                 }
428                         }
429                         if( i < c->right_mb ) // Not in Last Column
430                         {
431                                 xvalues[n  ] = CURRENT(i+1,j)->dx; // Right
432                                 yvalues[n++] = CURRENT(i+1,j)->dy;
433
434
435                                 if( j > c->top_mb ) {
436                                         xvalues[n  ] = CURRENT(i+1,j-1)->dx; // Upper Right
437                                         yvalues[n++] = CURRENT(i+1,j-1)->dy;
438                                 }
439
440                                 if( j < c->bottom_mb ) {
441                                         xvalues[n  ] = CURRENT(i+1,j+1)->dx; // Bottom Right
442                                         yvalues[n++] = CURRENT(i+1,j+1)->dy;
443                                 }
444                         }
445                         if( j > c->top_mb ) // Not in First Row
446                         {
447                                 xvalues[n  ] = CURRENT(i,j-1)->dx; // Top
448                                 yvalues[n++] = CURRENT(i,j-1)->dy;
449                         }
450
451                         if( j < c->bottom_mb ) // Not in Last Row
452                         {
453                                 xvalues[n  ] = CURRENT(i,j+1)->dx; // Bottom
454                                 yvalues[n++] = CURRENT(i,j+1)->dy;
455                         }
456
457                         qsort (xvalues, n, sizeof(int), ncompare);
458                         qsort (yvalues, n, sizeof(int), ncompare);
459
460                         if( n % 2 == 1 ) {
461                                 DENOISE(i,j)->dx = xvalues[n/2];
462                                 DENOISE(i,j)->dy = yvalues[n/2];
463                         }
464                         else {
465                                 DENOISE(i,j)->dx = (xvalues[n/2] + xvalues[n/2+1])/2;
466                                 DENOISE(i,j)->dy = (yvalues[n/2] + yvalues[n/2+1])/2;
467                         }
468                 }
469         }
470
471         motion_vector *t = c->current_vectors;
472         c->current_vectors = c->denoise_vectors;
473         c->denoise_vectors = t;
474
475 }
476
477 // Credits: ffmpeg
478 // return the median
479 static inline int median_predictor(int a, int b, int c) {
480         if ( a > b ){
481                 if ( c > b ){
482                         if ( c > a  ) b = a;
483                         else      b = c;
484                 }
485         } else {
486                 if ( b > c ){
487                         if ( c > a ) b = c;
488                         else     b = a;
489                 }
490         }
491         return b;
492 }
493
494
495 /** /brief Motion search
496 *
497 * For each macroblock in the current frame, estimate the block from the last frame that
498 * matches best.
499 *
500 * Vocab: Colocated - the pixel in the previous frame at the current position
501 *
502 * Based on enhanced predictive zonal search. [Tourapis 2002]
503 */
504 static void motion_search( uint8_t *from,                       //<! Image data.
505                            uint8_t *to,                         //<! Image data. Rigid grid.
506                            struct motion_est_context_s *c)      //<! The context
507 {
508
509 #ifdef COUNT_COMPARES
510         compares = 0;
511 #endif
512
513         motion_vector candidates[10];
514         motion_vector *here;            // This one gets used alot (about 30 times per macroblock)
515         int n = 0;
516
517         int i, j, count=0;
518
519         // For every macroblock, perform motion vector estimation
520         for( i = c->left_mb; i <= c->right_mb; i++ ){
521          for( j = c->top_mb; j <= c->bottom_mb; j++ ){
522
523                 here = CURRENT(i,j);
524                 here->valid = 1;
525                 here->color = 100;
526                 here->msad = MAX_MSAD;
527                 count++;
528                 n = 0;
529
530
531                 /* Stack the predictors [i.e. checked in reverse order] */
532
533                 /* Adjacent to collocated */
534                 if( c->former_vectors_valid )
535                 {
536                         // Top of colocated
537                         if( j > c->prev_top_mb ){// && COL_TOP->valid ){
538                                 candidates[n  ].dx = FORMER(i,j-1)->dx;
539                                 candidates[n++].dy = FORMER(i,j-1)->dy;
540                         }
541
542                         // Left of colocated
543                         if( i > c->prev_left_mb ){// && COL_LEFT->valid ){
544                                 candidates[n  ].dx = FORMER(i-1,j)->dx;
545                                 candidates[n++].dy = FORMER(i-1,j)->dy;
546                         }
547
548                         // Right of colocated
549                         if( i < c->prev_right_mb ){// && COL_RIGHT->valid ){
550                                 candidates[n  ].dx = FORMER(i+1,j)->dx;
551                                 candidates[n++].dy = FORMER(i+1,j)->dy;
552                         }
553
554                         // Bottom of colocated
555                         if( j < c->prev_bottom_mb ){// && COL_BOTTOM->valid ){
556                                 candidates[n  ].dx = FORMER(i,j+1)->dx;
557                                 candidates[n++].dy = FORMER(i,j+1)->dy;
558                         }
559
560                         // And finally, colocated
561                         candidates[n  ].dx = FORMER(i,j)->dx;
562                         candidates[n++].dy = FORMER(i,j)->dy;
563                 }
564
565                 // For macroblocks not in the top row
566                 if ( j > c->top_mb) {
567
568                         // Top if ( TOP->valid ) {
569                                 candidates[n  ].dx = CURRENT(i,j-1)->dx;
570                                 candidates[n++].dy = CURRENT(i,j-1)->dy;
571                         //}
572
573                         // Top-Right, macroblocks not in the right row
574                         if ( i < c->right_mb ){// && TOP_RIGHT->valid ) {
575                                 candidates[n  ].dx = CURRENT(i+1,j-1)->dx;
576                                 candidates[n++].dy = CURRENT(i+1,j-1)->dy;
577                         }
578                 }
579
580                 // Left, Macroblocks not in the left column
581                 if ( i > c->left_mb ){// && LEFT->valid ) {
582                         candidates[n  ].dx = CURRENT(i-1,j)->dx;
583                         candidates[n++].dy = CURRENT(i-1,j)->dy;
584                 }
585
586                 /* Median predictor vector (median of left, top, and top right adjacent vectors) */
587                 if ( i > c->left_mb && j > c->top_mb && i < c->right_mb
588                          )//&& LEFT->valid && TOP->valid && TOP_RIGHT->valid )
589                 {
590                         candidates[n  ].dx = median_predictor( CURRENT(i-1,j)->dx, CURRENT(i,j-1)->dx, CURRENT(i+1,j-1)->dx);
591                         candidates[n++].dy = median_predictor( CURRENT(i-1,j)->dy, CURRENT(i,j-1)->dy, CURRENT(i+1,j-1)->dy);
592                 }
593
594                 // Zero vector
595                 candidates[n  ].dx = 0;
596                 candidates[n++].dy = 0;
597
598                 int x = i * c->mb_w;
599                 int y = j * c->mb_h;
600                 check_candidates ( to, from, x, y, candidates, n, 0, here, c );
601
602
603 #ifndef FULLSEARCH
604                 diamond_search( to, from, x, y, here, c);
605 #else
606                 full_search( to, from, x, y, here, c);
607 #endif
608
609                 assert( x + c->mb_w + here->dx > 0 );   // All macroblocks must have area > 0
610                 assert( y + c->mb_h + here->dy > 0 );
611                 assert( x + here->dx < c->width );
612                 assert( y + here->dy < c->height );
613
614          } /* End column loop */
615         } /* End row loop */
616
617 #ifdef USE_SSE
618         asm volatile ( "emms" );
619 #endif
620
621 #ifdef COUNT_COMPARES
622         fprintf(stderr, "%d comparisons per block were made", compares/count);
623 #endif
624         return;
625 }
626
627 void collect_post_statistics( struct motion_est_context_s *c ) {
628
629         c->comparison_average = 0;
630         c->average_length = 0;
631         c->average_x = 0;
632         c->average_y = 0;
633
634         int i, j, count = 0;
635
636         for ( i = c->left_mb; i <= c->right_mb; i++ ){
637          for ( j = c->top_mb; j <= c->bottom_mb; j++ ){
638
639                 count++;
640                 c->comparison_average += CURRENT(i,j)->msad;
641                 c->average_x += CURRENT(i,j)->dx;
642                 c->average_y += CURRENT(i,j)->dy;
643
644
645          }
646         }
647
648         if ( count > 0 )
649         {
650                 c->comparison_average /= count;
651                 c->average_x /= count;
652                 c->average_y /= count;
653                 c->average_length = sqrt( c->average_x * c->average_x + c->average_y * c->average_y );
654         }
655
656 }
657
658 static void init_optimizations( struct motion_est_context_s *c )
659 {
660         switch(c->mb_w){
661 #ifdef USE_SSE
662                 case 4:  if(c->mb_h == 4)       c->compare_optimized = sad_sse_422_luma_4x4;
663                          else                           c->compare_optimized = sad_sse_422_luma_4w;
664                          break;
665                 case 8:  if(c->mb_h == 8)  c->compare_optimized = sad_sse_422_luma_8x8;
666                          else                           c->compare_optimized = sad_sse_422_luma_8w;
667                          break;
668                 case 16: if(c->mb_h == 16) c->compare_optimized = sad_sse_422_luma_16x16;
669                          else                           c->compare_optimized = sad_sse_422_luma_16w;
670                          break;
671                 case 32: if(c->mb_h == 32) c->compare_optimized = sad_sse_422_luma_32x32;
672                          else                           c->compare_optimized = sad_sse_422_luma_32w;
673                          break;
674                 case 64: c->compare_optimized = sad_sse_422_luma_64w;
675                          break;
676 #endif
677                 default: c->compare_optimized = sad_reference;
678                          break;
679         }
680 }
681
682 inline static void set_red(uint8_t *image, struct motion_est_context_s *c)
683 {
684         int n;
685         for( n = 0; n < c->width * c->height * 2; n+=4 )
686         {
687                 image[n]   = 79;
688                 image[n+1] = 91;
689                 image[n+2] = 79;
690                 image[n+3] = 237;
691         }
692
693 }
694
695 static void show_residual( uint8_t *result,  struct motion_est_context_s *c )
696 {
697         int i, j;
698         int x,y,w,h;
699         int dx, dy;
700         int tx,ty;
701         uint8_t *b, *r;
702
703 //      set_red(result,c);
704
705         for( j = c->top_mb; j <= c->bottom_mb; j++ ){
706          for( i = c->left_mb; i <= c->right_mb; i++ ){
707
708                 dx = CURRENT(i,j)->dx;
709                 dy = CURRENT(i,j)->dy;
710                 w = c->mb_w;
711                 h = c->mb_h;
712                 x = i * w;
713                 y = j * h;
714
715                 // Denoise function caused some blocks to be completely clipped, ignore them
716                 if (constrain( &x, &y, &w, &h, dx, dy, 0, c->width, 0, c->height) == 0 )
717                         continue;
718
719                 for( ty = y; ty < y + h ; ty++ ){
720                  for( tx = x; tx < x + w ; tx++ ){
721
722                         b = c->former_image +  (tx+dx)*c->xstride + (ty+dy)*c->ystride;
723                         r = result +            tx*c->xstride + ty*c->ystride;
724
725                         r[0] = 16 + ABS( r[0] - b[0] );
726
727                         if( dx % 2 == 0 )
728                                 r[1] = 128 + ABS( r[1] - b[1] );
729                         else
730                                 // FIXME: may exceed boundies
731                                 r[1] = 128 + ABS( r[1] - ( *(b-1) + b[3] ) /2 );
732                  }
733                 }
734          }
735         }
736 }
737
738 static void show_reconstruction( uint8_t *result, struct motion_est_context_s *c )
739 {
740         int i, j;
741         int x,y,w,h;
742         int dx,dy;
743         uint8_t *r, *s;
744         int tx,ty;
745
746         for( i = c->left_mb; i <= c->right_mb; i++ ){
747          for( j = c->top_mb; j <= c->bottom_mb; j++ ){
748
749                 dx = CURRENT(i,j)->dx;
750                 dy = CURRENT(i,j)->dy;
751                 w = c->mb_w;
752                 h = c->mb_h;
753                 x = i * w;
754                 y = j * h;
755
756                 // Denoise function caused some blocks to be completely clipped, ignore them
757                 if (constrain( &x, &y, &w, &h, dx, dy, 0, c->width, 0, c->height) == 0 )
758                         continue;
759
760                 for( ty = y; ty < y + h ; ty++ ){
761                  for( tx = x; tx < x + w ; tx++ ){
762
763                         r = result +          tx*c->xstride + ty*c->ystride;
764                         s = c->former_image + (tx+dx)*c->xstride + (ty+dy)*c->ystride;
765
766                         r[0] = s[0];
767
768                         if( dx % 2 == 0 )
769                                 r[1] = s[1];
770                         else
771                                 // FIXME: may exceed boundies
772                                 r[1] = ( *(s-1) + s[3] ) /2;
773                  }
774                 }
775          }
776         }
777 }
778
779 // Image stack(able) method
780 static int filter_get_image( mlt_frame frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
781 {
782         // Get the filter
783         mlt_filter filter = mlt_frame_pop_service( frame );
784
785         // Get the motion_est context object
786         struct motion_est_context_s *c = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "context", NULL);
787
788
789         // Get the new image and frame number
790         *format = mlt_image_yuv422;
791         int error = mlt_frame_get_image( frame, image, format, width, height, 1 );
792
793         #ifdef BENCHMARK
794         struct timeval start; gettimeofday(&start, NULL );
795         #endif
796
797
798         if( error != 0 )
799                 mlt_properties_debug( MLT_FRAME_PROPERTIES(frame), "error after mlt_frame_get_image() in motion_est", stderr );
800
801         c->current_frame_position = mlt_frame_get_position( frame );
802
803         /* Context Initialization */
804         if ( c->initialized == 0 ) {
805
806                 // Get the filter properties object
807                 mlt_properties properties = mlt_filter_properties( filter );
808
809                 c->width = *width;
810                 c->height = *height;
811
812                 /* Get parameters that may have been overridden */
813                 if( mlt_properties_get( properties, "macroblock_width") != NULL )
814                         c->mb_w = mlt_properties_get_int( properties, "macroblock_width");
815
816                 if( mlt_properties_get( properties, "macroblock_height") != NULL )
817                         c->mb_h = mlt_properties_get_int( properties, "macroblock_height");
818
819                 if( mlt_properties_get( properties, "prediction_thresh") != NULL )
820                         c->initial_thresh = mlt_properties_get_int( properties, "prediction_thresh" );
821                 else
822                         c->initial_thresh = c->mb_w * c->mb_h;
823
824                 if( mlt_properties_get( properties, "search_method") != NULL )
825                         c->search_method = mlt_properties_get_int( properties, "search_method");
826
827                 if( mlt_properties_get( properties, "skip_prediction") != NULL )
828                         c->skip_prediction = mlt_properties_get_int( properties, "skip_prediction");
829
830                 if( mlt_properties_get( properties, "limit_x") != NULL )
831                         c->limit_x = mlt_properties_get_int( properties, "limit_x");
832
833                 if( mlt_properties_get( properties, "limit_y") != NULL )
834                         c->limit_y = mlt_properties_get_int( properties, "limit_y");
835
836                 if( mlt_properties_get( properties, "check_chroma" ) != NULL )
837                         c->check_chroma = mlt_properties_get_int( properties, "check_chroma" );
838
839                 if( mlt_properties_get( properties, "denoise" ) != NULL )
840                         c->denoise = mlt_properties_get_int( properties, "denoise" );
841
842                 if( mlt_properties_get( properties, "show_reconstruction" ) != NULL )
843                         c->show_reconstruction = mlt_properties_get_int( properties, "show_reconstruction" );
844
845                 if( mlt_properties_get( properties, "show_residual" ) != NULL )
846                         c->show_residual = mlt_properties_get_int( properties, "show_residual" );
847
848                 if( mlt_properties_get( properties, "toggle_when_paused" ) != NULL )
849                         c->toggle_when_paused = mlt_properties_get_int( properties, "toggle_when_paused" );
850
851                 init_optimizations( c );
852
853                 // Calculate the dimensions in macroblock units
854                 c->mv_buffer_width = (*width / c->mb_w);
855                 c->mv_buffer_height = (*height / c->mb_h);
856
857                 // Size of the motion vector buffer
858                 c->mv_size =  c->mv_buffer_width * c->mv_buffer_height * sizeof(struct motion_vector_s);
859
860                 // Allocate the motion vector buffers
861                 c->former_vectors = mlt_pool_alloc( c->mv_size );
862                 c->current_vectors = mlt_pool_alloc( c->mv_size );
863                 c->denoise_vectors = mlt_pool_alloc( c->mv_size );
864
865                 // Register motion buffers for destruction
866                 mlt_properties_set_data( properties, "current_motion_vectors", (void *)c->current_vectors, 0, mlt_pool_release, NULL );
867                 mlt_properties_set_data( properties, "former_motion_vectors", (void *)c->former_vectors, 0, mlt_pool_release, NULL );
868                 mlt_properties_set_data( properties, "denoise_motion_vectors", (void *)c->denoise_vectors, 0, mlt_pool_release, NULL );
869
870                 c->former_vectors_valid = 0;
871                 memset( c->former_vectors, 0, c->mv_size );
872
873                 c->xstride = 2;
874                 c->ystride = c->xstride * *width;
875
876                 // Allocate a cache for the previous frame's image
877                 c->former_image = mlt_pool_alloc( *width * *height * 2 );
878                 c->cache_image = mlt_pool_alloc( *width * *height * 2 );
879
880                 // Register for destruction
881                 mlt_properties_set_data( properties, "cache_image", (void *)c->cache_image, 0, mlt_pool_release, NULL );
882                 mlt_properties_set_data( properties, "former_image", (void *)c->former_image, 0, mlt_pool_release, NULL );
883
884                 c->former_frame_position = c->current_frame_position;
885                 c->previous_msad = 0;
886
887                 c->initialized = 1;
888         }
889
890         /* Check to see if somebody else has given us bounds */
891         struct mlt_geometry_item_s *bounds = mlt_properties_get_data( MLT_FRAME_PROPERTIES( frame ), "bounds", NULL );
892
893         if( bounds != NULL ) {
894                 // translate pixel units (from bounds) to macroblock units
895                 // make sure whole macroblock stays within bounds
896                 c->left_mb = ( bounds->x + c->mb_w - 1 ) / c->mb_w;
897                 c->top_mb = ( bounds->y + c->mb_h - 1 ) / c->mb_h;
898                 c->right_mb = ( bounds->x + bounds->w ) / c->mb_w - 1;
899                 c->bottom_mb = ( bounds->y + bounds->h ) / c->mb_h - 1;
900                 c->bounds.x = bounds->x;
901                 c->bounds.y = bounds->y;
902                 c->bounds.w = bounds->w;
903                 c->bounds.h = bounds->h;
904         } else {
905                 c->left_mb = c->prev_left_mb = 0;
906                 c->top_mb = c->prev_top_mb = 0;
907                 c->right_mb = c->prev_right_mb = c->mv_buffer_width - 1;        // Zero indexed
908                 c->bottom_mb = c->prev_bottom_mb = c->mv_buffer_height - 1;
909                 c->bounds.x = 0;
910                 c->bounds.y = 0;
911                 c->bounds.w = *width;
912                 c->bounds.h = *height;
913         }
914
915         // If video is advancing, run motion vector algorithm and etc...
916         if( c->former_frame_position + 1 == c->current_frame_position )
917         {
918
919                 // Swap the motion vector buffers and reuse allocated memory
920                 struct motion_vector_s *temp = c->current_vectors;
921                 c->current_vectors = c->former_vectors;
922                 c->former_vectors = temp;
923
924                 // This is done because filter_vismv doesn't pay attention to frame boundry
925                 memset( c->current_vectors, 0, c->mv_size );
926
927                 // Perform the motion search
928                 motion_search( c->cache_image, *image, c );
929
930                 collect_post_statistics( c );
931
932
933                 // Detect shot changes
934                 if( c->comparison_average > 10 * c->mb_w * c->mb_h &&
935                     c->comparison_average > c->previous_msad * 2 )
936                 {
937                         fprintf(stderr, " - SAD: %d   <<Shot change>>\n", c->comparison_average);
938                         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", 1);
939                 //      c->former_vectors_valid = 0; // Invalidate the previous frame's predictors
940                         c->shot_change = 1;
941                 }
942                 else {
943                         c->former_vectors_valid = 1;
944                         c->shot_change = 0;
945                         //fprintf(stderr, " - SAD: %d\n", c->comparison_average);
946                 }
947
948                 c->previous_msad = c->comparison_average;
949
950                 if( c->comparison_average != 0 ) { // If the frame is not a duplicate of the previous frame
951
952                         // denoise the vector buffer
953                         if( c->denoise )
954                                 median_denoise( c->current_vectors, c );
955
956                         // Pass the new vector data into the frame
957                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
958                                          (void*)c->current_vectors, c->mv_size, NULL, NULL );
959
960                         // Cache the frame's image. Save the old cache. Reuse memory.
961                         // After this block, exactly two unique frames will be cached
962                         uint8_t *timg = c->cache_image;
963                         c->cache_image = c->former_image;
964                         c->former_image = timg;
965                         memcpy( c->cache_image, *image, *width * *height * c->xstride );
966
967
968                 }
969                 else {
970                         // Undo the Swap, This fixes the ugliness caused by a duplicate frame
971                         temp = c->current_vectors;
972                         c->current_vectors = c->former_vectors;
973                         c->former_vectors = temp;
974                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
975                                          (void*)c->former_vectors, c->mv_size, NULL, NULL );
976                 }
977
978
979                 if( c->shot_change == 1)
980                         ;
981                 else if( c->show_reconstruction )
982                         show_reconstruction( *image, c );
983                 else if( c->show_residual )
984                         show_residual( *image, c );
985
986         }
987         // paused
988         else if( c->former_frame_position == c->current_frame_position )
989         {
990                 // Pass the old vector data into the frame if it's valid
991                 if( c->former_vectors_valid == 1 ) {
992                         mlt_properties_set_data( MLT_FRAME_PROPERTIES( frame ), "motion_est.vectors",
993                                          (void*)c->current_vectors, c->mv_size, NULL, NULL );
994
995                         if( c->shot_change == 1)
996                                 ;
997                         else if( c->toggle_when_paused == 1 ) {
998                                 if( c->show_reconstruction )
999                                         show_reconstruction( *image, c );
1000                                 else if( c->show_residual )
1001                                         show_residual( *image, c );
1002                                 c->toggle_when_paused = 2;
1003                         }
1004                         else if( c->toggle_when_paused == 2 )
1005                                 c->toggle_when_paused = 1;
1006                         else {
1007                                 if( c->show_reconstruction )
1008                                         show_reconstruction( *image, c );
1009                                 else if( c->show_residual )
1010                                         show_residual( *image, c );
1011                         }
1012
1013                 }
1014
1015                 mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "shot_change", c->shot_change);
1016         }
1017         // there was jump in frame number
1018         else {
1019 //              fprintf(stderr, "Warning: there was a frame number jumped from %d to %d.\n", c->former_frame_position, c->current_frame_position);
1020                 c->former_vectors_valid = 0;
1021         }
1022
1023
1024         // Cache our bounding geometry for the next frame's processing
1025         c->prev_left_mb = c->left_mb;
1026         c->prev_top_mb = c->top_mb;
1027         c->prev_right_mb = c->right_mb;
1028         c->prev_bottom_mb = c->bottom_mb;
1029
1030         // Remember which frame this is
1031         c->former_frame_position = c->current_frame_position;
1032
1033
1034         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_width", c->mb_w );
1035         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.macroblock_height", c->mb_h );
1036         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.left_mb", c->left_mb );
1037         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.right_mb", c->right_mb );
1038         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.top_mb", c->top_mb );
1039         mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame ), "motion_est.bottom_mb", c->bottom_mb );
1040
1041         #ifdef BENCHMARK
1042         struct timeval finish; gettimeofday(&finish, NULL ); int difference = (finish.tv_sec - start.tv_sec) * 1000000 + (finish.tv_usec - start.tv_usec);
1043         fprintf(stderr, " in frame %d:%d usec\n", c->current_frame_position, difference);
1044         #endif
1045
1046
1047         return error;
1048 }
1049
1050
1051
1052 /** filter processing.
1053 */
1054
1055 static mlt_frame filter_process( mlt_filter this, mlt_frame frame )
1056 {
1057
1058         // Keeps tabs on the filter object
1059         mlt_frame_push_service( frame, this);
1060
1061         // Push the frame filter
1062         mlt_frame_push_get_image( frame, filter_get_image );
1063
1064         return frame;
1065 }
1066
1067 /** Constructor for the filter.
1068 */
1069 mlt_filter filter_motion_est_init( mlt_profile profile, mlt_service_type type, const char *id, char *arg )
1070 {
1071         mlt_filter this = mlt_filter_new( );
1072         if ( this != NULL )
1073         {
1074                 // Get the properties object
1075                 mlt_properties properties = MLT_FILTER_PROPERTIES( this );
1076
1077                 // Initialize the motion estimation context
1078                 struct motion_est_context_s *context;
1079                 context = mlt_pool_alloc( sizeof(struct motion_est_context_s) );
1080                 mlt_properties_set_data( properties, "context", (void *)context, sizeof( struct motion_est_context_s ),
1081                                         mlt_pool_release, NULL );
1082
1083
1084                 // Register the filter
1085                 this->process = filter_process;
1086
1087                 /* defaults that may be overridden */
1088                 context->mb_w = 16;
1089                 context->mb_h = 16;
1090                 context->skip_prediction = 0;
1091                 context->limit_x = 64;
1092                 context->limit_y = 64;
1093                 context->search_method = DIAMOND_SEARCH;        // FIXME: not used
1094                 context->check_chroma = 0;
1095                 context->denoise = 1;
1096                 context->show_reconstruction = 0;
1097                 context->show_residual = 0;
1098                 context->toggle_when_paused = 0;
1099
1100                 /* reference functions that may have optimized versions */
1101                 context->compare_reference = sad_reference;
1102
1103                 // The rest of the buffers will be initialized when the filter is first processed
1104                 context->initialized = 0;
1105         }
1106         return this;
1107 }