Mutable watermark producer and small optimisation

[mlt] / src / modules / core / transition_composite.c
diff --git a/src/modules/core/transition_composite.c b/src/modules/core/transition_composite.c

index d9c1bf40c3ff5deb62d2229d0ab45ba689426577..622ae8fe85546443763735e77f38a015be600b07 100644 (file)
--- a/src/modules/core/transition_composite.c
+++ b/src/modules/core/transition_composite.c
@@ -19,7 +19,7 @@
   */
  
  #include "transition_composite.h"
-#include <framework/mlt_frame.h>
+#include <framework/mlt.h>
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -27,11 +27,20 @@
  #include <string.h>
  #include <math.h>
  
+typedef void ( *composite_line_fn )( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness );
+
+/* mmx function declarations */
+#ifdef USE_MMX
+       void composite_line_yuv_mmx( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness );
+       int composite_have_mmx( void );
+#endif
+
  /** Geometry struct.
  */
  
  struct geometry_s
  {
+       int frame;
         float position;
         float mix;
         int nw; // normalised width
@@ -146,18 +155,34 @@ static void geometry_calculate( struct geometry_s *output, struct geometry_s *in
         position = ( position - in->position ) / ( out->position - in->position );
  
         // Calculate this frames geometry
-       output->nw = in->nw;
-       output->nh = in->nh;
-       output->x = in->x + ( out->x - in->x ) * position;
-       output->y = in->y + ( out->y - in->y ) * position;
-       output->w = in->w + ( out->w - in->w ) * position;
-       output->h = in->h + ( out->h - in->h ) * position;
-       output->mix = in->mix + ( out->mix - in->mix ) * position;
-       output->distort = in->distort;
-
-       output->x = ( int )floor( output->x ) & 0xfffffffe;
-       output->w = ( int )floor( output->w ) & 0xfffffffe;
-       output->sw &= 0xfffffffe;
+       if ( in->frame != out->frame - 1 )
+       {
+               output->nw = in->nw;
+               output->nh = in->nh;
+               output->x = in->x + ( out->x - in->x ) * position;
+               output->y = in->y + ( out->y - in->y ) * position;
+               output->w = in->w + ( out->w - in->w ) * position;
+               output->h = in->h + ( out->h - in->h ) * position;
+               output->mix = in->mix + ( out->mix - in->mix ) * position;
+               output->distort = in->distort;
+       }
+       else
+       {
+               output->nw = out->nw;
+               output->nh = out->nh;
+               output->x = out->x;
+               output->y = out->y;
+               output->w = out->w;
+               output->h = out->h;
+               output->mix = out->mix;
+               output->distort = out->distort;
+       }
+
+       // DRD> These break on negative values. I do not think they are needed
+       // since yuv_composite takes care of YUYV group alignment
+       //output->x = ( int )floor( output->x ) & 0xfffffffe;
+       //output->w = ( int )floor( output->w ) & 0xfffffffe;
+       //output->sw &= 0xfffffffe;
  }
  
  void transition_destroy_keys( void *arg )
@@ -229,7 +254,8 @@ static struct geometry_s *transition_parse_keys( mlt_transition this,  int norma
                                 // Parse and add to the list
                                 geometry_parse( temp, ptr, value, normalised_width, normalised_height );
  
-                               // Assign the position
+                               // Assign the position and frame
+                               temp->frame = frame;
                                 temp->position = position;
  
                                 // Allow the next to be appended after this one
@@ -249,9 +275,6 @@ static struct geometry_s *transition_parse_keys( mlt_transition this,  int norma
         else
                 end->position = 1;
  
-       // Assign to properties to ensure we get destroyed
-       mlt_properties_set_data( properties, "geometries", start, 0, transition_destroy_keys, NULL );
-
         return start;
  }
  
@@ -285,15 +308,12 @@ static void alignment_calculate( struct geometry_s *geometry )
  /** Calculate the position for this frame.
  */
  
-static float position_calculate( mlt_transition this, mlt_frame frame )
+static float position_calculate( mlt_transition this, mlt_position position )
  {
         // Get the in and out position
         mlt_position in = mlt_transition_get_in( this );
         mlt_position out = mlt_transition_get_out( this );
  
-       // Get the position
-       mlt_position position = mlt_frame_get_position( frame );
-
         // Now do the calcs
         return ( float )( position - in ) / ( float )( out - in + 1 );
  }
@@ -308,7 +328,8 @@ static inline float delta_calculate( mlt_transition this, mlt_frame frame )
         mlt_position out = mlt_transition_get_out( this );
  
         // Get the position of the frame
-       mlt_position position = mlt_frame_get_position( frame );
+       char *name = mlt_properties_get( mlt_transition_properties( this ), "_unique_id" );
+       mlt_position position = mlt_properties_get_position( mlt_frame_properties( frame ), name );
  
         // Now do the calcs
         float x = ( float )( position - in ) / ( float )( out - in + 1 );
@@ -325,22 +346,206 @@ static int get_value( mlt_properties properties, char *preferred, char *fallback
         return value;
  }
  
+/** A linear threshold determination function.
+    Returns how far a lies between edge1 and edge2 as 16.16 fixed point: 0 below edge1, 0x10000 at or above edge2.
+*/
+static inline int32_t linearstep( int32_t edge1, int32_t edge2, int32_t a )
+{
+       if ( a < edge1 )
+               return 0;
+
+       if ( a >= edge2 )
+               return 0x10000;
+       // Widen before shifting: once ( a - edge1 ) exceeds 0x7fff the 16 bit shift no longer fits in 32 bits
+       return ( int32_t )( ( ( int64_t )( a - edge1 ) << 16 ) / ( edge2 - edge1 ) );
+}
+
+/** A smoother, non-linear threshold determination function.
+    Returns 0 below edge1 and 0x10000 at or above edge2; in between the 16.16 fixed point result follows 3t^2 - 2t^3.
+*/
+static inline int32_t smoothstep( int32_t edge1, int32_t edge2, uint32_t a )
+{
+       uint32_t t;
+       // a is unsigned, so both edge comparisons below are carried out unsigned
+       if ( a < edge1 )
+               return 0;
+       if ( a >= edge2 )
+               return 0x10000;
+       // Normalise a across the span to t in 16.16 fixed point, then evaluate t * t * ( 3 - 2 * t )
+       t = ( ( a - edge1 ) << 16 ) / ( edge2 - edge1 );
+       return ( ( ( t * t ) >> 16 ) * ( ( 3 << 16 ) - ( 2 * t ) ) ) >> 16;
+}
+
+/** Load the luma map from PGM stream.
+    Reads a binary (P5) header, then converts the samples to 16 bit values in a pool-allocated *map of *width x *height. On failure *map is left untouched, or NULL if allocating it failed.
+*/
+static void luma_read_pgm( FILE *f, uint16_t **map, int *width, int *height )
+{
+       uint8_t *data = NULL;
+       while (1)
+       {
+               char line[128];
+               char comment[128];
+               int i = 2;
+               int maxval = 0;
+               int bpp;
+               uint16_t *p;
+
+               line[127] = '\0';
+
+               // get the magic code
+               if ( fgets( line, 127, f ) == NULL )
+                       break;
+
+               // skip comments
+               while ( sscanf( line, " #%s", comment ) > 0 )
+                       if ( fgets( line, 127, f ) == NULL )
+                               break;
+
+               if ( line[0] != 'P' || line[1] != '5' )
+                       break;
+
+               // skip white space and see if a new line must be fetched
+               for ( i = 2; i < 127 && line[i] != '\0' && isspace( ( unsigned char )line[i] ); i++ );
+               if ( ( line[i] == '\0' || line[i] == '#' ) && fgets( line, 127, f ) == NULL )
+                       break;
+
+               // skip comments
+               while ( sscanf( line, " #%s", comment ) > 0 )
+                       if ( fgets( line, 127, f ) == NULL )
+                               break;
+
+               // get the dimensions
+               if ( line[0] == 'P' )
+                       i = sscanf( line, "P5 %d %d %d", width, height, &maxval );
+               else
+                       i = sscanf( line, "%d %d %d", width, height, &maxval );
+
+               // get the height value, if not yet (nothing can be done when not even the width was found)
+               if ( i < 1 )
+                       break;
+               if ( i < 2 )
+               {
+                       if ( fgets( line, 127, f ) == NULL )
+                               break;
+                       // skip comments
+                       while ( sscanf( line, " #%s", comment ) > 0 )
+                               if ( fgets( line, 127, f ) == NULL )
+                                       break;
+                       i = sscanf( line, "%d", height );
+                       if ( i < 1 )
+                               break;
+                       else
+                               i = 2;
+               }
+
+               // get the maximum gray value, if not yet
+               if ( i < 3 )
+               {
+                       if ( fgets( line, 127, f ) == NULL )
+                               break;
+
+                       // skip comments
+                       while ( sscanf( line, " #%s", comment ) > 0 )
+                               if ( fgets( line, 127, f ) == NULL )
+                                       break;
+
+                       i = sscanf( line, "%d", &maxval );
+                       if ( i < 1 )
+                               break;
+               }
+               // reject headers whose values are out of range or whose buffer size would overflow an int
+               if ( *width < 1 || *height < 1 || maxval < 1 || maxval > 65535 || *height > 0x7fffffff / 2 / *width )
+                       break;
+               // determine if this is one or two bytes per pixel
+               bpp = maxval > 255 ? 2 : 1;
+               // allocate temporary storage for the raw data
+               data = mlt_pool_alloc( *width * *height * bpp );
+               if ( data == NULL )
+                       break;
+               // read the raw data
+               if ( fread( data, *width * *height * bpp, 1, f ) != 1 )
+                       break;
+
+               // allocate the luma bitmap
+               *map = p = (uint16_t*)mlt_pool_alloc( *width * *height * sizeof( uint16_t ) );
+               if ( *map == NULL )
+                       break;
+
+               // process the raw data into the luma bitmap
+               for ( i = 0; i < *width * *height * bpp; i += bpp )
+               {
+                       if ( bpp == 1 )
+                               *p++ = data[ i ] << 8;
+                       else
+                               *p++ = ( data[ i ] << 8 ) + data[ i + 1 ];
+               }
+
+               break;
+       }
+
+       if ( data != NULL )
+               mlt_pool_release( data );
+}
+
+/** Generate a luma map from any YUV image.
+    Converts the first byte of every two-byte pixel of image into a new pool-allocated *map; *map is NULL if allocation fails.
+*/
+static void luma_read_yuv422( uint8_t *image, uint16_t **map, int width, int height )
+{
+       int i;
+       // allocate the luma bitmap
+       uint16_t *p = *map = ( uint16_t* )mlt_pool_alloc( width * height * sizeof( uint16_t ) );
+       if ( *map == NULL )
+               return;
+
+       // process the image data into the luma bitmap
+       // values outside 16..235 are clamped first, otherwise the scaled result wraps around in 16 bits
+       for ( i = 0; i < width * height * 2; i += 2 )
+               *p++ = ( ( image[ i ] < 16 ? 16 : image[ i ] > 235 ? 235 : image[ i ] ) - 16 ) * 299; // 299 = 65535 / 219
+}
+
+
+/** Composite a source line over a destination line
+    Blends width_src two-byte pixels of src into dest; alpha and luma may each be NULL.
+*/
+static inline
+void composite_line_yuv( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness )
+{
+       int pixel;
+       for ( pixel = 0; pixel < width_src; pixel ++ )
+       {
+               // Without alpha the pixel is opaque; a luma map swaps the global weight for a per pixel threshold
+               int opacity = alpha ? *alpha ++ : 255;
+               int blend = luma ? linearstep( luma[ pixel ], luma[ pixel ] + softness, weight ) : weight;
+               blend = ( blend * ( opacity + 1 ) ) >> 8;
+               // Blend both bytes of the pixel; ( 1 << 16 ) stands for full weight
+               dest[ 0 ] = ( src[ 0 ] * blend + dest[ 0 ] * ( ( 1 << 16 ) - blend ) ) >> 16;
+               dest[ 1 ] = ( src[ 1 ] * blend + dest[ 1 ] * ( ( 1 << 16 ) - blend ) ) >> 16;
+               src += 2;
+               dest += 2;
+       }
+}
+
  /** Composite function.
  */
  
-static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int bpp, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field )
+static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness, composite_line_fn line_fn )
  {
         int ret = 0;
-       int i, j;
+       int i;
         int x_src = 0, y_src = 0;
         int32_t weight = ( 1 << 16 ) * ( geometry.mix / 100 );
+       int step = ( field > -1 ) ? 2 : 1;
+       int bpp = 2;
         int stride_src = width_src * bpp;
         int stride_dest = width_dest * bpp;
-
+       
         // Adjust to consumer scale
         int x = geometry.x * width_dest / geometry.nw;
         int y = geometry.y * height_dest / geometry.nh;
  
+       // Align x to a full YUYV group
         x &= 0xfffffffe;
         width_src &= 0xfffffffe;
  
@@ -360,7 +565,7 @@ static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int
         }
         
         // crop overlay beyond right edge of frame
-       else if ( x + width_src > width_dest )
+       if ( x + width_src > width_dest )
                 width_src = width_dest - x;
  
         // crop overlay off the top edge of the frame
@@ -369,8 +574,9 @@ static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int
                 y_src = -y;
                 height_src -= y_src;
         }
+       
         // crop overlay below bottom edge of frame
-       else if ( y + height_src > height_dest )
+       if ( y + height_src > height_dest )
                 height_src = height_dest - y;
  
         // offset pointer into overlay buffer based on cropping
@@ -383,6 +589,10 @@ static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int
         if ( p_alpha )
                 p_alpha += x_src + y_src * stride_src / bpp;
  
+       // offset pointer into luma channel based upon cropping
+       if ( p_luma )
+               p_luma += x_src + y_src * stride_src / bpp;
+       
         // Assuming lower field first
         // Special care is taken to make sure the b_frame is aligned to the correct field.
         // field 0 = lower field and y should be odd (y is 0-based).
@@ -405,45 +615,170 @@ static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int
                 height_src--;
         }
  
-       uint8_t *p = p_src;
-       uint8_t *q = p_dest;
-       uint8_t *o = p_dest;
-       uint8_t *z = p_alpha;
-
-       uint8_t a;
-       int32_t value;
-       int step = ( field > -1 ) ? 2 : 1;
-
-       stride_src = stride_src * step;
+       stride_src *= step;
+       stride_dest *= step;
         int alpha_stride = stride_src / bpp;
-       stride_dest = stride_dest * step;
  
         // now do the compositing only to cropped extents
-       for ( i = 0; i < height_src; i += step )
+       if ( line_fn != NULL )
         {
-               p = p_src;
-               q = p_dest;
-               o = q;
-               z = p_alpha;
-
-               for ( j = 0; j < width_src; j ++ )
+               for ( i = 0; i < height_src; i += step )
                 {
-                       a = ( z == NULL ) ? 255 : *z ++;
-                       value = ( weight * ( a + 1 ) ) >> 8;
-                       *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
-                       *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
+                       line_fn( p_dest, p_src, width_src, p_alpha, weight, p_luma, softness );
+       
+                       p_src += stride_src;
+                       p_dest += stride_dest;
+                       if ( p_alpha )
+                               p_alpha += alpha_stride;
+                       if ( p_luma )
+                               p_luma += alpha_stride;
+               }
+       }
+       else
+       {
+               for ( i = 0; i < height_src; i += step )
+               {
+                       composite_line_yuv( p_dest, p_src, width_src, p_alpha, weight, p_luma, softness );
+       
+                       p_src += stride_src;
+                       p_dest += stride_dest;
+                       if ( p_alpha )
+                               p_alpha += alpha_stride;
+                       if ( p_luma )
+                               p_luma += alpha_stride;
                 }
-
-               p_src += stride_src;
-               p_dest += stride_dest;
-               if ( p_alpha )
-                       p_alpha += alpha_stride;
         }
  
         return ret;
  }
  
  
+/** Scale 16bit greyscale luma map using nearest neighbor.
+    dest_buf receives dest_width * dest_height samples picked from the src_width x src_height map in src_buf.
+*/
+static inline void
+scale_luma ( uint16_t *dest_buf, int dest_width, int dest_height, const uint16_t *src_buf, int src_width, int src_height )
+{
+       // Source distance covered by one destination sample, as 16.16 fixed point
+       const int step_x = ( src_width << 16 ) / dest_width;
+       const int step_y = ( src_height << 16 ) / dest_height;
+       int row, col;
+       int pos_x, pos_y;
+
+       for ( row = 0, pos_y = 0; row < dest_height; row++, pos_y += step_y )
+       {
+               // The integer part of pos_y selects the source row to sample from
+               const uint16_t *line = src_buf + ( pos_y >> 16 ) * src_width;
+
+               // Likewise the integer part of pos_x selects the sample within that row
+               for ( col = 0, pos_x = 0; col < dest_width; col++, pos_x += step_x )
+               {
+                       *dest_buf++ = line[ pos_x >> 16 ];
+               }
+       }
+}
+
+static uint16_t* get_luma( mlt_properties properties, int width, int height )
+{
+       // Returns the cached luma map, or one freshly scaled to width x height; NULL when scaling is not possible
+       // The cached luma map information
+       int luma_width = mlt_properties_get_int( properties, "_luma.width" );
+       int luma_height = mlt_properties_get_int( properties, "_luma.height" );
+       uint16_t *luma_bitmap = mlt_properties_get_data( properties, "_luma.bitmap", NULL );
+       // Where the map comes from: a .pgm file is parsed directly, anything else is loaded through a producer
+       char *resource = mlt_properties_get( properties, "luma" );
+
+       if ( resource != NULL && ( luma_bitmap == NULL || luma_width != width || luma_height != height ) )
+       {
+               uint16_t *orig_bitmap = mlt_properties_get_data( properties, "_luma.orig_bitmap", NULL );
+               luma_width = mlt_properties_get_int( properties, "_luma.orig_width" );
+               luma_height = mlt_properties_get_int( properties, "_luma.orig_height" );
+
+               // Load the original luma once
+               if ( orig_bitmap == NULL )
+               {
+                       char *extension = strrchr( resource, '.' );
+                       // See if it is a PGM
+                       if ( extension != NULL && strcmp( extension, ".pgm" ) == 0 )
+                       {
+                               // Open PGM (binary mode, the samples are raw bytes)
+                               FILE *f = fopen( resource, "rb" );
+                               if ( f != NULL )
+                               {
+                                       // Load from PGM
+                                       luma_read_pgm( f, &orig_bitmap, &luma_width, &luma_height );
+                                       fclose( f );
+                                       // Remember the original size for subsequent scaling
+                                       mlt_properties_set_data( properties, "_luma.orig_bitmap", orig_bitmap, luma_width * luma_height * 2, mlt_pool_release, NULL );
+                                       mlt_properties_set_int( properties, "_luma.orig_width", luma_width );
+                                       mlt_properties_set_int( properties, "_luma.orig_height", luma_height );
+                               }
+                       }
+                       else
+                       {
+                               // Get the factory producer service
+                               char *factory = mlt_properties_get( properties, "factory" );
+       
+                               // Create the producer
+                               mlt_producer producer = mlt_factory_producer( factory, resource );
+       
+                               // If we have one
+                               if ( producer != NULL )
+                               {
+                                       // Get the producer properties
+                                       mlt_properties producer_properties = mlt_producer_properties( producer );
+       
+                                       // Ensure that we loop
+                                       mlt_properties_set( producer_properties, "eof", "loop" );
+       
+                                       // Now pass all producer. properties on the transition down
+                                       mlt_properties_pass( producer_properties, properties, "luma." );
+       
+                                       // We will get the luma frame from the producer
+                                       mlt_frame luma_frame = NULL;
+       
+                                       // Get the luma frame
+                                       if ( mlt_service_get_frame( mlt_producer_service( producer ), &luma_frame, 0 ) == 0 )
+                                       {
+                                               uint8_t *luma_image = NULL;
+                                               mlt_image_format luma_format = mlt_image_yuv422;
+       
+                                               // Get image from the luma producer
+                                               mlt_properties_set( mlt_frame_properties( luma_frame ), "rescale.interp", "none" );
+                                               mlt_frame_get_image( luma_frame, &luma_image, &luma_format, &luma_width, &luma_height, 0 );
+       
+                                               // Generate the luma map
+                                               if ( luma_image != NULL && luma_format == mlt_image_yuv422 )
+                                                       luma_read_yuv422( luma_image, &orig_bitmap, luma_width, luma_height );
+       
+                                               // Remember the original size for subsequent scaling
+                                               mlt_properties_set_data( properties, "_luma.orig_bitmap", orig_bitmap, luma_width * luma_height * 2, mlt_pool_release, NULL );
+                                               mlt_properties_set_int( properties, "_luma.orig_width", luma_width );
+                                               mlt_properties_set_int( properties, "_luma.orig_height", luma_height );
+                                               // Cleanup the luma frame
+                                               mlt_frame_close( luma_frame );
+                                       }
+       
+                                       // Cleanup the luma producer
+                                       mlt_producer_close( producer );
+                               }
+                       }
+               }
+               // Scale luma map - but only if loading produced something usable and the target size is sane
+               if ( orig_bitmap == NULL || luma_width < 1 || luma_height < 1 || width < 1 || height < 1 )
+                       return NULL;
+               luma_bitmap = mlt_pool_alloc( width * height * sizeof( uint16_t ) );
+               if ( luma_bitmap == NULL )
+                       return NULL;
+               scale_luma( luma_bitmap, width, height, orig_bitmap, luma_width, luma_height );
+               // Remember the scaled luma size to prevent unnecessary scaling
+               mlt_properties_set_int( properties, "_luma.width", width );
+               mlt_properties_set_int( properties, "_luma.height", height );
+               mlt_properties_set_data( properties, "_luma.bitmap", luma_bitmap, width * height * 2, mlt_pool_release, NULL );
+       }
+       return luma_bitmap;
+}
+
  /** Get the properly sized image from b_frame.
  */
  
@@ -465,13 +800,8 @@ static int get_b_frame_image( mlt_transition this, mlt_frame b_frame, uint8_t **
                 int real_height = get_value( b_props, "real_height", "height" );
                 double input_ar = mlt_frame_get_aspect_ratio( b_frame );
                 double output_ar = mlt_properties_get_double( b_props, "consumer_aspect_ratio" );
-               int scaled_width = real_width;
+               int scaled_width = input_ar / output_ar * real_width;
                 int scaled_height = real_height;
-               double output_sar = ( double ) geometry->nw / geometry->nh / output_ar;
-
-               // If the output is fat pixels (NTSC) then stretch our input horizontally
-               // derived from: output_sar / input_sar * real_width
-               scaled_width = output_sar * real_height * input_ar;
                         
                 // Now ensure that our images fit in the normalised frame
                 if ( scaled_width > normalised_width )
@@ -515,10 +845,10 @@ static int get_b_frame_image( mlt_transition this, mlt_frame b_frame, uint8_t **
         *width = geometry->sw * *width / geometry->nw;
         *height = geometry->sh * *height / geometry->nh;
  
-       x -= x % 2;
+       x &= 0xfffffffe;
  
         // optimization points - no work to do
-       if ( *width <= 0 || *height <= 0 )
+       if ( *width < 1 || *height < 1 )
                 return 1;
  
         if ( ( x < 0 && -x >= *width ) || ( y < 0 && -y >= *height ) )
@@ -530,7 +860,7 @@ static int get_b_frame_image( mlt_transition this, mlt_frame b_frame, uint8_t **
  }
  
  
-struct geometry_s *composite_calculate( struct geometry_s *result, mlt_transition this, mlt_frame a_frame, float position )
+static struct geometry_s *composite_calculate( struct geometry_s *result, mlt_transition this, mlt_frame a_frame, float position )
  {
         // Get the properties from the transition
         mlt_properties properties = mlt_transition_properties( this );
@@ -542,7 +872,7 @@ struct geometry_s *composite_calculate( struct geometry_s *result, mlt_transitio
         struct geometry_s *start = mlt_properties_get_data( properties, "geometries", NULL );
  
         // Now parse the geometries
-       if ( start == NULL )
+       if ( start == NULL || mlt_properties_get_int( properties, "refresh" ) )
         {
                 // Obtain the normalised width and height from the a_frame
                 int normalised_width = mlt_properties_get_int( a_props, "normalised_width" );
@@ -550,6 +880,10 @@ struct geometry_s *composite_calculate( struct geometry_s *result, mlt_transitio
  
                 // Parse the transitions properties
                 start = transition_parse_keys( this, normalised_width, normalised_height );
+
+               // Assign to properties to ensure we get destroyed
+               mlt_properties_set_data( properties, "geometries", start, 0, transition_destroy_keys, NULL );
+               mlt_properties_set_int( properties, "refresh", 0 );
         }
  
         // Do the calculation
@@ -562,7 +896,17 @@ struct geometry_s *composite_calculate( struct geometry_s *result, mlt_transitio
         return start;
  }
  
-mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame )
+static inline void inline_memcpy( uint8_t *dest, uint8_t *src, int length )
+{
+       // Forward copy of exactly length bytes from src to dest; a length below 1 copies nothing.
+       // One byte per pass, so that an odd length cannot read and write one byte past the end.
+       uint8_t *end = src + length;
+
+       while ( src < end )
+               *dest ++ = *src ++;
+}
+
+mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame, mlt_position frame_position )
  {
         // Create a frame to return
         mlt_frame b_frame = mlt_frame_init( );
@@ -574,7 +918,7 @@ mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame )
         mlt_properties b_props = mlt_frame_properties( b_frame );
  
         // Get the position
-       float position = position_calculate( this, a_frame );
+       float position = position_calculate( this, frame_position );
  
         // Destination image
         uint8_t *dest = NULL;
@@ -607,6 +951,15 @@ mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame )
         w = result.w * width / result.nw;
         h = result.h * height / result.nh;
  
+       if ( y < 0 )
+       {
+               h = h + y;
+               y = 0;
+       }
+
+       if ( y + h > height )
+               h = height - y;
+
         x &= 0xfffffffe;
         w &= 0xfffffffe;
  
@@ -620,7 +973,7 @@ mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame )
  
         while ( q < r )
         {
-               memcpy( q, p, w * 2 );
+               inline_memcpy( q, p, w * 2 );
                 q += w * 2;
                 p += width * 2;
         }
@@ -630,6 +983,9 @@ mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame )
         mlt_properties_set_int( b_props, "width", w );
         mlt_properties_set_int( b_props, "height", h );
  
+       // Assign this position to the b frame
+       mlt_frame_set_position( b_frame, frame_position );
+
         // Return the frame
         return b_frame;
  }
@@ -651,6 +1007,9 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
         // Get the image from the a frame
         mlt_frame_get_image( a_frame, image, format, width, height, 1 );
  
+       // Get the properties from the transition
+       mlt_properties properties = mlt_transition_properties( this );
+
         if ( b_frame != NULL )
         {
                 // Get the properties of the a frame
@@ -659,9 +1018,6 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
                 // Get the properties of the b frame
                 mlt_properties b_props = mlt_frame_properties( b_frame );
  
-               // Get the properties from the transition
-               mlt_properties properties = mlt_transition_properties( this );
-
                 // Structures for geometry
                 struct geometry_s result;
  
@@ -671,11 +1027,14 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
  
                 // Do the calculation
                 struct geometry_s *start = composite_calculate( &result, this, a_frame, position );
+               
+               // Optimisation - no compositing required
+               if ( result.mix == 0 || ( result.w == 0 && result.h == 0 ) )
+                       return 0;
  
                 // Since we are the consumer of the b_frame, we must pass along these
                 // consumer properties from the a_frame
                 mlt_properties_set_double( b_props, "consumer_aspect_ratio", mlt_properties_get_double( a_props, "consumer_aspect_ratio" ) );
-               mlt_properties_set_double( b_props, "consumer_scale", mlt_properties_get_double( a_props, "consumer_scale" ) );
  
                 // Get the image from the b frame
                 uint8_t *image_b = NULL;
@@ -686,12 +1045,16 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
                 {
                         uint8_t *dest = *image;
                         uint8_t *src = image_b;
-                       int bpp = 2;
                         uint8_t *alpha = mlt_frame_get_alpha_mask( b_frame );
-                       int progressive = mlt_properties_get_int( a_props, "progressive" ) ||
+                       int progressive = 
                                         mlt_properties_get_int( a_props, "consumer_progressive" ) ||
                                         mlt_properties_get_int( properties, "progressive" );
                         int field;
+                       
+                       int32_t luma_softness = mlt_properties_get_double( properties, "softness" ) * ( 1 << 16 );
+                       uint16_t *luma_bitmap = get_luma( properties, width_b, height_b );
+                       //composite_line_fn line_fn = mlt_properties_get_int( properties, "_MMX" ) ? composite_line_yuv_mmx : NULL;
+                       composite_line_fn line_fn = NULL;
  
                         for ( field = 0; field < ( progressive ? 1 : 2 ); field++ )
                         {
@@ -705,7 +1068,7 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
                                 alignment_calculate( &result );
  
                                 // Composite the b_frame on the a_frame
-                               composite_yuv( dest, *width, *height, bpp, src, width_b, height_b, alpha, result, progressive ? -1 : field );
+                               composite_yuv( dest, *width, *height, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness, line_fn );
                         }
                 }
         }
@@ -718,11 +1081,18 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
  
  static mlt_frame composite_process( mlt_transition this, mlt_frame a_frame, mlt_frame b_frame )
  {
+       // Get a unique name to store the frame position
+       char *name = mlt_properties_get( mlt_transition_properties( this ), "_unique_id" );
+
+       // Assign the current position to the name
+       mlt_properties_set_position( mlt_frame_properties( a_frame ), name, mlt_frame_get_position( a_frame ) );
+
         // Propogate the transition properties to the b frame
-       mlt_properties_set_double( mlt_frame_properties( b_frame ), "relative_position", position_calculate( this, a_frame ) );
+       mlt_properties_set_double( mlt_frame_properties( b_frame ), "relative_position", position_calculate( this, mlt_frame_get_position( a_frame ) ) );
+       
         mlt_frame_push_service( a_frame, this );
-       mlt_frame_push_get_image( a_frame, transition_get_image );
         mlt_frame_push_frame( a_frame, b_frame );
+       mlt_frame_push_get_image( a_frame, transition_get_image );
         return a_frame;
  }
  
@@ -734,9 +1104,19 @@ mlt_transition transition_composite_init( char *arg )
         mlt_transition this = calloc( sizeof( struct mlt_transition_s ), 1 );
         if ( this != NULL && mlt_transition_init( this, NULL ) == 0 )
         {
+               mlt_properties properties = mlt_transition_properties( this );
+               
                 this->process = composite_process;
-               mlt_properties_set( mlt_transition_properties( this ), "start", arg != NULL ? arg : "85%,5%:10%x10%" );
+               
+               // Default starting motion and zoom
+               mlt_properties_set( properties, "start", arg != NULL ? arg : "85%,5%:10%x10%" );
+               
+               // Default factory
+               mlt_properties_set( properties, "factory", "fezzik" );
+
+#ifdef USE_MMX
+               //mlt_properties_set_int( properties, "_MMX", composite_have_mmx() );
+#endif
         }
         return this;
  }
-