--- /dev/null
+ .file "composite_line_yuv_mmx"
+ .version "01.01"
+
+gcc2_compiled.:
+.data
+
+.text
+ .align 16
+
+#if !defined(__MINGW32__) && !defined(__CYGWIN__)
+.globl composite_line_yuv_mmx
+ .type composite_line_yuv_mmx,@function
+composite_line_yuv_mmx:
+#else
+.globl _composite_line_yuv_mmx
+_composite_line_yuv_mmx:
+#endif
+
+/*
+ * void composite_line_yuv_mmx( uint8_t *dest, uint8_t *src, int width_src,
+ *                              uint8_t *alpha, int weight, uint16_t *luma,
+ *                              int softness )
+ *
+ * Blends width_src two-byte pixels from src over dest, one pixel per loop
+ * iteration. The blend factor is reduced to 8 bits (mix and 255 - mix)
+ * before the multiply-add, so results are coarser than a 16 bit blend.
+ * Does nothing when width_src <= 0.
+ *
+ * Arguments
+ *
+ * dest: 8(%ebp) %esi
+ * src: 12(%ebp) (advanced in place on the stack)
+ * width_src: 16(%ebp)
+ * alpha: 20(%ebp) (may be NULL: treated as 255)
+ * weight: 24(%ebp)
+ * luma: 28(%ebp) (may be NULL: weight is used as is)
+ * softness: 32(%ebp)
+ *
+ * Register use inside the loop
+ *
+ * %edx j (pixel index), saved in %ebx around mull
+ * %esi current dest pixel
+ * %edi scratch pointer (alpha, luma, src)
+ * %mm4 constant zero for byte to word unpacking
+ *
+ * NOTE(review): the luma branch is unfinished (see the TODO below). It
+ * compares the luma entry against the alpha value rather than weight and
+ * does not interpolate between the edges - confirm against the C line
+ * function before enabling this routine with a luma map.
+ */
+
+/*
+ * Function call entry
+ */
+ pushl %ebp
+ movl %esp,%ebp
+ subl $28,%esp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+/* Initialise */
+ cmpl $0, 16(%ebp) # if ( width_src <= 0 )
+ jle .out # nothing to composite
+ movl 8(%ebp), %esi # get dest
+ movl $0, %edx # j = 0
+ pxor %mm4, %mm4 # mm4 = 0 for punpcklbw below
+
+.loop:
+
+ movl $0xff, %ecx # a = 255
+ cmpl $0, 20(%ebp) # if alpha == NULL
+ je .noalpha
+ movl 20(%ebp), %edi # a = alpha[ j ]
+ movb (%edi,%edx), %cl
+.noalpha:
+
+ movl 24(%ebp), %eax # mix = weight
+ cmpl $0, 28(%ebp) # if luma == NULL
+ je .noluma
+ movl 28(%ebp), %edi # mix = ...
+ movw (%edi,%edx,2), %ax # luma[ j ], entries are 16 bit
+ cmpw %cx, %ax
+ jl .luma0
+ movl %eax, %ebx
+ addl 32(%ebp), %ebx # + softness
+ cmpw %bx, %cx
+ jge .luma1
+ /* TODO: linear interpolate between edges eax and ebx */
+ jmp .noluma
+.luma0:
+ movl $0, %eax
+ jmp .noluma
+.luma1:
+ movl $0xffff, %eax
+.noluma:
+ shrl $8, %eax
+
+ movl %edx, %ebx # edx will be destroyed by mull
+ mull %ecx # mix = mix * a...
+ movl %ebx, %edx # restore edx
+ shrl $8, %eax # >>8
+ andl $0xff, %eax
+
+/* put alpha and (1-alpha) into mm0 */
+/* 0 aa 0 1-a 0 aa 0 1-a */
+
+ /* duplicate word */
+ movl %eax, %ecx
+ shll $16, %ecx
+ orl %eax, %ecx
+
+ movd %ecx, %mm1
+
+ /* 255 - mix (mix is 8 bit at this point) */
+ movl $0x000000ff, %ecx
+ subl %eax, %ecx
+ andl $0xff, %ecx
+
+ /* duplicate word */
+ movl %ecx, %eax
+ shll $16, %eax
+ orl %eax, %ecx
+
+ movd %ecx, %mm0
+
+ /* unpack words into double words */
+ punpcklwd %mm1, %mm0
+
+/* put src yuv and dest yuv into mm1 */
+/* 0 UVs 0 UVd 0 Ys 0 Yd */
+
+ /* zero extending loads keep stale bits of ecx and eax out of the pack */
+ movl 12(%ebp), %edi # get src
+ movzbl (%edi), %ecx # Ys
+ shll $8, %ecx
+ movzbl 1(%edi), %eax # UVs
+ shll $24, %eax
+ orl %eax, %ecx
+
+ movzbl (%esi), %eax # get dest: Yd
+ orl %eax, %ecx
+ movzbl 1(%esi), %eax # UVd
+ shll $16, %eax
+ orl %eax, %ecx
+
+ movd %ecx, %mm1
+ punpcklbw %mm4, %mm1 # widen the four bytes to words
+
+/* alpha composite */
+ pmaddwd %mm1, %mm0 # low dword: Y blend, high dword: UV blend
+ psrld $8, %mm0
+
+/* store result */
+ movd %mm0, %eax # blended Y
+ movb %al, (%esi)
+ psrlq $32, %mm0 # move blended UV down (plain MMX, no pextrw)
+ movd %mm0, %eax
+ movb %al, 1(%esi)
+
+/* for..next */
+ addl $1, %edx # j++
+ cmpl %edx, 16(%ebp) # if ( j == width_src )
+ je .out
+
+ addl $2, %esi
+ addl $2, 12(%ebp)
+
+ jmp .loop
+
+.out:
+ emms
+ leal -40(%ebp),%esp # 28 bytes of locals + 3 saved registers
+ popl %ebx
+ popl %esi
+ popl %edi
+ movl %ebp,%esp
+ popl %ebp
+ ret
+
+
+/********************************************/
+
+.align 8
+#if !defined(__MINGW32__) && !defined(__CYGWIN__)
+.globl composite_have_mmx
+ .type composite_have_mmx,@function
+composite_have_mmx:
+#else
+.globl _composite_have_mmx
+_composite_have_mmx:
+#endif
+
+/*
+ * int composite_have_mmx( void )
+ *
+ * Returns 1 in %eax when the CPUID instruction is usable and CPUID
+ * function 1 reports bit 23 of %edx set (0x00800000, the bit this file
+ * treats as the MMX flag); returns 0 otherwise.
+ *
+ * %ebx is saved and restored because cpuid writes to it.
+ *
+ * NOTE(review): the modified flags word is not written back, so the ID
+ * bit stays toggled on return when it is writeable - presumably
+ * harmless, confirm.
+ */
+
+ push %ebx
+
+# Check if bit 21 in flags word is writeable
+
+ pushfl
+ popl %eax
+ movl %eax,%ebx # keep the original flags for comparison
+ xorl $0x00200000, %eax # flip bit 21
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax # read the flags back
+
+ cmpl %eax, %ebx
+
+ je .notfound # bit did not change: no CPUID
+
+# OK, we have CPUID
+
+ movl $1, %eax # CPUID function 1
+ cpuid
+
+ test $0x00800000, %edx # bit 23 of the returned edx
+ jz .notfound
+
+ movl $1, %eax
+ jmp .out2
+
+.notfound:
+ movl $0, %eax
+.out2:
+ popl %ebx
+ ret
#include <string.h>
#include <math.h>
+/** Signature shared by the line compositing routines (composite_line_yuv and
+    composite_line_yuv_mmx), so that either one can be handed to composite_yuv
+    as its line_fn argument.
+*/
+typedef void ( *composite_line_fn )( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness );
+
+/* mmx function declarations - only visible when USE_MMX is defined */
+#ifdef USE_MMX
+ void composite_line_yuv_mmx( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness );
+ int composite_have_mmx( void );
+#endif
+
/** Geometry struct.
*/
output->mix = in->mix + ( out->mix - in->mix ) * position;
output->distort = in->distort;
- output->x = ( int )floor( output->x ) & 0xfffffffe;
- output->w = ( int )floor( output->w ) & 0xfffffffe;
- output->sw &= 0xfffffffe;
+ // DRD> These break on negative values. I do not think they are needed
+ // since yuv_composite takes care of YUYV group alignment
+ //output->x = ( int )floor( output->x ) & 0xfffffffe;
+ //output->w = ( int )floor( output->w ) & 0xfffffffe;
+ //output->sw &= 0xfffffffe;
}
void transition_destroy_keys( void *arg )
*p++ = ( image[ i ] - 16 ) * 299; // 299 = 65535 / 219
}
+
+/** Blend one line of source pixels over a destination line.
+
+    Each of the width_src pixels is two bytes wide in both src and dest.
+    Per pixel the blend factor is weight, or - when a luma map is given -
+    linearstep( luma[ j ], luma[ j ] + softness, weight ). That factor is
+    scaled by ( a + 1 ) / 256, where a is the next alpha byte (255 when
+    alpha is NULL), and then applied as a 16 bit fixed point weight to
+    both bytes of the pixel. dest is updated in place.
+*/
+
+static inline
+void composite_line_yuv( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness )
+{
+	int pixel;
+
+	for ( pixel = 0; pixel < width_src; pixel++ )
+	{
+		int opacity = 255;
+		int blend = weight;
+		int keep;
+
+		// Per pixel alpha, when an alpha channel is supplied
+		if ( alpha != NULL )
+			opacity = *alpha++;
+
+		// Luma map entry replaces the plain weight, when supplied
+		if ( luma != NULL )
+			blend = linearstep( luma[ pixel ], luma[ pixel ] + softness, weight );
+
+		blend = ( blend * ( opacity + 1 ) ) >> 8;
+		keep = ( 1 << 16 ) - blend;
+
+		// Same factor for both bytes of the pixel
+		dest[ 0 ] = ( src[ 0 ] * blend + dest[ 0 ] * keep ) >> 16;
+		dest[ 1 ] = ( src[ 1 ] * blend + dest[ 1 ] * keep ) >> 16;
+
+		src += 2;
+		dest += 2;
+	}
+}
+
/** Composite function.
*/
-static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int bpp, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness )
+static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness, composite_line_fn line_fn )
{
int ret = 0;
- int i, j;
+ int i;
int x_src = 0, y_src = 0;
int32_t weight = ( 1 << 16 ) * ( geometry.mix / 100 );
- int stride_src = width_src * bpp;
- int stride_dest = width_dest * bpp;
+ int step = ( field > -1 ) ? 2 : 1;
+ int bpp = 2;
+ int stride_src = width_src * bpp * step;
+ int stride_dest = width_dest * bpp * step;
+ int alpha_stride = stride_src / bpp;
// Adjust to consumer scale
int x = geometry.x * width_dest / geometry.nw;
int y = geometry.y * height_dest / geometry.nh;
+ // Align x to a full YUYV group
x &= 0xfffffffe;
width_src &= 0xfffffffe;
height_src--;
}
- uint8_t *p = p_src;
- uint8_t *q = p_dest;
- uint8_t *o = p_dest;
- uint16_t *l = p_luma;
- uint8_t *z = p_alpha;
-
- uint8_t a;
- int32_t current_weight;
- int32_t value;
- int step = ( field > -1 ) ? 2 : 1;
-
- stride_src = stride_src * step;
- int alpha_stride = stride_src / bpp;
- stride_dest = stride_dest * step;
+ if ( line_fn == NULL )
+ line_fn = composite_line_yuv;
// now do the compositing only to cropped extents
for ( i = 0; i < height_src; i += step )
{
- p = p_src;
- q = p_dest;
- o = q;
- l = p_luma;
- z = p_alpha;
-
- for ( j = 0; j < width_src; j ++ )
- {
- a = ( z == NULL ) ? 255 : *z ++;
- current_weight = ( l == NULL ) ? weight : linearstep( l[ j ], l[ j ] + softness, weight );
- value = ( current_weight * ( a + 1 ) ) >> 8;
- *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
- *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
- }
+ line_fn( p_dest, p_src, width_src, p_alpha, weight, p_luma, softness );
p_src += stride_src;
p_dest += stride_dest;
x -= x % 2;
// optimization points - no work to do
- if ( *width <= 0 || *height <= 0 )
+ if ( *width < 1 || *height < 1 )
return 1;
if ( ( x < 0 && -x >= *width ) || ( y < 0 && -y >= *height ) )
h = result.h * height / result.nh;
x &= 0xfffffffe;
- w &= 0xfffffffe;
+ //w &= 0xfffffffe;
// Now we need to create a new destination image
dest = mlt_pool_alloc( w * h * 2 );
{
uint8_t *dest = *image;
uint8_t *src = image_b;
- int bpp = 2;
uint8_t *alpha = mlt_frame_get_alpha_mask( b_frame );
int progressive = mlt_properties_get_int( a_props, "progressive" ) ||
mlt_properties_get_int( a_props, "consumer_progressive" ) ||
int32_t luma_softness = mlt_properties_get_double( properties, "softness" ) * ( 1 << 16 );
uint16_t *luma_bitmap = get_luma( properties, width_b, height_b );
+ composite_line_fn line_fn = mlt_properties_get_int( properties, "_MMX" ) ? composite_line_yuv_mmx : composite_line_yuv;
for ( field = 0; field < ( progressive ? 1 : 2 ); field++ )
{
alignment_calculate( &result );
// Composite the b_frame on the a_frame
- composite_yuv( dest, *width, *height, bpp, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness );
+ composite_yuv( dest, *width, *height, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness, line_fn );
}
}
}
mlt_transition this = calloc( sizeof( struct mlt_transition_s ), 1 );
if ( this != NULL && mlt_transition_init( this, NULL ) == 0 )
{
+ mlt_properties properties = mlt_transition_properties( this );
+
this->process = composite_process;
- mlt_properties_set( mlt_transition_properties( this ), "start", arg != NULL ? arg : "85%,5%:10%x10%" );
+
+ // Default starting motion and zoom
+ mlt_properties_set( properties, "start", arg != NULL ? arg : "85%,5%:10%x10%" );
// Default factory
- mlt_properties_set( mlt_transition_properties( this ), "factory", "fezzik" );
+ mlt_properties_set( properties, "factory", "fezzik" );
+
+#ifdef USE_MMX
+ //mlt_properties_set_int( properties, "_MMX", composite_have_mmx() );
+#endif
}
return this;
}