Added very preliminary MMX support for the composite transition.

author ddennedy <ddennedy@d19143bc-622f-0410-bfdd-b5b2a6649095>

Thu, 11 Mar 2004 07:22:22 +0000 (07:22 +0000)

committer ddennedy <ddennedy@d19143bc-622f-0410-bfdd-b5b2a6649095>

Thu, 11 Mar 2004 07:22:22 +0000 (07:22 +0000)
author ddennedy <ddennedy@d19143bc-622f-0410-bfdd-b5b2a6649095>
Thu, 11 Mar 2004 07:22:22 +0000 (07:22 +0000)
committer ddennedy <ddennedy@d19143bc-622f-0410-bfdd-b5b2a6649095>
Thu, 11 Mar 2004 07:22:22 +0000 (07:22 +0000)
diff --git a/src/modules/core/Makefile b/src/modules/core/Makefile

index 4dd185c25d252f2496e581ce719e52984b080a3b..fd7d0dff91d3e13d6a3dfafe517083d902bf0521 100644 (file)
--- a/src/modules/core/Makefile
+++ b/src/modules/core/Makefile
@@ -18,14 +18,19 @@ OBJS = factory.o \
            transition_mix.o \
            transition_region.o 
  
-CFLAGS = -O3 -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread
+ASM_OBJS = composite_line_yuv_mmx.o
+
+CFLAGS = -O3 -DUSE_MMX -I../../ -Wall -g -D_FILE_OFFSET_BITS=64 -pthread
  
  SRCS := $(OBJS:.o=.c)
  
  all:   $(TARGET)
  
-$(TARGET): $(OBJS)
-               $(CC) -shared -o $@ $(OBJS) $(LDFLAGS)
+$(TARGET): $(OBJS) $(ASM_OBJS)
+               $(CC) -shared -o $@ $(OBJS) $(ASM_OBJS) $(LDFLAGS)
+
+composite_line_yuv_mmx.o: composite_line_yuv_mmx.S
+       $(CC) -o $@ -c composite_line_yuv_mmx.S
  
  depend:        $(SRCS)
                 $(CC) -MM $(CFLAGS) $^ 1>.depend
@@ -34,9 +39,8 @@ dist-clean:   clean
                 rm -f .depend
  
  clean: 
-               rm -f $(OBJS) $(TARGET) 
+               rm -f $(OBJS) $(ASM_OBJS) $(TARGET) 
  
  ifneq ($(wildcard .depend),)
  include .depend
  endif
-
diff --git a/src/modules/core/composite_line_yuv_mmx.S b/src/modules/core/composite_line_yuv_mmx.S

new file mode 100644 (file)

index 0000000..7a5fb80
--- /dev/null
+++ b/src/modules/core/composite_line_yuv_mmx.S
@@ -0,0 +1,203 @@
+       .file "composite_line_yuv_mmx"
+       .version "01.01"
+       
+gcc2_compiled.:
+.data
+
+.text
+       .align 16
+
+#if !defined(__MINGW32__) && !defined(__CYGWIN__)
+.globl composite_line_yuv_mmx
+       .type    composite_line_yuv_mmx,@function
+composite_line_yuv_mmx:
+#else
+.globl _composite_line_yuv_mmx
+_composite_line_yuv_mmx:
+#endif
+
+/*
+ * void composite_line_yuv_mmx( uint8_t *dest, uint8_t *src, int width_src,
+ *         uint8_t *alpha, int weight, uint16_t *luma, int softness )
+ * Blends width_src two-byte pixels of src onto dest (32-bit x86, stack args).
+ *
+ * Arguments
+ * dest:        8(%ebp)        kept in %esi
+ * src:         12(%ebp)       advanced in place, 2 bytes per pixel
+ * width_src:   16(%ebp)       pixel count, nothing is done when <= 0
+ * alpha:       20(%ebp)       one byte per pixel, NULL means 255
+ * weight:      24(%ebp)       mix level, bits 8 and up are used
+ * luma:        28(%ebp)       one 16 bit word per pixel, may be NULL
+ * softness:    32(%ebp)
+ *
+ * Registers: %edx = j, %esi = dest, %mm4 = zero, the rest is scratch.
+ * The blend is 8 bit: mix is 0..255 and dest is scaled by 255 - mix.
+ * NOTE(review): luma thresholds are compared against alpha and the soft
+ * edge is not interpolated yet (see TODO below) - confirm intent.
+ */
+
+/* Function call entry */
+       pushl %ebp
+       movl %esp,%ebp
+       subl $28,%esp
+       pushl %edi
+       pushl %esi
+       pushl %ebx
+
+/* Initialise */
+       cmpl $0, 16(%ebp)         # if width_src <= 0
+       jle .out                  #   nothing to composite
+       movl 8(%ebp), %esi        # get dest
+       xorl %edx, %edx           # j = 0
+       pxor %mm4, %mm4           # zero operand for the byte unpack below
+
+.loop:
+       movl $0xff, %ecx          # a = 255
+       cmpl $0, 20(%ebp)         # if alpha == NULL
+       je .noalpha
+       movl 20(%ebp), %edi       # a = alpha[ j ]
+       movb (%edi,%edx), %cl
+.noalpha:
+
+       movl 24(%ebp), %eax       # mix = weight
+       cmpl $0, 28(%ebp)         # if luma == NULL
+       je .noluma
+       movl 28(%ebp), %edi       # mix = ...
+       movw (%edi,%edx,2), %ax   # luma[ j ], entries are 16 bit words
+       cmpw %cx, %ax
+       jb .luma0                 # unsigned compare: luma holds 0..65535
+       movl %eax, %ebx
+       addl 32(%ebp), %ebx       # + softness
+       cmpw %bx, %cx
+       jae .luma1                # unsigned compare, as above
+       /* TODO: linear interpolate between edges eax and ebx */
+       jmp .noluma
+.luma0:
+       movl $0, %eax
+       jmp .noluma
+.luma1:
+       movl $0xffff, %eax
+.noluma:
+       shrl $8, %eax
+
+       movl %edx, %ebx           # edx will be destroyed by mull
+       mull %ecx                 # mix = mix * a...
+       movl %ebx, %edx           # restore edx
+       shrl $8, %eax             # >>8
+       andl $0xff, %eax
+
+/* put alpha and (1-alpha) into mm0 */
+/* 0 aa 0 1-a 0 aa 0 1-a */
+       /* duplicate word */
+       movl %eax, %ecx
+       shll $16, %ecx
+       orl %eax, %ecx
+       movd %ecx, %mm1
+
+       /* 255 - mix */
+       movl $0x000000ff, %ecx
+       subl %eax, %ecx
+       andl $0xff, %ecx
+
+       /* duplicate word */
+       movl %ecx, %eax
+       shll $16, %eax
+       orl %eax, %ecx
+       movd %ecx, %mm0
+
+       /* interleave: mm0 words become 1-a, a, 1-a, a (low to high) */
+       punpcklwd %mm1, %mm0
+
+/* put src yuv and dest yuv into mm1 */
+/* 0 UVs 0 UVd 0 Ys 0 Yd */
+
+       movl 12(%ebp), %edi       # get src
+       movzbl 1(%edi), %ecx      # UVs, zero extended so no stale bits leak in
+       shll $8, %ecx
+       movzbl 1(%esi), %eax      # UVd
+       orl %eax, %ecx
+       shll $8, %ecx
+       movzbl (%edi), %eax       # Ys
+       orl %eax, %ecx
+       shll $8, %ecx
+       movzbl (%esi), %eax       # Yd
+       orl %eax, %ecx
+
+       movd %ecx, %mm1
+       punpcklbw %mm4, %mm1      # bytes to words, mm4 is zero
+
+/* alpha composite */
+       pmaddwd %mm1, %mm0        # Yd*(1-a) + Ys*a | UVd*(1-a) + UVs*a
+       psrld $8, %mm0
+
+/* store result */
+       movd %mm0, %eax           # low dword: blended luma
+       movb %al, (%esi)
+       psrlq $32, %mm0           # high dword: blended chroma (plain MMX shift)
+       movd %mm0, %eax
+       movb %al, 1(%esi)
+
+/* for..next */
+       addl $1, %edx             # j++
+       cmpl %edx, 16(%ebp)       # if ( j == width_src )
+       je .out
+       addl $2, %esi
+       addl $2, 12(%ebp)
+       jmp .loop
+
+.out:
+       emms
+       leal -40(%ebp),%esp
+       popl %ebx
+       popl %esi
+       popl %edi
+       movl %ebp,%esp
+       popl %ebp
+       ret
+
+
+/********************************************/
+
+.align 8
+#if !defined(__MINGW32__) && !defined(__CYGWIN__)      
+.globl composite_have_mmx
+       .type    composite_have_mmx,@function
+composite_have_mmx:
+#else
+.globl _composite_have_mmx
+_composite_have_mmx:
+#endif
+/* int composite_have_mmx( void ): %eax = 1 if CPUID reports MMX, else 0 */
+       push    %ebx                    # cpuid overwrites %ebx, so save it
+
+# Check if bit 21 in flags word is writeable
+
+       pushfl  
+       popl    %eax                    # eax = current flags word
+       movl    %eax,%ebx               # keep a copy for the comparison
+       xorl    $0x00200000, %eax       # flip bit 21
+       pushl   %eax
+       popfl                           # try to load the modified flags
+       pushfl
+       popl    %eax                    # read back what the CPU accepted
+/* NOTE(review): the flags word is left with bit 21 toggled, it is not restored */
+       cmpl    %eax, %ebx
+
+       je .notfound                    # bit did not change: no CPUID
+
+# OK, we have CPUID
+
+       movl    $1, %eax                # CPUID function 1: feature flags
+       cpuid
+       
+       test    $0x00800000, %edx       # bit 23 of the feature flags in edx
+       jz      .notfound
+
+       movl    $1, %eax                # return 1
+       jmp     .out2
+
+.notfound:
+       movl    $0, %eax                # return 0
+.out2: 
+       popl    %ebx
+       ret
diff --git a/src/modules/core/filter_resize.c b/src/modules/core/filter_resize.c

index 7a1f5ddb60a6ac2dbe9566262c543c9bdd882768..435cf12767e869af13ae471474f5ddfe6dec22a7 100644 (file)
--- a/src/modules/core/filter_resize.c
+++ b/src/modules/core/filter_resize.c
@@ -130,7 +130,7 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format *
                 }
         }
  
-       return 0;
+       return error;
  }
  
  /** Filter processing.
diff --git a/src/modules/core/transition_composite.c b/src/modules/core/transition_composite.c

index 48f2d8aa64c902ba1b04f8d8cbd9fd3c118803bf..48d1c6ef0bda1a3b4d97507c88e69a6e55fbb190 100644 (file)
--- a/src/modules/core/transition_composite.c
+++ b/src/modules/core/transition_composite.c
@@ -27,6 +27,14 @@
  #include <string.h>
  #include <math.h>
  
+typedef void ( *composite_line_fn )( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness );
+
+/* mmx function declarations */
+#ifdef USE_MMX
+       void composite_line_yuv_mmx( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness );
+       int composite_have_mmx( void );
+#endif
+
  /** Geometry struct.
  */
  
@@ -155,9 +163,11 @@ static void geometry_calculate( struct geometry_s *output, struct geometry_s *in
         output->mix = in->mix + ( out->mix - in->mix ) * position;
         output->distort = in->distort;
  
-       output->x = ( int )floor( output->x ) & 0xfffffffe;
-       output->w = ( int )floor( output->w ) & 0xfffffffe;
-       output->sw &= 0xfffffffe;
+       // DRD> These break on negative values. I do not think they are needed
+       // since yuv_composite takes care of YUYV group alignment
+       //output->x = ( int )floor( output->x ) & 0xfffffffe;
+       //output->w = ( int )floor( output->w ) & 0xfffffffe;
+       //output->sw &= 0xfffffffe;
  }
  
  void transition_destroy_keys( void *arg )
@@ -481,22 +491,48 @@ static void luma_read_yuv422( uint8_t *image, uint16_t **map, int width, int hei
                 *p++ = ( image[ i ] - 16 ) * 299; // 299 = 65535 / 219
  }
  
+
+/** Blend one line of packed YUYV source pixels onto the destination line.
+*/
+
+static inline
+void composite_line_yuv( uint8_t *dest, uint8_t *src, int width_src, uint8_t *alpha, int weight, uint16_t *luma, int softness )
+{
+       int pixel;
+
+       // One iteration per pixel: two bytes of src are mixed into two bytes of dest
+       for ( pixel = 0; pixel < width_src; pixel ++, src += 2, dest += 2 )
+       {
+               // A missing alpha mask means fully opaque; a missing luma map means a flat weight
+               int opacity = ( alpha != NULL ) ? *alpha ++ : 255;
+               int blend = ( luma != NULL ) ? linearstep( luma[ pixel ], luma[ pixel ] + softness, weight ) : weight;
+               blend = ( blend * ( opacity + 1 ) ) >> 8;
+               const int keep = ( 1 << 16 ) - blend;
+               dest[ 0 ] = ( src[ 0 ] * blend + dest[ 0 ] * keep ) >> 16;
+               dest[ 1 ] = ( src[ 1 ] * blend + dest[ 1 ] * keep ) >> 16;
+       }
+}
+
  /** Composite function.
  */
  
-static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int bpp, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness )
+static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, uint8_t *p_src, int width_src, int height_src, uint8_t *p_alpha, struct geometry_s geometry, int field, uint16_t *p_luma, int32_t softness, composite_line_fn line_fn )
  {
         int ret = 0;
-       int i, j;
+       int i;
         int x_src = 0, y_src = 0;
         int32_t weight = ( 1 << 16 ) * ( geometry.mix / 100 );
-       int stride_src = width_src * bpp;
-       int stride_dest = width_dest * bpp;
+       int step = ( field > -1 ) ? 2 : 1;
+       int bpp = 2;
+       int stride_src = width_src * bpp * step;
+       int stride_dest = width_dest * bpp * step;
+       int alpha_stride = stride_src / bpp;
  
         // Adjust to consumer scale
         int x = geometry.x * width_dest / geometry.nw;
         int y = geometry.y * height_dest / geometry.nh;
  
+       // Align x to a full YUYV group
         x &= 0xfffffffe;
         width_src &= 0xfffffffe;
  
@@ -565,38 +601,13 @@ static int composite_yuv( uint8_t *p_dest, int width_dest, int height_dest, int
                 height_src--;
         }
  
-       uint8_t *p = p_src;
-       uint8_t *q = p_dest;
-       uint8_t *o = p_dest;
-       uint16_t *l = p_luma;
-       uint8_t *z = p_alpha;
-
-       uint8_t a;
-       int32_t current_weight;
-       int32_t value;
-       int step = ( field > -1 ) ? 2 : 1;
-
-       stride_src = stride_src * step;
-       int alpha_stride = stride_src / bpp;
-       stride_dest = stride_dest * step;
+       if ( line_fn == NULL )
+               line_fn = composite_line_yuv;
  
         // now do the compositing only to cropped extents
         for ( i = 0; i < height_src; i += step )
         {
-               p = p_src;
-               q = p_dest;
-               o = q;
-               l = p_luma;
-               z = p_alpha;
-
-               for ( j = 0; j < width_src; j ++ )
-               {
-                       a = ( z == NULL ) ? 255 : *z ++;
-                       current_weight = ( l == NULL ) ? weight : linearstep( l[ j ], l[ j ] + softness, weight );
-                       value = ( current_weight * ( a + 1 ) ) >> 8;
-                       *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
-                       *o ++ = ( *p++ * value + *q++ * ( ( 1 << 16 ) - value ) ) >> 16;
-               }
+               line_fn( p_dest, p_src, width_src, p_alpha, weight, p_luma, softness );
  
                 p_src += stride_src;
                 p_dest += stride_dest;
@@ -805,7 +816,7 @@ static int get_b_frame_image( mlt_transition this, mlt_frame b_frame, uint8_t **
         x -= x % 2;
  
         // optimization points - no work to do
-       if ( *width <= 0 || *height <= 0 )
+       if ( *width < 1 || *height < 1 )
                 return 1;
  
         if ( ( x < 0 && -x >= *width ) || ( y < 0 && -y >= *height ) )
@@ -895,7 +906,7 @@ mlt_frame composite_copy_region( mlt_transition this, mlt_frame a_frame, mlt_pos
         h = result.h * height / result.nh;
  
         x &= 0xfffffffe;
-       w &= 0xfffffffe;
+       //w &= 0xfffffffe;
  
         // Now we need to create a new destination image
         dest = mlt_pool_alloc( w * h * 2 );
@@ -979,7 +990,6 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
                 {
                         uint8_t *dest = *image;
                         uint8_t *src = image_b;
-                       int bpp = 2;
                         uint8_t *alpha = mlt_frame_get_alpha_mask( b_frame );
                         int progressive = mlt_properties_get_int( a_props, "progressive" ) ||
                                         mlt_properties_get_int( a_props, "consumer_progressive" ) ||
@@ -988,6 +998,7 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
                         
                         int32_t luma_softness = mlt_properties_get_double( properties, "softness" ) * ( 1 << 16 );
                         uint16_t *luma_bitmap = get_luma( properties, width_b, height_b );
+                       composite_line_fn line_fn = mlt_properties_get_int( properties, "_MMX" ) ? composite_line_yuv_mmx : composite_line_yuv;
  
                         for ( field = 0; field < ( progressive ? 1 : 2 ); field++ )
                         {
@@ -1001,7 +1012,7 @@ static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_f
                                 alignment_calculate( &result );
  
                                 // Composite the b_frame on the a_frame
-                               composite_yuv( dest, *width, *height, bpp, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness );
+                               composite_yuv( dest, *width, *height, src, width_b, height_b, alpha, result, progressive ? -1 : field, luma_bitmap, luma_softness, line_fn );
                         }
                 }
         }
@@ -1030,11 +1041,19 @@ mlt_transition transition_composite_init( char *arg )
         mlt_transition this = calloc( sizeof( struct mlt_transition_s ), 1 );
         if ( this != NULL && mlt_transition_init( this, NULL ) == 0 )
         {
+               mlt_properties properties = mlt_transition_properties( this );
+               
                 this->process = composite_process;
-               mlt_properties_set( mlt_transition_properties( this ), "start", arg != NULL ? arg : "85%,5%:10%x10%" );
+               
+               // Default starting motion and zoom
+               mlt_properties_set( properties, "start", arg != NULL ? arg : "85%,5%:10%x10%" );
                 
                 // Default factory
-               mlt_properties_set( mlt_transition_properties( this ), "factory", "fezzik" );
+               mlt_properties_set( properties, "factory", "fezzik" );
+
+#ifdef USE_MMX
+               //mlt_properties_set_int( properties, "_MMX", composite_have_mmx() );
+#endif
         }
         return this;
  }
diff --git a/src/modules/gtk2/filter_rescale.c b/src/modules/gtk2/filter_rescale.c

index 5151a059b46043bfde9448721386842f0809421f..c57e33d1bd3a86ad59b5a9c00b65f950b82ae3e0 100644 (file)
--- a/src/modules/gtk2/filter_rescale.c
+++ b/src/modules/gtk2/filter_rescale.c
@@ -38,6 +38,8 @@ static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format *
                 *width = 720;
         if ( *height == 0 )
                 *height = 576;
+       if ( *width < 2 || *height < 6 )
+               return 1;
  
         mlt_properties properties = mlt_frame_properties( this );
         int iwidth = *width;
author	ddennedy <ddennedy@d19143bc-622f-0410-bfdd-b5b2a6649095>
	Thu, 11 Mar 2004 07:22:22 +0000 (07:22 +0000)
committer	ddennedy <ddennedy@d19143bc-622f-0410-bfdd-b5b2a6649095>
	Thu, 11 Mar 2004 07:22:22 +0000 (07:22 +0000)
src/modules/core/Makefile		patch \| blob \| history
src/modules/core/composite_line_yuv_mmx.S	[new file with mode: 0644]	patch \| blob
src/modules/core/filter_resize.c		patch \| blob \| history
src/modules/core/transition_composite.c		patch \| blob \| history
src/modules/gtk2/filter_rescale.c		patch \| blob \| history