OBJS = factory.o \
deinterlace.o \
+ yadif.o \
filter_deinterlace.o
ifdef MMX_FLAGS
#define DEINTERLACE_ONEFIELD 4
#define DEINTERLACE_ONEFIELDXV 5
#define DEINTERLACE_LINEARBLEND 6
-
+#define DEINTERLACE_YADIF 7
+#define DEINTERLACE_YADIF_NOSPATIAL 8
+
extern const char *deinterlace_methods[];
#endif
#include <framework/mlt_filter.h>
#include <framework/mlt_log.h>
+#include <framework/mlt_producer.h>
+#include <framework/mlt_events.h>
#include "deinterlace.h"
+#include "yadif.h"
#include <framework/mlt_frame.h>
#include <string.h>
#include <stdlib.h>
+/* Deinterlace one yuv422 frame with yadif, using the previous and next
+   frames that were attached to this frame as data properties.
+   Returns 1 when either neighbour frame is missing (caller should fall
+   back to another method), otherwise the error code from
+   mlt_frame_get_image().
+   mode: 0 = normal yadif, 2 = yadif without the spatial interlacing
+   check ("yadif-nospatial"). */
+int deinterlace_yadif( mlt_frame frame, mlt_filter filter, uint8_t **image, mlt_image_format *format, int *width, int *height, int mode )
+{
+ mlt_properties properties = MLT_FRAME_PROPERTIES( frame );
+ mlt_frame previous_frame = mlt_properties_get_data( properties, "previous frame", NULL );
+ uint8_t* previous_image = NULL;
+ int previous_width = *width;
+ int previous_height = *height;
+ mlt_frame next_frame = mlt_properties_get_data( properties, "next frame", NULL );
+ uint8_t* next_image = NULL;
+ int next_width = *width;
+ int next_height = *height;
+ yadif_filter *yadif = mlt_properties_get_data( MLT_FILTER_PROPERTIES( filter ), "yadif", NULL );
+
+ mlt_log_debug( MLT_FILTER_SERVICE(filter), "previous %d current %d next %d\n",
+ previous_frame? mlt_frame_get_position(previous_frame) : -1,
+ mlt_frame_get_position(frame),
+ next_frame? mlt_frame_get_position(next_frame) : -1);
+
+ // Temporal filtering needs both neighbours; signal fallback if absent.
+ if ( !previous_frame || !next_frame )
+ return 1;
+
+ // Get the preceding frame's image
+ int error = mlt_frame_get_image( previous_frame, &previous_image, format, &previous_width, &previous_height, 0 );
+
+ if ( !error && previous_image && *format == mlt_image_yuv422 )
+ {
+ // Get the current frame's image
+ error = mlt_frame_get_image( frame, image, format, width, height, 0 );
+
+ // Check that we aren't already progressive
+ if ( !error && *image && *format == mlt_image_yuv422 &&
+ !mlt_properties_get_int( MLT_FRAME_PROPERTIES( frame ), "progressive" ) )
+ {
+ // Get the following frame's image
+ error = mlt_frame_get_image( next_frame, &next_image, format, &next_width, &next_height, 0 );
+
+ if ( !error && next_image && *format == mlt_image_yuv422 )
+ {
+ if ( !yadif->ysrc )
+ {
+ // Create intermediate planar planes
+ // Pitches are rounded up to a multiple of 16 for the SIMD paths.
+ // NOTE(review): mlt_pool_alloc results are not NULL-checked here
+ // -- TODO confirm the pool allocator aborts or callers tolerate it.
+ yadif->yheight = *height;
+ yadif->ywidth = *width;
+ yadif->uvwidth = yadif->ywidth / 2;
+ yadif->ypitch = ( yadif->ywidth + 15 ) / 16 * 16;
+ yadif->uvpitch = ( yadif->uvwidth + 15 ) / 16 * 16;
+ yadif->ysrc = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->ypitch );
+ yadif->usrc = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch);
+ yadif->vsrc = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+ yadif->yprev = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->ypitch );
+ yadif->uprev = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+ yadif->vprev = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+ yadif->ynext = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->ypitch );
+ yadif->unext = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+ yadif->vnext = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+ yadif->ydest = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->ypitch );
+ yadif->udest = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+ yadif->vdest = (unsigned char *) mlt_pool_alloc( yadif->yheight * yadif->uvpitch );
+
+ }
+
+ const int order = mlt_properties_get_int( properties, "top_field_first" );
+ const int pitch = *width << 1;
+ const int parity = 0;
+
+ // Convert packed to planar
+ YUY2ToPlanes( *image, pitch, *width, *height, yadif->ysrc,
+ yadif->ypitch, yadif->usrc, yadif->vsrc, yadif->uvpitch, yadif->cpu );
+ YUY2ToPlanes( previous_image, pitch, *width, *height, yadif->yprev,
+ yadif->ypitch, yadif->uprev, yadif->vprev, yadif->uvpitch, yadif->cpu );
+ YUY2ToPlanes( next_image, pitch, *width, *height, yadif->ynext,
+ yadif->ypitch, yadif->unext, yadif->vnext, yadif->uvpitch, yadif->cpu );
+
+ // Deinterlace each plane
+ filter_plane( mode, yadif->ydest, yadif->ypitch, yadif->yprev, yadif->ysrc,
+ yadif->ynext, yadif->ypitch, *width, *height, parity, order, yadif->cpu);
+ filter_plane( mode, yadif->udest, yadif->uvpitch,yadif->uprev, yadif->usrc,
+ yadif->unext, yadif->uvpitch, *width >> 1, *height, parity, order, yadif->cpu);
+ filter_plane( mode, yadif->vdest, yadif->uvpitch, yadif->vprev, yadif->vsrc,
+ yadif->vnext, yadif->uvpitch, *width >> 1, *height, parity, order, yadif->cpu);
+
+ // Convert planar to packed
+ YUY2FromPlanes( *image, pitch, *width, *height, yadif->ydest,
+ yadif->ypitch, yadif->udest, yadif->vdest, yadif->uvpitch, yadif->cpu);
+ }
+ }
+ }
+ return error;
+}
+
/** Do it :-).
*/
static int filter_get_image( mlt_frame this, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
{
int error = 0;
- int deinterlace = mlt_properties_get_int( MLT_FRAME_PROPERTIES( this ), "consumer_deinterlace" );
- int progressive = mlt_properties_get_int( MLT_FRAME_PROPERTIES( this ), "progressive" );
+ mlt_properties properties = MLT_FRAME_PROPERTIES( this );
+ int deinterlace = mlt_properties_get_int( properties, "consumer_deinterlace" );
+ int progressive = mlt_properties_get_int( properties, "progressive" );
// Pop the service off the stack
mlt_filter filter = mlt_frame_pop_service( this );
- // Determine if we need a writable version or not
- if ( deinterlace && !writable )
- writable = !progressive;
-
// Get the input image
if ( deinterlace && !progressive )
- *format = mlt_image_yuv422;
- error = mlt_frame_get_image( this, image, format, width, height, writable );
- progressive = mlt_properties_get_int( MLT_FRAME_PROPERTIES( this ), "progressive" );
- mlt_log_debug( MLT_FILTER_SERVICE( filter ), "xine.deinterlace %d prog %d format %s\n",
- deinterlace, progressive, mlt_image_format_name( *format ) );
-
- // Check that we want progressive and we aren't already progressive
- if ( deinterlace && *format == mlt_image_yuv422 && *image && !progressive )
{
// Determine deinterlace method
char *method_str = mlt_properties_get( MLT_FILTER_PROPERTIES( filter ), "method" );
- int method = DEINTERLACE_LINEARBLEND;
- char *frame_method_str = mlt_properties_get( MLT_FRAME_PROPERTIES( this ), "deinterlace_method" );
+ int method = DEINTERLACE_NONE;
+ char *frame_method_str = mlt_properties_get( properties, "deinterlace_method" );
- if ( frame_method_str != NULL )
+ if ( frame_method_str )
method_str = frame_method_str;
- if ( method_str == NULL )
+ if ( !method_str || strcmp( method_str, "yadif" ) == 0 )
+ method = DEINTERLACE_YADIF;
+ else if ( strcmp( method_str, "yadif-nospatial" ) == 0 )
+ method = DEINTERLACE_YADIF_NOSPATIAL;
+ else if ( strcmp( method_str, "onefield" ) == 0 )
+ method = DEINTERLACE_ONEFIELD;
+ else if ( strcmp( method_str, "linearblend" ) == 0 )
method = DEINTERLACE_LINEARBLEND;
else if ( strcmp( method_str, "bob" ) == 0 )
method = DEINTERLACE_BOB;
else if ( strcmp( method_str, "greedy" ) == 0 )
method = DEINTERLACE_GREEDY;
- else if ( strcmp( method_str, "onefield" ) == 0 )
- method = DEINTERLACE_ONEFIELD;
+
+ *format = mlt_image_yuv422;
+
+ // Yadif variants deinterlace using the previous/next frames attached
+ // to this frame; mode 2 disables the spatial interlacing check.
+ if ( method == DEINTERLACE_YADIF )
+ {
+ int mode = 0;
+ error = deinterlace_yadif( this, filter, image, format, width, height, mode );
+ progressive = mlt_properties_get_int( properties, "progressive" );
+ }
+ else if ( method == DEINTERLACE_YADIF_NOSPATIAL )
+ {
+ int mode = 2;
+ error = deinterlace_yadif( this, filter, image, format, width, height, mode );
+ progressive = mlt_properties_get_int( properties, "progressive" );
+ }
+ // Fall back to a single-frame Xine deinterlacer when yadif failed
+ // (missing neighbour frames) or when one was explicitly selected.
+ if ( error || ( method > DEINTERLACE_NONE && method < DEINTERLACE_YADIF ) )
+ {
+ // Signal that we no longer need previous and next frames
+ mlt_producer producer = mlt_producer_cut_parent( mlt_frame_get_original_producer(this) );
+ mlt_properties_set_int( MLT_PRODUCER_PROPERTIES(producer), "_need_previous_next", 0 );
- // Deinterlace the image
- deinterlace_yuv( *image, image, *width * 2, *height, method );
+ if ( error )
+ method = DEINTERLACE_ONEFIELD;
+
+ // Get the current frame's image
+ error = mlt_frame_get_image( this, image, format, width, height, writable );
+ progressive = mlt_properties_get_int( properties, "progressive" );
+
+ // Check that we aren't already progressive
+ if ( !progressive && !error && *image && *format == mlt_image_yuv422 )
+ {
+ // Deinterlace the image using one of the Xine deinterlacers
+ int image_size = *width * *height * 2;
+ uint8_t *new_image = mlt_pool_alloc( image_size );
+
+ deinterlace_yuv( new_image, image, *width * 2, *height, method );
+ mlt_properties_set_data( properties, "image", new_image, image_size, mlt_pool_release, NULL );
+ *image = new_image;
+ }
+ }
+ else if ( method == DEINTERLACE_NONE )
+ {
+ error = mlt_frame_get_image( this, image, format, width, height, writable );
+ }
- // Make sure that others know the frame is deinterlaced
- mlt_properties_set_int( MLT_FRAME_PROPERTIES( this ), "progressive", 1 );
+ mlt_log_debug( MLT_FILTER_SERVICE( filter ), "error %d deint %d prog %d fmt %s method %s\n",
+ error, deinterlace, progressive, mlt_image_format_name( *format ), method_str ? method_str : "yadif" );
+
+ if ( !error )
+ {
+ // Make sure that others know the frame is deinterlaced
+ mlt_properties_set_int( properties, "progressive", 1 );
+ }
}
-
+ else
+ {
+ // Pass through
+ error = mlt_frame_get_image( this, image, format, width, height, writable );
+ }
+
return error;
}
return frame;
}
+/* Filter destructor: release the twelve planar scratch buffers (allocated
+   lazily on first use in deinterlace_yadif) and the yadif context itself. */
+static void filter_close( mlt_filter this )
+{
+ yadif_filter *yadif = mlt_properties_get_data( MLT_FILTER_PROPERTIES( this ), "yadif", NULL );
+ if ( yadif )
+ {
+ // ysrc acts as the sentinel: all buffers are allocated together.
+ if ( yadif->ysrc )
+ {
+ mlt_pool_release( yadif->ysrc );
+ mlt_pool_release( yadif->usrc );
+ mlt_pool_release( yadif->vsrc );
+ mlt_pool_release( yadif->yprev );
+ mlt_pool_release( yadif->uprev );
+ mlt_pool_release( yadif->vprev );
+ mlt_pool_release( yadif->ynext );
+ mlt_pool_release( yadif->unext );
+ mlt_pool_release( yadif->vnext );
+ mlt_pool_release( yadif->ydest );
+ mlt_pool_release( yadif->udest );
+ mlt_pool_release( yadif->vdest );
+ }
+ mlt_pool_release( yadif );
+ }
+}
+
+/* "service-changed" event listener: flags the attached service so that it
+   supplies the previous/next frames yadif needs for temporal filtering.
+   NOTE(review): the "service" data property is presumably the connected
+   producer -- confirm against the code that fires this event. */
+static void on_service_changed( mlt_service owner, mlt_service filter )
+{
+ mlt_service service = mlt_properties_get_data( MLT_SERVICE_PROPERTIES(filter), "service", NULL );
+ mlt_properties_set_int( MLT_SERVICE_PROPERTIES(service), "_need_previous_next", 1 );
+}
+
/** Constructor for the filter.
*/
mlt_filter this = mlt_filter_new( );
if ( this != NULL )
{
+ yadif_filter *yadif = mlt_pool_alloc( sizeof( *yadif ) );
+
+ yadif->cpu = 0; // Pure C
+#ifdef USE_SSE
+ yadif->cpu |= AVS_CPU_INTEGER_SSE;
+#endif
+#ifdef USE_SSE2
+ yadif->cpu |= AVS_CPU_SSE2;
+#endif
+ yadif->ysrc = NULL;
this->process = deinterlace_process;
+ this->close = filter_close;
mlt_properties_set( MLT_FILTER_PROPERTIES( this ), "method", arg );
+ mlt_properties_set_data( MLT_FILTER_PROPERTIES( this ), "yadif", yadif, sizeof(*yadif), NULL, NULL );
+ mlt_events_listen( MLT_FILTER_PROPERTIES( this ), this, "service-changed", (mlt_listener) on_service_changed );
+
+#if defined(__GNUC__) && !defined(PIC)
+ // Set SSSE3 bit to cpu
+ asm (\
+ "mov $1, %%eax \n\t"\
+ "push %%ebx \n\t"\
+ "cpuid \n\t"\
+ "pop %%ebx \n\t"\
+ "mov %%ecx, %%edx \n\t"\
+ "shr $9, %%edx \n\t"\
+ "and $1, %%edx \n\t"\
+ "shl $9, %%edx \n\t"\
+ "and $511, %%ebx \n\t"\
+ "or %%edx, %%ebx \n\t"\
+ : "=b"(yadif->cpu) : "p"(yadif->cpu) : "%eax", "%ecx", "%edx");
+#endif
}
return this;
}
--- /dev/null
+/*\r
+ * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>\r
+ *\r
+ * SSE2/SSSE3 version (custom optimization) by h.yamagata\r
+ *\r
+ * Small fix by Alexander Balakhnin (fizick@avisynth.org.ru)\r
+ *\r
+ * MPlayer is free software; you can redistribute it and/or modify\r
+ * it under the terms of the GNU General Public License as published by\r
+ * the Free Software Foundation; either version 2 of the License, or\r
+ * (at your option) any later version.\r
+ *\r
+ * MPlayer is distributed in the hope that it will be useful,\r
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * GNU General Public License for more details.\r
+ *\r
+ * You should have received a copy of the GNU General Public License along\r
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,\r
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\r
+ */\r
+\r
+#define LOAD8(mem,dst) \\r
+ "movq "mem", "#dst" \n\t"\\r
+ "punpcklbw %%xmm7, "#dst" \n\t"\r
+\r
+#define CHECK(pj,mj) \\r
+ "movdqu "#pj"(%[cur],%[mrefs]), %%xmm2 \n\t" /* cur[x-refs-1+j] */\\r
+ "movdqu "#mj"(%[cur],%[prefs]), %%xmm3 \n\t" /* cur[x+refs-1-j] */\\r
+ "movdqa %%xmm2, %%xmm4 \n\t"\\r
+ "movdqa %%xmm2, %%xmm5 \n\t"\\r
+ "pxor %%xmm3, %%xmm4 \n\t"\\r
+ "pavgb %%xmm3, %%xmm5 \n\t"\\r
+ "pand %[pb1], %%xmm4 \n\t"\\r
+ "psubusb %%xmm4, %%xmm5 \n\t"\\r
+ "psrldq $1, %%xmm5 \n\t"\\r
+ "punpcklbw %%xmm7, %%xmm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\\r
+ "movdqa %%xmm2, %%xmm4 \n\t"\\r
+ "psubusb %%xmm3, %%xmm2 \n\t"\\r
+ "psubusb %%xmm4, %%xmm3 \n\t"\\r
+ "pmaxub %%xmm3, %%xmm2 \n\t"\\r
+ "movdqa %%xmm2, %%xmm3 \n\t"\\r
+ "movdqa %%xmm2, %%xmm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\\r
+ "psrldq $1, %%xmm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\\r
+ "psrldq $2, %%xmm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\\r
+ "punpcklbw %%xmm7, %%xmm2 \n\t"\\r
+ "punpcklbw %%xmm7, %%xmm3 \n\t"\\r
+ "punpcklbw %%xmm7, %%xmm4 \n\t"\\r
+ "paddw %%xmm3, %%xmm2 \n\t"\\r
+ "paddw %%xmm4, %%xmm2 \n\t" /* score */\r
+\r
+#define CHECK1 \\r
+ "movdqa %%xmm0, %%xmm3 \n\t"\\r
+ "pcmpgtw %%xmm2, %%xmm3 \n\t" /* if(score < spatial_score) */\\r
+ "pminsw %%xmm2, %%xmm0 \n\t" /* spatial_score= score; */\\r
+ "movdqa %%xmm3, %%xmm6 \n\t"\\r
+ "pand %%xmm3, %%xmm5 \n\t"\\r
+ "pandn %%xmm1, %%xmm3 \n\t"\\r
+ "por %%xmm5, %%xmm3 \n\t"\\r
+ "movdqa %%xmm3, %%xmm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */\r
+\r
+#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\\r
+ hurts both quality and speed, but matches the C version. */\\r
+ "paddw %[pw1], %%xmm6 \n\t"\\r
+ "psllw $14, %%xmm6 \n\t"\\r
+ "paddsw %%xmm6, %%xmm2 \n\t"\\r
+ "movdqa %%xmm0, %%xmm3 \n\t"\\r
+ "pcmpgtw %%xmm2, %%xmm3 \n\t"\\r
+ "pminsw %%xmm2, %%xmm0 \n\t"\\r
+ "pand %%xmm3, %%xmm5 \n\t"\\r
+ "pandn %%xmm1, %%xmm3 \n\t"\\r
+ "por %%xmm5, %%xmm3 \n\t"\\r
+ "movdqa %%xmm3, %%xmm1 \n\t"\r
+\r
+/* mode argument mod - Fizick */\r
+\r
+/* static attribute_align_arg void FILTER_LINE_FUNC_NAME(YadifContext *yadctx, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){\r
+ const int mode = yadctx->mode; */\r
+static attribute_align_arg void FILTER_LINE_FUNC_NAME(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){\r
+ DECLARE_ALIGNED(16, uint8_t, tmp0[16]);\r
+ DECLARE_ALIGNED(16, uint8_t, tmp1[16]);\r
+ DECLARE_ALIGNED(16, uint8_t, tmp2[16]);\r
+ DECLARE_ALIGNED(16, uint8_t, tmp3[16]);\r
+ int x;\r
+ static DECLARE_ALIGNED(16, const unsigned short, pw_1[]) =\r
+ {\r
+ 0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001\r
+ };\r
+\r
+ static DECLARE_ALIGNED(16, const unsigned short, pb_1[]) =\r
+ {\r
+ 0x0101,0x0101,0x0101,0x0101,0x0101,0x0101,0x0101,0x0101\r
+ };\r
+\r
+\r
+#define FILTER\\r
+ for(x=0; x<w; x+=8){\\r
+ __asm__ volatile(\\r
+ "pxor %%xmm7, %%xmm7 \n\t"\\r
+ LOAD8("(%[cur],%[mrefs])", %%xmm0) /* c = cur[x-refs] */\\r
+ LOAD8("(%[cur],%[prefs])", %%xmm1) /* e = cur[x+refs] */\\r
+ LOAD8("(%["prev2"])", %%xmm2) /* prev2[x] */\\r
+ LOAD8("(%["next2"])", %%xmm3) /* next2[x] */\\r
+ "movdqa %%xmm3, %%xmm4 \n\t"\\r
+ "paddw %%xmm2, %%xmm3 \n\t"\\r
+ "psraw $1, %%xmm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\\r
+ "movdqa %%xmm0, %[tmp0] \n\t" /* c */\\r
+ "movdqa %%xmm3, %[tmp1] \n\t" /* d */\\r
+ "movdqa %%xmm1, %[tmp2] \n\t" /* e */\\r
+ "psubw %%xmm4, %%xmm2 \n\t"\\r
+ PABS( %%xmm4, %%xmm2) /* temporal_diff0 */\\r
+ LOAD8("(%[prev],%[mrefs])", %%xmm3) /* prev[x-refs] */\\r
+ LOAD8("(%[prev],%[prefs])", %%xmm4) /* prev[x+refs] */\\r
+ "psubw %%xmm0, %%xmm3 \n\t"\\r
+ "psubw %%xmm1, %%xmm4 \n\t"\\r
+ PABS( %%xmm5, %%xmm3)\\r
+ PABS( %%xmm5, %%xmm4)\\r
+ "paddw %%xmm4, %%xmm3 \n\t" /* temporal_diff1 */\\r
+ "psrlw $1, %%xmm2 \n\t"\\r
+ "psrlw $1, %%xmm3 \n\t"\\r
+ "pmaxsw %%xmm3, %%xmm2 \n\t"\\r
+ LOAD8("(%[next],%[mrefs])", %%xmm3) /* next[x-refs] */\\r
+ LOAD8("(%[next],%[prefs])", %%xmm4) /* next[x+refs] */\\r
+ "psubw %%xmm0, %%xmm3 \n\t"\\r
+ "psubw %%xmm1, %%xmm4 \n\t"\\r
+ PABS( %%xmm5, %%xmm3)\\r
+ PABS( %%xmm5, %%xmm4)\\r
+ "paddw %%xmm4, %%xmm3 \n\t" /* temporal_diff2 */\\r
+ "psrlw $1, %%xmm3 \n\t"\\r
+ "pmaxsw %%xmm3, %%xmm2 \n\t"\\r
+ "movdqa %%xmm2, %[tmp3] \n\t" /* diff */\\r
+\\r
+ "paddw %%xmm0, %%xmm1 \n\t"\\r
+ "paddw %%xmm0, %%xmm0 \n\t"\\r
+ "psubw %%xmm1, %%xmm0 \n\t"\\r
+ "psrlw $1, %%xmm1 \n\t" /* spatial_pred */\\r
+ PABS( %%xmm2, %%xmm0) /* ABS(c-e) */\\r
+\\r
+ "movdqu -1(%[cur],%[mrefs]), %%xmm2 \n\t" /* cur[x-refs-1] */\\r
+ "movdqu -1(%[cur],%[prefs]), %%xmm3 \n\t" /* cur[x+refs-1] */\\r
+ "movdqa %%xmm2, %%xmm4 \n\t"\\r
+ "psubusb %%xmm3, %%xmm2 \n\t"\\r
+ "psubusb %%xmm4, %%xmm3 \n\t"\\r
+ "pmaxub %%xmm3, %%xmm2 \n\t"\\r
+ /*"pshuflw $9,%%xmm2, %%xmm3 \n\t"*/\\r
+ /*"pshufhw $9,%%xmm2, %%xmm3 \n\t"*/\\r
+ "movdqa %%xmm2, %%xmm3 \n\t" /* correct replacement (here) */\
+ "psrldq $2, %%xmm3 \n\t"/* for "pshufw $9,%%mm2, %%mm3" - fix by Fizick */\
+ "punpcklbw %%xmm7, %%xmm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\\r
+ "punpcklbw %%xmm7, %%xmm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\\r
+ "paddw %%xmm2, %%xmm0 \n\t"\\r
+ "paddw %%xmm3, %%xmm0 \n\t"\\r
+ "psubw %[pw1], %%xmm0 \n\t" /* spatial_score */\\r
+\\r
+ CHECK(-2,0)\\r
+ CHECK1\\r
+ CHECK(-3,1)\\r
+ CHECK2\\r
+ CHECK(0,-2)\\r
+ CHECK1\\r
+ CHECK(1,-3)\\r
+ CHECK2\\r
+\\r
+ /* if(yadctx->mode<2) ... */\\r
+ "movdqa %[tmp3], %%xmm6 \n\t" /* diff */\\r
+ "cmp $2, %[mode] \n\t"\\r
+ "jge 1f \n\t"\\r
+ LOAD8("(%["prev2"],%[mrefs],2)", %%xmm2) /* prev2[x-2*refs] */\\r
+ LOAD8("(%["next2"],%[mrefs],2)", %%xmm4) /* next2[x-2*refs] */\\r
+ LOAD8("(%["prev2"],%[prefs],2)", %%xmm3) /* prev2[x+2*refs] */\\r
+ LOAD8("(%["next2"],%[prefs],2)", %%xmm5) /* next2[x+2*refs] */\\r
+ "paddw %%xmm4, %%xmm2 \n\t"\\r
+ "paddw %%xmm5, %%xmm3 \n\t"\\r
+ "psrlw $1, %%xmm2 \n\t" /* b */\\r
+ "psrlw $1, %%xmm3 \n\t" /* f */\\r
+ "movdqa %[tmp0], %%xmm4 \n\t" /* c */\\r
+ "movdqa %[tmp1], %%xmm5 \n\t" /* d */\\r
+ "movdqa %[tmp2], %%xmm7 \n\t" /* e */\\r
+ "psubw %%xmm4, %%xmm2 \n\t" /* b-c */\\r
+ "psubw %%xmm7, %%xmm3 \n\t" /* f-e */\\r
+ "movdqa %%xmm5, %%xmm0 \n\t"\\r
+ "psubw %%xmm4, %%xmm5 \n\t" /* d-c */\\r
+ "psubw %%xmm7, %%xmm0 \n\t" /* d-e */\\r
+ "movdqa %%xmm2, %%xmm4 \n\t"\\r
+ "pminsw %%xmm3, %%xmm2 \n\t"\\r
+ "pmaxsw %%xmm4, %%xmm3 \n\t"\\r
+ "pmaxsw %%xmm5, %%xmm2 \n\t"\\r
+ "pminsw %%xmm5, %%xmm3 \n\t"\\r
+ "pmaxsw %%xmm0, %%xmm2 \n\t" /* max */\\r
+ "pminsw %%xmm0, %%xmm3 \n\t" /* min */\\r
+ "pxor %%xmm4, %%xmm4 \n\t"\\r
+ "pmaxsw %%xmm3, %%xmm6 \n\t"\\r
+ "psubw %%xmm2, %%xmm4 \n\t" /* -max */\\r
+ "pmaxsw %%xmm4, %%xmm6 \n\t" /* diff= MAX3(diff, min, -max); */\\r
+ "1: \n\t"\\r
+\\r
+ "movdqa %[tmp1], %%xmm2 \n\t" /* d */\\r
+ "movdqa %%xmm2, %%xmm3 \n\t"\\r
+ "psubw %%xmm6, %%xmm2 \n\t" /* d-diff */\\r
+ "paddw %%xmm6, %%xmm3 \n\t" /* d+diff */\\r
+ "pmaxsw %%xmm2, %%xmm1 \n\t"\\r
+ "pminsw %%xmm3, %%xmm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\\r
+ "packuswb %%xmm1, %%xmm1 \n\t"\\r
+\\r
+ :[tmp0]"=m"(tmp0),\\r
+ [tmp1]"=m"(tmp1),\\r
+ [tmp2]"=m"(tmp2),\\r
+ [tmp3]"=m"(tmp3)\\r
+ :[prev] "r"(prev),\\r
+ [cur] "r"(cur),\\r
+ [next] "r"(next),\\r
+ [prefs]"r"((long)refs),\\r
+ [mrefs]"r"((long)-refs),\\r
+ [pw1] "m"(*pw_1),\\r
+ [pb1] "m"(*pb_1),\\r
+ [mode] "g"(mode)\\r
+ );\\r
+ __asm__ volatile("movq %%xmm1, %0" :"=m"(*dst));\\r
+ dst += 8;\\r
+ prev+= 8;\\r
+ cur += 8;\\r
+ next+= 8;\\r
+ }\r
+\r
+ if(parity){\r
+#define prev2 "prev"\r
+#define next2 "cur"\r
+ FILTER\r
+#undef prev2\r
+#undef next2\r
+ }else{\r
+#define prev2 "cur"\r
+#define next2 "next"\r
+ FILTER\r
+#undef prev2\r
+#undef next2\r
+ }\r
+}\r
+#undef LOAD8\r
+#undef PABS\r
+#undef CHECK\r
+#undef CHECK1\r
+#undef CHECK2\r
+#undef FILTER\r
+#undef FILTER_LINE_FUNC_NAME\r
--- /dev/null
+/*
+ Yadif C-plugin for Avisynth 2.5 - Yet Another DeInterlacing Filter
+ Copyright (C)2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
+ Port of YADIF filter from MPlayer
+ Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Avisynth_C plugin
+ Assembler optimized for GNU C compiler
+
+*/
+#include "yadif.h"
+#include <stdlib.h>
+#include <memory.h>
+
+#define MIN(a,b) ((a) > (b) ? (b) : (a))
+#define MAX(a,b) ((a) < (b) ? (b) : (a))
+#define ABS(a) ((a) > 0 ? (a) : (-(a)))
+
+#define MIN3(a,b,c) MIN(MIN(a,b),c)
+#define MAX3(a,b,c) MAX(MAX(a,b),c)
+
+static void (*filter_line)(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity);
+
+#ifdef __GNUC__
+#define LOAD4(mem,dst) \
+ "movd "mem", "#dst" \n\t"\
+ "punpcklbw %%mm7, "#dst" \n\t"
+
+#define PABS(tmp,dst) \
+ "pxor "#tmp", "#tmp" \n\t"\
+ "psubw "#dst", "#tmp" \n\t"\
+ "pmaxsw "#tmp", "#dst" \n\t"
+
+#define CHECK(pj,mj) \
+ "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1+j] */\
+ "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1-j] */\
+ "movq %%mm2, %%mm4 \n\t"\
+ "movq %%mm2, %%mm5 \n\t"\
+ "pxor %%mm3, %%mm4 \n\t"\
+ "pavgb %%mm3, %%mm5 \n\t"\
+ "pand %[pb1], %%mm4 \n\t"\
+ "psubusb %%mm4, %%mm5 \n\t"\
+ "psrlq $8, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
+ "movq %%mm2, %%mm4 \n\t"\
+ "psubusb %%mm3, %%mm2 \n\t"\
+ "psubusb %%mm4, %%mm3 \n\t"\
+ "pmaxub %%mm3, %%mm2 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "movq %%mm2, %%mm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
+ "psrlq $8, %%mm3 \n\t" /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
+ "psrlq $16, %%mm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t" /* score */
+
+#define CHECK1 \
+ "movq %%mm0, %%mm3 \n\t"\
+ "pcmpgtw %%mm2, %%mm3 \n\t" /* if(score < spatial_score) */\
+ "pminsw %%mm2, %%mm0 \n\t" /* spatial_score= score; */\
+ "movq %%mm3, %%mm6 \n\t"\
+ "pand %%mm3, %%mm5 \n\t"\
+ "pandn %%mm1, %%mm3 \n\t"\
+ "por %%mm5, %%mm3 \n\t"\
+ "movq %%mm3, %%mm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
+
+#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
+ hurts both quality and speed, but matches the C version. */\
+ "paddw %[pw1], %%mm6 \n\t"\
+ "psllw $14, %%mm6 \n\t"\
+ "paddsw %%mm6, %%mm2 \n\t"\
+ "movq %%mm0, %%mm3 \n\t"\
+ "pcmpgtw %%mm2, %%mm3 \n\t"\
+ "pminsw %%mm2, %%mm0 \n\t"\
+ "pand %%mm3, %%mm5 \n\t"\
+ "pandn %%mm1, %%mm3 \n\t"\
+ "por %%mm5, %%mm3 \n\t"\
+ "movq %%mm3, %%mm1 \n\t"
+
+/* MMX2 / integer-SSE implementation of the yadif line filter; processes
+   4 pixels per loop iteration.  Same contract as filter_line_c -- see that
+   function for a readable description of the algorithm.  The prev2/next2
+   operands are selected textually (macro names spliced into the asm) in
+   the parity branches at the bottom. */
+static void filter_line_mmx2(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
+ static const uint64_t pw_1 = 0x0001000100010001ULL;
+ static const uint64_t pb_1 = 0x0101010101010101ULL;
+// const int mode = p->mode;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+ int x;
+
+#define FILTER\
+ for(x=0; x<w; x+=4){\
+ asm volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
+ LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
+ LOAD4("(%["prev2"])", %%mm2) /* prev2[x] */\
+ LOAD4("(%["next2"])", %%mm3) /* next2[x] */\
+ "movq %%mm3, %%mm4 \n\t"\
+ "paddw %%mm2, %%mm3 \n\t"\
+ "psraw $1, %%mm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
+ "movq %%mm0, %[tmp0] \n\t" /* c */\
+ "movq %%mm3, %[tmp1] \n\t" /* d */\
+ "movq %%mm1, %[tmp2] \n\t" /* e */\
+ "psubw %%mm4, %%mm2 \n\t"\
+ PABS( %%mm4, %%mm2) /* temporal_diff0 */\
+ LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[x-refs] */\
+ LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[x+refs] */\
+ "psubw %%mm0, %%mm3 \n\t"\
+ "psubw %%mm1, %%mm4 \n\t"\
+ PABS( %%mm5, %%mm3)\
+ PABS( %%mm5, %%mm4)\
+ "paddw %%mm4, %%mm3 \n\t" /* temporal_diff1 */\
+ "psrlw $1, %%mm2 \n\t"\
+ "psrlw $1, %%mm3 \n\t"\
+ "pmaxsw %%mm3, %%mm2 \n\t"\
+ LOAD4("(%[next],%[mrefs])", %%mm3) /* next[x-refs] */\
+ LOAD4("(%[next],%[prefs])", %%mm4) /* next[x+refs] */\
+ "psubw %%mm0, %%mm3 \n\t"\
+ "psubw %%mm1, %%mm4 \n\t"\
+ PABS( %%mm5, %%mm3)\
+ PABS( %%mm5, %%mm4)\
+ "paddw %%mm4, %%mm3 \n\t" /* temporal_diff2 */\
+ "psrlw $1, %%mm3 \n\t"\
+ "pmaxsw %%mm3, %%mm2 \n\t"\
+ "movq %%mm2, %[tmp3] \n\t" /* diff */\
+\
+ "paddw %%mm0, %%mm1 \n\t"\
+ "paddw %%mm0, %%mm0 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"\
+ "psrlw $1, %%mm1 \n\t" /* spatial_pred */\
+ PABS( %%mm2, %%mm0) /* ABS(c-e) */\
+\
+ "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1] */\
+ "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1] */\
+ "movq %%mm2, %%mm4 \n\t"\
+ "psubusb %%mm3, %%mm2 \n\t"\
+ "psubusb %%mm4, %%mm3 \n\t"\
+ "pmaxub %%mm3, %%mm2 \n\t"\
+ /*"pshufw $9,%%mm2, %%mm3 \n\t"*/\
+ "movq %%mm2, %%mm3 \n\t" /* replace for "pshufw $9,%%mm2, %%mm3" - Fizick */\
+ "psrlq $16, %%mm3 \n\t"/* replace for "pshufw $9,%%mm2, %%mm3" - Fizick*/\
+ "punpcklbw %%mm7, %%mm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
+ "punpcklbw %%mm7, %%mm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm0 \n\t"\
+ "psubw %[pw1], %%mm0 \n\t" /* spatial_score */\
+\
+ CHECK(-2,0)\
+ CHECK1\
+ CHECK(-3,1)\
+ CHECK2\
+ CHECK(0,-2)\
+ CHECK1\
+ CHECK(1,-3)\
+ CHECK2\
+\
+ /* if(p->mode<2) ... */\
+ "movq %[tmp3], %%mm6 \n\t" /* diff */\
+ "cmp $2, %[mode] \n\t"\
+ "jge 1f \n\t"\
+ LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[x-2*refs] */\
+ LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[x-2*refs] */\
+ LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[x+2*refs] */\
+ LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[x+2*refs] */\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm3 \n\t"\
+ "psrlw $1, %%mm2 \n\t" /* b */\
+ "psrlw $1, %%mm3 \n\t" /* f */\
+ "movq %[tmp0], %%mm4 \n\t" /* c */\
+ "movq %[tmp1], %%mm5 \n\t" /* d */\
+ "movq %[tmp2], %%mm7 \n\t" /* e */\
+ "psubw %%mm4, %%mm2 \n\t" /* b-c */\
+ "psubw %%mm7, %%mm3 \n\t" /* f-e */\
+ "movq %%mm5, %%mm0 \n\t"\
+ "psubw %%mm4, %%mm5 \n\t" /* d-c */\
+ "psubw %%mm7, %%mm0 \n\t" /* d-e */\
+ "movq %%mm2, %%mm4 \n\t"\
+ "pminsw %%mm3, %%mm2 \n\t"\
+ "pmaxsw %%mm4, %%mm3 \n\t"\
+ "pmaxsw %%mm5, %%mm2 \n\t"\
+ "pminsw %%mm5, %%mm3 \n\t"\
+ "pmaxsw %%mm0, %%mm2 \n\t" /* max */\
+ "pminsw %%mm0, %%mm3 \n\t" /* min */\
+ "pxor %%mm4, %%mm4 \n\t"\
+ "pmaxsw %%mm3, %%mm6 \n\t"\
+ "psubw %%mm2, %%mm4 \n\t" /* -max */\
+ "pmaxsw %%mm4, %%mm6 \n\t" /* diff= MAX3(diff, min, -max); */\
+ "1: \n\t"\
+\
+ "movq %[tmp1], %%mm2 \n\t" /* d */\
+ "movq %%mm2, %%mm3 \n\t"\
+ "psubw %%mm6, %%mm2 \n\t" /* d-diff */\
+ "paddw %%mm6, %%mm3 \n\t" /* d+diff */\
+ "pmaxsw %%mm2, %%mm1 \n\t"\
+ "pminsw %%mm3, %%mm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
+ "packuswb %%mm1, %%mm1 \n\t"\
+\
+ :[tmp0]"=m"(tmp0),\
+ [tmp1]"=m"(tmp1),\
+ [tmp2]"=m"(tmp2),\
+ [tmp3]"=m"(tmp3)\
+ :[prev] "r"(prev),\
+ [cur] "r"(cur),\
+ [next] "r"(next),\
+ [prefs]"r"((long)refs),\
+ [mrefs]"r"((long)-refs),\
+ [pw1] "m"(pw_1),\
+ [pb1] "m"(pb_1),\
+ [mode] "g"(mode)\
+ );\
+ asm volatile("movd %%mm1, %0" :"=m"(*dst));\
+ dst += 4;\
+ prev+= 4;\
+ cur += 4;\
+ next+= 4;\
+ }
+
+ /* The FILTER body references prev2/next2 by name; the #defines below
+    bind them to the correct field pair for this parity. */
+ if(parity){
+#define prev2 "prev"
+#define next2 "cur"
+ FILTER
+#undef prev2
+#undef next2
+ }else{
+#define prev2 "cur"
+#define next2 "next"
+ FILTER
+#undef prev2
+#undef next2
+ }
+}
+#undef LOAD4
+#undef PABS
+#undef CHECK
+#undef CHECK1
+#undef CHECK2
+#undef FILTER
+
+#ifndef attribute_align_arg
+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
+# define attribute_align_arg __attribute__((force_align_arg_pointer))
+#else
+# define attribute_align_arg
+#endif
+#endif
+
+// for proper alignment SSE2 we need in GCC 4.2 and above
+#if (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
+
+#ifndef DECLARE_ALIGNED
+#define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n)))
+#endif
+
+// ================= SSE2 =================
+#define PABS(tmp,dst) \
+ "pxor "#tmp", "#tmp" \n\t"\
+ "psubw "#dst", "#tmp" \n\t"\
+ "pmaxsw "#tmp", "#dst" \n\t"
+
+#define FILTER_LINE_FUNC_NAME filter_line_sse2
+#include "vf_yadif_template.h"
+
+// ================ SSSE3 =================
+#define PABS(tmp,dst) \
+ "pabsw "#dst", "#dst" \n\t"
+
+#define FILTER_LINE_FUNC_NAME filter_line_ssse3
+#include "vf_yadif_template.h"
+
+#endif
+
+#endif
+
+/* Reference C implementation of the yadif line filter.
+   Writes one deinterlaced line of w pixels into dst.  prev/cur/next point
+   at the current row of the three source fields, refs is the line stride,
+   and parity selects which pair of fields forms prev2/next2.
+   mode >= 2 skips the extra temporal check on the +-2*refs neighbours. */
+static void filter_line_c(int mode, uint8_t *dst, const uint8_t *prev, const uint8_t *cur, const uint8_t *next, int w, int refs, int parity){
+ int x;
+ const uint8_t *prev2= parity ? prev : cur ;
+ const uint8_t *next2= parity ? cur : next;
+ for(x=0; x<w; x++){
+ int c= cur[-refs];
+ int d= (prev2[0] + next2[0])>>1;
+ int e= cur[+refs];
+ int temporal_diff0= ABS(prev2[0] - next2[0]);
+ int temporal_diff1=( ABS(prev[-refs] - c) + ABS(prev[+refs] - e) )>>1;
+ int temporal_diff2=( ABS(next[-refs] - c) + ABS(next[+refs] - e) )>>1;
+ int diff= MAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
+ int spatial_pred= (c+e)>>1;
+ int spatial_score= ABS(cur[-refs-1] - cur[+refs-1]) + ABS(c-e)
+ + ABS(cur[-refs+1] - cur[+refs+1]) - 1;
+
+#define CHECK(j)\
+ { int score= ABS(cur[-refs-1+ j] - cur[+refs-1- j])\
+ + ABS(cur[-refs + j] - cur[+refs - j])\
+ + ABS(cur[-refs+1+ j] - cur[+refs+1- j]);\
+ if(score < spatial_score){\
+ spatial_score= score;\
+ spatial_pred= (cur[-refs + j] + cur[+refs - j])>>1;\
+
+ /* Each CHECK() opens two blocks ("{" and the if-body); the trailing
+    "}} }}" closes the two CHECKs on each line.  Deliberate yadif idiom:
+    a later direction is only tried while the previous one improved. */
+ CHECK(-1) CHECK(-2) }} }}
+ CHECK( 1) CHECK( 2) }} }}
+
+ if(mode<2){
+ int b= (prev2[-2*refs] + next2[-2*refs])>>1;
+ int f= (prev2[+2*refs] + next2[+2*refs])>>1;
+#if 0
+ int a= cur[-3*refs];
+ int g= cur[+3*refs];
+ int max= MAX3(d-e, d-c, MIN3(MAX(b-c,f-e),MAX(b-c,b-a),MAX(f-g,f-e)) );
+ int min= MIN3(d-e, d-c, MAX3(MIN(b-c,f-e),MIN(b-c,b-a),MIN(f-g,f-e)) );
+#else
+ int max= MAX3(d-e, d-c, MIN(b-c, f-e));
+ int min= MIN3(d-e, d-c, MAX(b-c, f-e));
+#endif
+
+ diff= MAX3(diff, min, -max);
+ }
+
+ // Clip the spatial prediction to d +- diff.
+ if(spatial_pred > d + diff)
+ spatial_pred = d + diff;
+ else if(spatial_pred < d - diff)
+ spatial_pred = d - diff;
+
+ dst[0] = spatial_pred;
+
+ dst++;
+ cur++;
+ prev++;
+ next++;
+ prev2++;
+ next2++;
+ }
+}
+
+/* Rounded average of two lines into dst; used by filter_plane to rebuild
+   the second and second-to-last lines of the missing field. */
+static void interpolate(uint8_t *dst, const uint8_t *cur0, const uint8_t *cur2, int w)
+{
+ int x;
+ for (x=0; x<w; x++) {
+ dst[x] = (cur0[x] + cur2[x] + 1)>>1; // simple average
+ }
+}
+
+/* Deinterlace one plane of w x h pixels into dst.
+   Lines of the kept field are copied through; lines of the missing field
+   are rebuilt: first/last by duplication, second/second-to-last by
+   interpolate(), interior lines by the selected filter_line kernel.
+   cpu flag bits pick the fastest available kernel.
+   NOTE(review): filter_line is a file-static pointer reassigned on every
+   call -- not reentrant; confirm single-threaded use. */
+void filter_plane(int mode, uint8_t *dst, int dst_stride, const uint8_t *prev0, const uint8_t *cur0, const uint8_t *next0, int refs, int w, int h, int parity, int tff, int cpu){
+
+ int y;
+ filter_line = filter_line_c;
+#ifdef __GNUC__
+#if (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
+ if (cpu & AVS_CPU_SSSE3)
+ filter_line = filter_line_ssse3;
+ else if (cpu & AVS_CPU_SSE2)
+ filter_line = filter_line_sse2;
+ else
+#endif
+ if (cpu & AVS_CPU_INTEGER_SSE)
+ filter_line = filter_line_mmx2;
+#endif
+ y=0;
+ if(((y ^ parity) & 1)){
+ memcpy(dst, cur0 + refs, w);// duplicate 1
+ }else{
+ memcpy(dst, cur0, w);
+ }
+ y=1;
+ if(((y ^ parity) & 1)){
+ interpolate(dst + dst_stride, cur0, cur0 + refs*2, w); // interpolate 0 and 2
+ }else{
+ memcpy(dst + dst_stride, cur0 + refs, w); // copy original
+ }
+ for(y=2; y<h-2; y++){
+ if(((y ^ parity) & 1)){
+ const uint8_t *prev= prev0 + y*refs;
+ const uint8_t *cur = cur0 + y*refs;
+ const uint8_t *next= next0 + y*refs;
+ uint8_t *dst2= dst + y*dst_stride;
+ filter_line(mode, dst2, prev, cur, next, w, refs, (parity ^ tff));
+ }else{
+ memcpy(dst + y*dst_stride, cur0 + y*refs, w); // copy original
+ }
+ }
+ y=h-2;
+ if(((y ^ parity) & 1)){
+ interpolate(dst + (h-2)*dst_stride, cur0 + (h-3)*refs, cur0 + (h-1)*refs, w); // interpolate h-3 and h-1
+ }else{
+ memcpy(dst + (h-2)*dst_stride, cur0 + (h-2)*refs, w); // copy original
+ }
+ y=h-1;
+ if(((y ^ parity) & 1)){
+ memcpy(dst + (h-1)*dst_stride, cur0 + (h-2)*refs, w); // duplicate h-2
+ }else{
+ memcpy(dst + (h-1)*dst_stride, cur0 + (h-1)*refs, w); // copy original
+ }
+
+ // Clear MMX state if an MMX-family kernel may have run.
+#ifdef __GNUC__
+ if (cpu >= AVS_CPU_INTEGER_SSE)
+ asm volatile("emms");
+#endif
+}
+
+#ifdef __GNUC__
+#ifndef PIC
+// MMX/integer-SSE fast path of YUY2ToPlanes: de-interleave packed YUY2
+// (Y0 U0 Y1 V0 ...) rows into planar Y, U and V buffers.
+// Processes 16 source bytes (8 pixels) per inner-loop iteration, so 'width'
+// must be a multiple of 8 -- the scalar caller converts any remainder.
+// Y is written with non-temporal stores (movntq); the trailing
+// "sfence; emms" flushes the write-combining buffers and clears MMX state.
+static attribute_align_arg void YUY2ToPlanes_mmx(const unsigned char *srcYUY2, int pitch_yuy2, int width, int height,
+ unsigned char *py, int pitch_y,
+ unsigned char *pu, unsigned char *pv, int pitch_uv)
+{ /* process by 16 bytes (8 pixels), so width is assumed mod 8 */
+ int widthdiv2 = width>>1;
+// static unsigned __int64 Ymask = 0x00FF00FF00FF00FFULL;
+ int h;
+ for (h=0; h<height; h++)
+ {
+ asm (\
+ "pcmpeqb %%mm5, %%mm5 \n\t" /* prepare Ymask FFFFFFFFFFFFFFFF */\
+ "psrlw $8, %%mm5 \n\t" /* Ymask = 00FF00FF00FF00FF */\
+ "xor %%eax, %%eax \n\t"\
+ "xloop%= : \n\t"\
+ "prefetchnta 0xc0(%%edi,%%eax,4) \n\t"\
+ "movq (%%edi,%%eax,4), %%mm0 \n\t" /* src VYUYVYUY - 1 */\
+ "movq 8(%%edi,%%eax,4), %%mm1 \n\t" /* src VYUYVYUY - 2 */\
+ "movq %%mm0, %%mm2 \n\t" /* VYUYVYUY - 1 */\
+ "movq %%mm1, %%mm3 \n\t" /* VYUYVYUY - 2 */\
+ "pand %%mm5, %%mm0 \n\t" /* 0Y0Y0Y0Y - 1 */\
+ "psrlw $8, %%mm2 \n\t" /* 0V0U0V0U - 1 */\
+ "pand %%mm5, %%mm1 \n\t" /* 0Y0Y0Y0Y - 2 */\
+ "psrlw $8, %%mm3 \n\t" /* 0V0U0V0U - 2 */\
+ "packuswb %%mm1, %%mm0 \n\t" /* YYYYYYYY */\
+ "packuswb %%mm3, %%mm2 \n\t" /* VUVUVUVU */\
+ "movntq %%mm0, (%%ebx,%%eax,2) \n\t" /* store y */\
+ "movq %%mm2, %%mm4 \n\t" /* VUVUVUVU */\
+ "pand %%mm5, %%mm2 \n\t" /* 0U0U0U0U */\
+ "psrlw $8, %%mm4 \n\t" /* 0V0V0V0V */\
+ "packuswb %%mm2, %%mm2 \n\t" /* xxxxUUUU */\
+ "packuswb %%mm4, %%mm4 \n\t" /* xxxxVVVV */\
+ "movd %%mm2, (%%edx,%%eax) \n\t" /* store u */\
+ "add $4, %%eax \n\t" \
+ "cmp %%ecx, %%eax \n\t" \
+ "movd %%mm4, -4(%%esi,%%eax) \n\t" /* store v */\
+ "jl xloop%= \n\t"\
+ : : "D"(srcYUY2), "b"(py), "d"(pu), "S"(pv), "c"(widthdiv2) : "%eax");
+
+ srcYUY2 += pitch_yuy2;
+ py += pitch_y;
+ pu += pitch_uv;
+ pv += pitch_uv;
+ }
+ asm ("sfence \n\t emms");
+}
+
+// MMX/integer-SSE fast path of YUY2FromPlanes: re-interleave planar Y, U
+// and V rows back into packed YUY2. Processes 8 pixels per inner-loop
+// iteration ('width' assumed a multiple of 8; scalar caller handles the
+// rest). Output uses non-temporal stores; the trailing "sfence; emms"
+// flushes them and clears MMX state.
+static attribute_align_arg void YUY2FromPlanes_mmx(unsigned char *dstYUY2, int pitch_yuy2, int width, int height,
+ const unsigned char *py, int pitch_y,
+ const unsigned char *pu, const unsigned char *pv, int pitch_uv)
+{
+ int widthdiv2 = width >> 1;
+ int h;
+ for (h=0; h<height; h++)
+ {
+ asm (\
+ "xor %%eax, %%eax \n\t"\
+ "xloop%=: \n\t"\
+ "movd (%%edx,%%eax), %%mm1 \n\t" /* 0000UUUU */\
+ "movd (%%esi,%%eax), %%mm2 \n\t" /* 0000VVVV */\
+ "movq (%%ebx,%%eax,2), %%mm0 \n\t" /* YYYYYYYY */\
+ "punpcklbw %%mm2,%%mm1 \n\t" /* VUVUVUVU */\
+ "movq %%mm0, %%mm3 \n\t" /* YYYYYYYY */\
+ "punpcklbw %%mm1, %%mm0 \n\t" /* VYUYVYUY */\
+ "add $4, %%eax \n\t"\
+ "punpckhbw %%mm1, %%mm3 \n\t" /* VYUYVYUY */\
+ "movntq %%mm0, -16(%%edi,%%eax,4) \n\t" /*store */\
+ "movntq %%mm3, -8(%%edi,%%eax,4) \n\t" /* store */\
+ "cmp %%ecx, %%eax \n\t"\
+ "jl xloop%= \n\t"\
+ : : "b"(py), "d"(pu), "S"(pv), "D"(dstYUY2), "c"(widthdiv2) : "%eax");
+ py += pitch_y;
+ pu += pitch_uv;
+ pv += pitch_uv;
+ dstYUY2 += pitch_yuy2;
+ }
+ asm ("sfence \n\t emms");
+}
+#endif
+#endif
+
+//----------------------------------------------------------------------------------------------
+
+// Convert a packed YUY2 image into separate Y, U and V planes.
+// When integer SSE is available (and the asm path was compiled in), the bulk
+// of each row -- nWidth rounded down to a multiple of 8 -- is handled by
+// YUY2ToPlanes_mmx; the scalar loop below converts the remaining columns
+// starting at w0.
+// NOTE(review): nWidth is assumed even (YUY2 packs 2 pixels per 4 bytes);
+// an odd width would read past the final pixel pair -- confirm callers
+// guarantee even widths.
+void YUY2ToPlanes(const unsigned char *pSrcYUY2, int nSrcPitchYUY2, int nWidth, int nHeight,
+ unsigned char * pSrcY, int srcPitchY,
+ unsigned char * pSrcU, unsigned char * pSrcV, int srcPitchUV, int cpu)
+{
+
+ int h,w;
+ int w0 = 0;
+#if defined(__GNUC__) && !defined(PIC)
+ if (cpu & AVS_CPU_INTEGER_SSE) {
+ w0 = (nWidth/8)*8;
+ YUY2ToPlanes_mmx(pSrcYUY2, nSrcPitchYUY2, w0, nHeight, pSrcY, srcPitchY, pSrcU, pSrcV, srcPitchUV);
+ }
+#endif
+ // Scalar fallback / remainder: w indexes pixels, w2 = 2*w indexes the
+ // packed YUY2 bytes (Y0 U Y1 V per pixel pair); chroma is shared per pair.
+ for (h=0; h<nHeight; h++)
+ {
+ for (w=w0; w<nWidth; w+=2)
+ {
+ int w2 = w+w;
+ pSrcY[w] = pSrcYUY2[w2];
+ pSrcY[w+1] = pSrcYUY2[w2+2];
+ pSrcU[(w>>1)] = pSrcYUY2[w2+1];
+ pSrcV[(w>>1)] = pSrcYUY2[w2+3];
+ }
+ pSrcY += srcPitchY;
+ pSrcU += srcPitchUV;
+ pSrcV += srcPitchUV;
+ pSrcYUY2 += nSrcPitchYUY2;
+ }
+}
+
+//----------------------------------------------------------------------------------------------
+
+// Re-interleave separate Y, U and V planes back into a packed YUY2 image --
+// the inverse of YUY2ToPlanes. When integer SSE is available (and the asm
+// path was compiled in), YUY2FromPlanes_mmx handles nWidth rounded down to a
+// multiple of 8; the scalar loop converts the remaining columns from w0.
+// NOTE(review): nWidth assumed even, as in YUY2ToPlanes -- confirm callers.
+void YUY2FromPlanes(unsigned char *pSrcYUY2, int nSrcPitchYUY2, int nWidth, int nHeight,
+ const unsigned char * pSrcY, int srcPitchY,
+ const unsigned char * pSrcU, const unsigned char * pSrcV, int srcPitchUV, int cpu)
+{
+ int h,w;
+ int w0 = 0;
+#if defined(__GNUC__) && !defined(PIC)
+ if (cpu & AVS_CPU_INTEGER_SSE) {
+ w0 = (nWidth/8)*8;
+ YUY2FromPlanes_mmx(pSrcYUY2, nSrcPitchYUY2, w0, nHeight, pSrcY, srcPitchY, pSrcU, pSrcV, srcPitchUV);
+ }
+#endif
+ // Scalar fallback / remainder: write Y0 U Y1 V for each pixel pair.
+ for (h=0; h<nHeight; h++)
+ {
+ for (w=w0; w<nWidth; w+=2)
+ {
+ int w2 = w+w;
+ pSrcYUY2[w2] = pSrcY[w];
+ pSrcYUY2[w2+1] = pSrcU[(w>>1)];
+ pSrcYUY2[w2+2] = pSrcY[w+1];
+ pSrcYUY2[w2+3] = pSrcV[(w>>1)];
+ }
+ pSrcY += srcPitchY;
+ pSrcU += srcPitchUV;
+ pSrcV += srcPitchUV;
+ pSrcYUY2 += nSrcPitchYUY2;
+ }
+}
--- /dev/null
+/*
+ Yadif C-plugin for Avisynth 2.5 - Yet Another DeInterlacing Filter
+ Copyright (C)2007 Alexander G. Balakhnin aka Fizick http://avisynth.org.ru
+ Port of YADIF filter from MPlayer
+ Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Avisynth_C plugin
+ Assembler optimized for GNU C compiler
+
+*/
+
+#ifndef YADIF_H_
+#define YADIF_H_
+
+#include <stdint.h>
+
+#define AVS_CPU_INTEGER_SSE 0x1
+#define AVS_CPU_SSE2 0x2
+#define AVS_CPU_SSSE3 0x4
+
+// Per-instance yadif state: detected CPU capabilities, cached plane
+// geometry, and the planar working buffers for the previous, current and
+// next frames plus the destination.
+typedef struct yadif_filter {
+ int cpu; // optimization: bitmask of AVS_CPU_* flags
+ int yheight; // luma plane height (lines)
+ int ypitch; // luma plane line pitch (bytes)
+ int uvpitch; // chroma plane line pitch (bytes)
+ int ywidth; // luma plane width
+ int uvwidth; // chroma plane width
+ unsigned char *ysrc; // current frame, planar Y/U/V
+ unsigned char *usrc;
+ unsigned char *vsrc;
+ unsigned char *yprev; // previous frame, planar Y/U/V
+ unsigned char *uprev;
+ unsigned char *vprev;
+ unsigned char *ynext; // next frame, planar Y/U/V
+ unsigned char *unext;
+ unsigned char *vnext;
+ unsigned char *ydest; // deinterlaced output, planar Y/U/V
+ unsigned char *udest;
+ unsigned char *vdest;
+} yadif_filter;
+
+// Deinterlace one plane with yadif (see implementation for parameter details).
+void filter_plane(int mode, uint8_t *dst, int dst_stride, const uint8_t *prev0, const uint8_t *cur0, const uint8_t *next0, int refs, int w, int h, int parity, int tff, int cpu);
+// Split packed YUY2 into planar Y/U/V.
+void YUY2ToPlanes(const unsigned char *pSrcYUY2, int nSrcPitchYUY2, int nWidth, int nHeight,
+ unsigned char * pSrcY, int srcPitchY,
+ unsigned char * pSrcU, unsigned char * pSrcV, int srcPitchUV, int cpu);
+// Re-interleave planar Y/U/V back into packed YUY2.
+void YUY2FromPlanes(unsigned char *pSrcYUY2, int nSrcPitchYUY2, int nWidth, int nHeight,
+ const unsigned char * pSrcY, int srcPitchY,
+ const unsigned char * pSrcU, const unsigned char * pSrcV, int srcPitchUV, int cpu);
+
+#endif