/*****************************************************************************
* algo_phosphor.c : Phosphor algorithm for the VLC deinterlacer
*****************************************************************************
- * Copyright (C) 2011 the VideoLAN team
+ * Copyright (C) 2011 VLC authors and VideoLAN
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
* @see RenderPhosphor()
* @see ComposeFrame()
*/
-static void DarkenField( picture_t *p_dst, const int i_field,
- const int i_strength )
+static void DarkenField( picture_t *p_dst,
+ const int i_field, const int i_strength,
+ bool process_chroma )
{
assert( p_dst != NULL );
assert( i_field == 0 || i_field == 1 );
/* Bitwise ANDing with this clears the i_strength highest bits
of each byte */
-#ifdef CAN_COMPILE_MMXEXT
- unsigned u_cpu = vlc_CPU();
- uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
- number of bits) */
-#endif
const uint8_t remove_high_u8 = 0xFF >> i_strength;
const uint64_t remove_high_u64 = remove_high_u8 *
INT64_C(0x0101010101010101);
For luma, the operation is just a shift + bitwise AND, so we vectorize
even in the C version.
- There is an MMX version, too, because it performs about twice faster.
+ There is an MMX version too, because it performs about twice faster.
*/
int i_plane = Y_PLANE;
uint8_t *p_out, *p_out_end;
uint64_t *po = (uint64_t *)p_out;
int x = 0;
-#ifdef CAN_COMPILE_MMXEXT
- if( u_cpu & CPU_CAPABILITY_MMXEXT )
+ for( ; x < w8; x += 8, ++po )
+ (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
+
+ /* handle the width remainder */
+ uint8_t *po_temp = (uint8_t *)po;
+ for( ; x < w; ++x, ++po_temp )
+ (*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
+ }
+
+ /* Process chroma if the field chromas are independent.
+
+ The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
+ The chroma processing is a bit more complicated than luma,
+ and needs MMX for vectorization.
+ */
+ if( process_chroma )
+ {
+ for( i_plane++ /* luma already handled*/;
+ i_plane < p_dst->i_planes;
+ i_plane++ )
{
- movq_m2r( i_strength_u64, mm1 );
- movq_m2r( remove_high_u64, mm2 );
- for( ; x < w8; x += 8 )
+ int w = p_dst->p[i_plane].i_visible_pitch;
+ p_out = p_dst->p[i_plane].p_pixels;
+ p_out_end = p_out + p_dst->p[i_plane].i_pitch
+ * p_dst->p[i_plane].i_visible_lines;
+
+ /* skip first line for bottom field */
+ if( i_field == 1 )
+ p_out += p_dst->p[i_plane].i_pitch;
+
+ for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
{
- movq_m2r( (*po), mm0 );
+ /* Handle the width remainder */
+ uint8_t *po = p_out;
+ for( int x = 0; x < w; ++x, ++po )
+ (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
+ } /* for p_out... */
+ } /* for i_plane... */
+ } /* if process_chroma */
+}
+
+#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
+static void DarkenFieldMMX( picture_t *p_dst,
+ const int i_field, const int i_strength,
+ bool process_chroma )
+{
+ assert( p_dst != NULL );
+ assert( i_field == 0 || i_field == 1 );
+ assert( i_strength >= 1 && i_strength <= 3 );
- psrlq_r2r( mm1, mm0 );
- pand_r2r( mm2, mm0 );
+ uint64_t i_strength_u64 = i_strength; /* needs to know number of bits */
+ const uint8_t remove_high_u8 = 0xFF >> i_strength;
+ const uint64_t remove_high_u64 = remove_high_u8 *
+ INT64_C(0x0101010101010101);
- movq_r2m( mm0, (*po++) );
- }
- }
- else
+ int i_plane = Y_PLANE;
+ uint8_t *p_out, *p_out_end;
+ int w = p_dst->p[i_plane].i_visible_pitch;
+ p_out = p_dst->p[i_plane].p_pixels;
+ p_out_end = p_out + p_dst->p[i_plane].i_pitch
+ * p_dst->p[i_plane].i_visible_lines;
+
+ /* skip first line for bottom field */
+ if( i_field == 1 )
+ p_out += p_dst->p[i_plane].i_pitch;
+
+ int wm8 = w % 8; /* remainder */
+ int w8 = w - wm8; /* part of width that is divisible by 8 */
+ for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
+ {
+ uint64_t *po = (uint64_t *)p_out;
+ int x = 0;
+
+ movq_m2r( i_strength_u64, mm1 );
+ movq_m2r( remove_high_u64, mm2 );
+ for( ; x < w8; x += 8 )
{
-#endif
- for( ; x < w8; x += 8, ++po )
- (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
-#ifdef CAN_COMPILE_MMXEXT
+ movq_m2r( (*po), mm0 );
+
+ psrlq_r2r( mm1, mm0 );
+ pand_r2r( mm2, mm0 );
+
+ movq_r2m( mm0, (*po++) );
}
-#endif
/* handle the width remainder */
uint8_t *po_temp = (uint8_t *)po;
The chroma processing is a bit more complicated than luma,
and needs MMX for vectorization.
*/
- if( p_dst->format.i_chroma == VLC_CODEC_I422 ||
- p_dst->format.i_chroma == VLC_CODEC_J422 )
+ if( process_chroma )
{
- for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ )
+ for( i_plane++ /* luma already handled */;
+ i_plane < p_dst->i_planes;
+ i_plane++ )
{
- if( i_plane == Y_PLANE )
- continue; /* luma already handled */
-
int w = p_dst->p[i_plane].i_visible_pitch;
-#ifdef CAN_COMPILE_MMXEXT
int wm8 = w % 8; /* remainder */
int w8 = w - wm8; /* part of width that is divisible by 8 */
-#endif
+
p_out = p_dst->p[i_plane].p_pixels;
p_out_end = p_out + p_dst->p[i_plane].i_pitch
* p_dst->p[i_plane].i_visible_lines;
{
int x = 0;
-#ifdef CAN_COMPILE_MMXEXT
/* See also easy-to-read C version below. */
- if( u_cpu & CPU_CAPABILITY_MMXEXT )
+ static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
+ movq_m2r( b128, mm5 );
+ movq_m2r( i_strength_u64, mm6 );
+ movq_m2r( remove_high_u64, mm7 );
+
+ uint64_t *po8 = (uint64_t *)p_out;
+ for( ; x < w8; x += 8 )
{
- static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
- movq_m2r( b128, mm5 );
- movq_m2r( i_strength_u64, mm6 );
- movq_m2r( remove_high_u64, mm7 );
-
- uint64_t *po = (uint64_t *)p_out;
- for( ; x < w8; x += 8 )
- {
- movq_m2r( (*po), mm0 );
-
- movq_r2r( mm5, mm2 ); /* 128 */
- movq_r2r( mm0, mm1 ); /* copy of data */
- psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
- psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
-
- /* >> i_strength */
- psrlq_r2r( mm6, mm1 );
- psrlq_r2r( mm6, mm2 );
- pand_r2r( mm7, mm1 );
- pand_r2r( mm7, mm2 );
-
- /* collect results from pos./neg. parts */
- psubb_r2r( mm2, mm1 );
- paddb_r2r( mm5, mm1 );
-
- movq_r2m( mm1, (*po++) );
- }
+ movq_m2r( (*po8), mm0 );
+
+ movq_r2r( mm5, mm2 ); /* 128 */
+ movq_r2r( mm0, mm1 ); /* copy of data */
+ psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
+ psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
+
+ /* >> i_strength */
+ psrlq_r2r( mm6, mm1 );
+ psrlq_r2r( mm6, mm2 );
+ pand_r2r( mm7, mm1 );
+ pand_r2r( mm7, mm2 );
+
+ /* collect results from pos./neg. parts */
+ psubb_r2r( mm2, mm1 );
+ paddb_r2r( mm5, mm1 );
+
+ movq_r2m( mm1, (*po8++) );
}
-#endif
- /* C version - handle the width remainder
- (or everything if no MMX) */
+ /* C version - handle the width remainder */
uint8_t *po = p_out;
for( ; x < w; ++x, ++po )
(*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
} /* for p_out... */
} /* for i_plane... */
- } /* if b_i422 */
+ } /* if process_chroma */
-#ifdef CAN_COMPILE_MMXEXT
- if( u_cpu & CPU_CAPABILITY_MMXEXT )
- emms();
-#endif
+ emms();
}
+#endif
/*****************************************************************************
* Public functions
p_in_top = p_old;
}
- compose_chroma_t cc = CC_ALTLINE; /* initialize to prevent compiler warning */
- switch( p_sys->phosphor.i_chroma_for_420 )
+ compose_chroma_t cc = CC_ALTLINE;
+ if( 2 * p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
+ 2 * p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den )
{
+ /* Only 420 like chroma */
+ switch( p_sys->phosphor.i_chroma_for_420 )
+ {
case PC_BLEND:
cc = CC_MERGE;
break;
/* The above are the only possibilities, if there are no bugs. */
assert(0);
break;
+ }
}
-
- ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc );
+ ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc, p_filter->fmt_in.video.i_chroma == VLC_CODEC_YV12 );
/* Simulate phosphor light output decay for the old field.
In most use cases the dimmer is used.
*/
if( p_sys->phosphor.i_dimmer_strength > 0 )
- DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength );
-
+ {
+#ifdef CAN_COMPILE_MMXEXT
+ if( vlc_CPU_MMXEXT() )
+ DarkenFieldMMX( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength,
+ p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
+ p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den );
+ else
+#endif
+ DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength,
+ p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
+ p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den );
+ }
return VLC_SUCCESS;
}