1 /*****************************************************************************
2 * algo_phosphor.c : Phosphor algorithm for the VLC deinterlacer
3 *****************************************************************************
4 * Copyright (C) 2011 the VideoLAN team
7 * Author: Juha Jeronen <juha.jeronen@jyu.fi>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
28 #ifdef CAN_COMPILE_MMXEXT
35 #include <vlc_common.h>
37 #include <vlc_picture.h>
38 #include <vlc_filter.h>
40 #include "deinterlace.h" /* filter_sys_t */
41 #include "helpers.h" /* ComposeFrame() */
43 #include "algo_phosphor.h"
45 /*****************************************************************************
47 *****************************************************************************/
50 * Internal helper function: dims (darkens) the given field
51 * of the given picture.
53 * This is used for simulating CRT light output decay in RenderPhosphor().
55 * The strength "1" is recommended. It's a matter of taste,
56 * so it's parametrized.
58 * Note on chroma formats:
59 * - If input is 4:2:2, all planes are processed.
60 * - If input is 4:2:0, only the luma plane is processed, because both fields
61 * have the same chroma. This will distort colours, especially for high
62 * filter strengths, especially for pixels whose U and/or V values are
63 * far away from the origin (which is at 128 in uint8 format).
65 * @param p_dst Input/output picture. Will be modified in-place.
66 * @param i_field Darken which field? 0 = top, 1 = bottom.
67 * @param i_strength Strength of effect: 1, 2 or 3 (division by 2, 4 or 8).
68 * @see RenderPhosphor()
71 static void DarkenField( picture_t *p_dst, const int i_field,
72 const int i_strength )
74 assert( p_dst != NULL );
75 assert( i_field == 0 || i_field == 1 );
76 assert( i_strength >= 1 && i_strength <= 3 );
78 unsigned u_cpu = vlc_CPU();
80 /* Bitwise ANDing with this clears the i_strength highest bits
82 #ifdef CAN_COMPILE_MMXEXT
83 uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
86 const uint8_t remove_high_u8 = 0xFF >> i_strength;
87 const uint64_t remove_high_u64 = remove_high_u8 *
88 INT64_C(0x0101010101010101);
92 For luma, the operation is just a shift + bitwise AND, so we vectorize
93 even in the C version.
95 There is an MMX version, too, because it performs about twice faster.
97 int i_plane = Y_PLANE;
98 uint8_t *p_out, *p_out_end;
99 int w = p_dst->p[i_plane].i_visible_pitch;
100 p_out = p_dst->p[i_plane].p_pixels;
101 p_out_end = p_out + p_dst->p[i_plane].i_pitch
102 * p_dst->p[i_plane].i_visible_lines;
104 /* skip first line for bottom field */
106 p_out += p_dst->p[i_plane].i_pitch;
108 int wm8 = w % 8; /* remainder */
109 int w8 = w - wm8; /* part of width that is divisible by 8 */
110 for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
112 uint64_t *po = (uint64_t *)p_out;
113 #ifdef CAN_COMPILE_MMXEXT
114 if( u_cpu & CPU_CAPABILITY_MMXEXT )
116 movq_m2r( i_strength_u64, mm1 );
117 movq_m2r( remove_high_u64, mm2 );
118 for( int x = 0 ; x < w8; x += 8 )
120 movq_m2r( (*po), mm0 );
122 psrlq_r2r( mm1, mm0 );
123 pand_r2r( mm2, mm0 );
125 movq_r2m( mm0, (*po++) );
131 for( int x = 0 ; x < w8; x += 8, ++po )
132 (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
133 #ifdef CAN_COMPILE_MMXEXT
136 /* handle the width remainder */
139 uint8_t *po_temp = (uint8_t *)po;
140 for( int x = 0 ; x < wm8; ++x, ++po_temp )
141 (*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
145 /* Process chroma if the field chromas are independent.
147 The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
148 The chroma processing is a bit more complicated than luma,
149 and needs MMX for vectorization.
151 if( p_dst->format.i_chroma == VLC_CODEC_I422 ||
152 p_dst->format.i_chroma == VLC_CODEC_J422 )
154 for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ )
156 if( i_plane == Y_PLANE )
157 continue; /* luma already handled */
159 int w = p_dst->p[i_plane].i_visible_pitch;
160 #ifdef CAN_COMPILE_MMXEXT
161 int wm8 = w % 8; /* remainder */
162 int w8 = w - wm8; /* part of width that is divisible by 8 */
164 p_out = p_dst->p[i_plane].p_pixels;
165 p_out_end = p_out + p_dst->p[i_plane].i_pitch
166 * p_dst->p[i_plane].i_visible_lines;
168 /* skip first line for bottom field */
170 p_out += p_dst->p[i_plane].i_pitch;
172 for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
174 #ifdef CAN_COMPILE_MMXEXT
175 /* See also easy-to-read C version below. */
176 if( u_cpu & CPU_CAPABILITY_MMXEXT )
178 static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
179 movq_m2r( b128, mm5 );
180 movq_m2r( i_strength_u64, mm6 );
181 movq_m2r( remove_high_u64, mm7 );
183 uint64_t *po = (uint64_t *)p_out;
184 for( int x = 0 ; x < w8; x += 8 )
186 movq_m2r( (*po), mm0 );
188 movq_r2r( mm5, mm2 ); /* 128 */
189 movq_r2r( mm0, mm1 ); /* copy of data */
190 psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
191 psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
194 psrlq_r2r( mm6, mm1 );
195 psrlq_r2r( mm6, mm2 );
196 pand_r2r( mm7, mm1 );
197 pand_r2r( mm7, mm2 );
199 /* collect results from pos./neg. parts */
200 psubb_r2r( mm2, mm1 );
201 paddb_r2r( mm5, mm1 );
203 movq_r2m( mm1, (*po++) );
206 /* handle the width remainder */
209 /* The output is closer to 128 than the input;
210 the result always fits in uint8. */
211 uint8_t *po8 = (uint8_t *)po;
212 for( int x = 0 ; x < wm8; ++x, ++po8 )
213 (*po8) = 128 + ( ((*po8) - 128) /
220 /* 4:2:2 chroma handler, C version */
222 for( int x = 0 ; x < w; ++x, ++po )
223 (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
224 #ifdef CAN_COMPILE_MMXEXT
228 } /* for i_plane... */
231 #ifdef CAN_COMPILE_MMXEXT
232 if( u_cpu & CPU_CAPABILITY_MMXEXT )
237 /*****************************************************************************
239 *****************************************************************************/
241 /* See header for function doc. */
242 int RenderPhosphor( filter_t *p_filter,
244 int i_order, int i_field )
246 assert( p_filter != NULL );
247 assert( p_dst != NULL );
248 assert( i_order >= 0 && i_order <= 2 ); /* 2 = soft field repeat */
249 assert( i_field == 0 || i_field == 1 );
251 filter_sys_t *p_sys = p_filter->p_sys;
253 /* Last two input frames */
254 picture_t *p_in = p_sys->pp_history[HISTORY_SIZE-1];
255 picture_t *p_old = p_sys->pp_history[HISTORY_SIZE-2];
257 /* Use the same input picture as "old" at the first frame after startup */
261 /* If the history mechanism has failed, we can't do anything. */
265 assert( p_old != NULL );
266 assert( p_in != NULL );
268 /* Decide sources for top & bottom fields of output. */
269 picture_t *p_in_top = p_in;
270 picture_t *p_in_bottom = p_in;
271 /* For the first output field this frame,
272 grab "old" field from previous frame. */
275 if( i_field == 0 ) /* rendering top field */
277 else /* i_field == 1, rendering bottom field */
281 compose_chroma_t cc = CC_ALTLINE; /* initialize to prevent compiler warning */
282 switch( p_sys->phosphor.i_chroma_for_420 )
290 else /* i_field == 1 */
291 cc = CC_SOURCE_BOTTOM;
300 /* The above are the only possibilities, if there are no bugs. */
305 ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc );
307 /* Simulate phosphor light output decay for the old field.
309 The dimmer can also be switched off in the configuration, but that is
310 more of a technical curiosity or an educational toy for advanced users
311 than a useful deinterlacer mode (although it does make telecined
312 material look slightly better than without any filtering).
314 In most use cases the dimmer is used.
316 if( p_sys->phosphor.i_dimmer_strength > 0 )
317 DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength );