1 /*****************************************************************************
2 * grain.c: add film grain
3 *****************************************************************************
4 * Copyright (C) 2010 Laurent Aimar
7 * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
34 #include <vlc_common.h>
35 #include <vlc_plugin.h>
36 #include <vlc_filter.h>
41 /*****************************************************************************
43 *****************************************************************************/
/* Module entry points; they are registered with set_callbacks() in the
 * module descriptor. */
44 static int Open (vlc_object_t *);
45 static void Close(vlc_object_t *);
/* Side length, in samples, of the square noise banks: they are declared as
 * BANK_SIZE * BANK_SIZE arrays and indexed with a row stride of BANK_SIZE. */
47 #define BANK_SIZE (64)
/* Prefix shared by the names of all the configuration variables below. */
49 #define CFG_PREFIX "grain-"
/* Bounds and user-visible strings of the "variance" option. Filter() clamps
 * the current value to [VARIANCE_MIN, VARIANCE_MAX] before using it. */
51 #define VARIANCE_MIN (0.0)
52 #define VARIANCE_MAX (10.0)
53 #define VARIANCE_TEXT N_("Variance")
54 #define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
/* Upper bound and user-visible strings of the "period-min" and "period-max"
 * options.
 * NOTE(review): PERIOD_MIN is used by the option declarations but its
 * definition is not visible in this excerpt. */
57 #define PERIOD_MAX BANK_SIZE
58 #define PERIOD_MIN_TEXT N_("Minimal period")
59 #define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
60 #define PERIOD_MAX_TEXT N_("Maximal period")
61 #define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
/* Module descriptor entries: user-visible names, capability and category. */
64 set_description(N_("Grain video filter"))
65 set_shortname( N_("Grain"))
66 set_help(N_("Adds filtered gaussian noise"))
67 set_capability( "video filter2", 0 )
68 set_category(CAT_VIDEO)
69 set_subcategory(SUBCAT_VIDEO_VFILTER)
/* "grain-variance": float option, default 2.0, limited to
 * [VARIANCE_MIN, VARIANCE_MAX]. */
70 add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX,
71 NULL, VARIANCE_TEXT, VARIANCE_LONGTEXT, false)
/* "grain-period-min": integer option, default 1, limited to
 * [PERIOD_MIN, PERIOD_MAX]. Open() reads it with var_InheritInteger(). */
72 add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX,
73 NULL, PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false)
/* "grain-period-max": integer option, default 3*PERIOD_MAX/4 (48 with
 * PERIOD_MAX == BANK_SIZE == 64), same limits as "grain-period-min". */
74 add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX,
75 NULL, PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false)
/* Open() and Close() are the activation and deactivation callbacks. */
76 set_callbacks(Open, Close)
79 /*****************************************************************************
81 *****************************************************************************/
/* Side length, in pixels, of the square tile handled by one call of the
 * blend function: PlaneFilter() walks the plane in steps of BLEND_SIZE and
 * BlockBlendC() blends BLEND_SIZE x BLEND_SIZE samples. */
83 #define BLEND_SIZE (8)
/* Visible members of the filter state (the enclosing declaration and the
 * other members are not visible in this excerpt). */
/* Noise produced once by Generate() in Open(); never scaled in place. */
89 int16_t bank[BANK_SIZE * BANK_SIZE];
/* Copy of bank scaled by Filter() for plane 0. */
90 int16_t bank_y[BANK_SIZE * BANK_SIZE];
/* Copy of bank scaled by Filter() with half the scale of bank_y. */
91 int16_t bank_uv[BANK_SIZE * BANK_SIZE];
/* Blend routine for one full BLEND_SIZE x BLEND_SIZE tile; Open() sets it to
 * BlockBlendC or, when available, BlockBlendSse2. noise points at the first
 * sample of the tile inside a bank. */
93 void (*blend)(uint8_t *dst, size_t dst_pitch,
94 const uint8_t *src, size_t src_pitch,
95 const int16_t *noise);
104 /* Simple and *really fast* RNG (xorshift[13,17,5])*/
/* Initial generator state: Open() stores it in sys->seed and Generate()
 * starts its own private sequence from it. */
105 #define URAND_SEED (2463534242)
/* Takes a pointer to the generator state and returns a 32-bit value.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * advances *seed with the xorshift steps named above - confirm. */
106 static uint32_t urand(uint32_t *seed)
114 /* Uniform random value between 0 and 1 */
/* seed is the generator state handed over to urand(). The 32-bit result of
 * urand() is divided by UINT32_MAX as a double, so the returned value lies
 * in [0, 1]. */
115 static double drand(uint32_t *seed)
117 return urand(seed) / (double)UINT32_MAX;
119 /* Gaussian random value with a mean of 0 and a variance of 1 */
/* r1 and r2 receive the results (Generate() passes two adjacent matrix
 * entries per call); seed is the generator state used by drand().
 * NOTE(review): the enclosing loop and the stores to *r1 and *r2 are not
 * visible in this excerpt. The visible formulas match the polar form of the
 * Box-Muller transform, which presumably redraws until s < 1 and returns
 * u1 * s and u2 * s - confirm. */
120 static void grand(double *r1, double *r2, uint32_t *seed)
/* Draw a point (u1, u2) with both coordinates in [-1, 1]. */
125 u1 = 2 * drand(seed) - 1;
126 u2 = 2 * drand(seed) - 1;
/* Squared distance of that point from the origin. */
127 s = u1 * u1 + u2 * u2;
/* s becomes the factor sqrt(-2 ln(s) / s). */
130 s = sqrt(-2 * log(s) / s);
/* Generic blend of a w x h block: every destination sample is the source
 * sample plus the matching noise sample, passed through clip_uint8_vlc().
 * dst_pitch and src_pitch are the distances between consecutive rows of dst
 * and src; the noise rows are BANK_SIZE samples apart, i.e. noise points
 * into a noise bank.
 * NOTE(review): the end of the parameter list is not visible in this
 * excerpt; the loops below and the callers show a width w and a height h. */
135 static void BlockBlend(uint8_t *dst, size_t dst_pitch,
136 const uint8_t *src, size_t src_pitch,
137 const int16_t *noise,
140 for (int y = 0; y < h; y++) {
141 for (int x = 0; x < w; x++) {
/* The sum is computed as an int (integer promotions) before clip_uint8_vlc()
 * is applied, so it cannot wrap around in 8 or 16 bits. */
142 dst[y * dst_pitch + x] =
143 clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]);
/* Plain C blend of one full BLEND_SIZE x BLEND_SIZE tile. It has the
 * signature of the sys->blend pointer and is the implementation installed by
 * default in Open(). It simply forwards to BlockBlend() with a fixed size. */
148 static void BlockBlendC(uint8_t *dst, size_t dst_pitch,
149 const uint8_t *src, size_t src_pitch,
150 const int16_t *noise)
152 BlockBlend(dst, dst_pitch, src, src_pitch, noise,
153 BLEND_SIZE, BLEND_SIZE);
/* SSE2 implementation of the tile blend; Open() installs it as sys->blend
 * when vlc_CPU() reports CPU_CAPABILITY_SSE2. It has the same signature as
 * BlockBlendC(). STRING() turns BANK_SIZE into text so that it can be pasted
 * into the asm template as part of a displacement.
 * NOTE(review): _STRING starts with an underscore followed by an uppercase
 * letter, which is an identifier reserved for the implementation. */
156 #ifdef CAN_COMPILE_SSE2
157 #define _STRING(x) #x
158 #define STRING(x) _STRING(x)
159 static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
160 const uint8_t *src, size_t src_pitch,
161 const int16_t *noise)
164 /* TODO It is possible to do the math on 8 bits using
165 * paddusb X and then psubusb -X.
// xmm0 is cleared once here and is the zero operand of punpcklbw below.
// NOTE(review): the clearing and the uses of xmm0 are in separate asm
// statements and the clobber list of the second statement is not visible in
// this excerpt; nothing visible tells the compiler that the xmm registers
// are in use between and inside these statements - confirm.
167 asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
// Each iteration handles two rows of 8 pixels, hence 8/2 iterations.
168 for (int i = 0; i < 8/2; i++) {
// Load 8 source bytes per row (movq) and 8 noise words per row (movdqu).
// The second noise row is 2*BANK_SIZE bytes further, i.e. BANK_SIZE int16_t
// samples after the first one.
170 "movq (%[src1]), %%xmm1\n"
171 "movq (%[src2]), %%xmm3\n"
172 "movdqu (%[noise]), %%xmm2\n"
173 "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"
// Widen the source bytes to 16-bit words by interleaving them with zero.
175 "punpcklbw %%xmm0, %%xmm1\n"
176 "punpcklbw %%xmm0, %%xmm3\n"
// Add the noise with signed saturation on 16 bits.
178 "paddsw %%xmm2, %%xmm1\n"
179 "paddsw %%xmm4, %%xmm3\n"
// Narrow back to bytes with unsigned saturation, then store 8 bytes per row.
180 "packuswb %%xmm1, %%xmm1\n"
181 "packuswb %%xmm3, %%xmm3\n"
182 "movq %%xmm1, (%[dst1])\n"
183 "movq %%xmm3, (%[dst2])\n"
// No output operand; the inputs are the addresses of rows 2*i and 2*i+1 of
// dst and src, and of row 2*i of the noise tile.
184 : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
185 [dst2]"r"(&dst[(2*i+1) * dst_pitch]),
186 [src1]"r"(&src[(2*i+0) * src_pitch]),
187 [src2]"r"(&src[(2*i+1) * src_pitch]),
188 [noise]"r"(&noise[2*i * BANK_SIZE])
// NOTE(review): the preprocessor condition guarding this error is not
// visible in this excerpt; the loop above is hard-coded for 8 rows of 8
// pixels, so presumably it rejects any BLEND_SIZE other than 8 - confirm.
192 # error "BLEND_SIZE unsupported"
// Executes the x86 emms instruction.
// NOTE(review): the SSE2 blend visible in this file only touches XMM
// registers, and the users of this helper are not visible in this excerpt -
// confirm that it is still needed.
195 static void Emms(void)
197 asm volatile ("emms");
202 * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
// src holds BANK_SIZE x BANK_SIZE samples; each one is multiplied by scale
// and divided by 2^shift with rounding. Filter() calls this to derive
// bank_y and bank_uv from the unscaled bank.
// NOTE(review): the sign test selecting one of the two formulas and the
// store of vq into dst are not visible in this excerpt.
204 static void Scale(int16_t *dst, int16_t *src, int scale)
206 const int N = BANK_SIZE;
// 7 bits for the magnitude of the samples plus 8 bits for scale.
207 const int shift = 7 + 8;
209 for (int y = 0; y < N; y++) {
210 for (int x = 0; x < N; x++) {
211 const int v = src[y * N + x];
// Adding 2^(shift-1) - 1 before the shift rounds to the nearest integer,
// with exact halves rounded toward zero.
214 vq = ( v * scale + (1 << (shift-1)) - 1) >> shift;
// Same computation on the negated sample, with the result negated again, so
// that the rounding is symmetric around zero and no negative value is
// shifted.
216 vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift);
// Adds noise to one plane: the visible area of dst is filled, tile by tile,
// with the pixels of src plus a BLEND_SIZE x BLEND_SIZE window of noise taken
// at a random position in bank.
//   filter  filter whose state provides the blend routine for full tiles
//   dst     destination plane (written)
//   src     source plane (read), expected to cover the same visible area
//   bank    BANK_SIZE x BANK_SIZE noise samples, row stride BANK_SIZE
//   seed    generator state, advanced by two urand() calls per tile
222 static void PlaneFilter(filter_t *filter,
223 plane_t *dst, const plane_t *src,
224 int16_t *bank, uint32_t *seed)
226 filter_sys_t *sys = filter->p_sys;
228 for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) {
229 for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) {
// Random window origin, chosen so that a full BLEND_SIZE x BLEND_SIZE window
// always fits inside the bank.
230 int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
231 int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
232 const int16_t *noise = &bank[by * BANK_SIZE + bx];
// Width and height remaining from this tile to the end of the visible area.
234 int w = dst->i_visible_pitch - x;
235 int h = dst->i_visible_lines - y;
237 const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x];
238 uint8_t *dstp = &dst->p_pixels[y * dst->i_pitch + x];
// Full tiles go through the (possibly accelerated) blend routine.
240 if (w >= BLEND_SIZE && h >= BLEND_SIZE)
241 sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise);
// Edge tiles: only one of w and h may be smaller than BLEND_SIZE, the other
// one is still the whole remaining width or height. Both are clamped to
// BLEND_SIZE so that the blend stays inside this tile and inside the noise
// window; unclamped, BlockBlend() would index noise far beyond the
// BANK_SIZE x BANK_SIZE bank on planes whose size is not a multiple of
// BLEND_SIZE. The pixels left out here are covered by the following tiles.
243 BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise, __MIN(w, BLEND_SIZE), __MIN(h, BLEND_SIZE));
// Video filter callback, installed as filter->pf_video_filter by Open().
// Requests an output picture, adds noise to the planes of src into it and
// releases src.
// NOTE(review): several lines of this function (error guard, else branch,
// return statements) are not visible in this excerpt; the comments below only
// describe what the visible lines do.
250 static picture_t *Filter(filter_t *filter, picture_t *src)
252 filter_sys_t *sys = filter->p_sys;
254 picture_t *dst = filter_NewPicture(filter);
// Presumably the failure path taken when no output picture was obtained (the
// guard is not visible): the input picture is released here as well.
256 picture_Release(src);
// The variance is written by Callback() under the same lock. It is clamped
// to [VARIANCE_MIN, VARIANCE_MAX] whatever value was stored.
260 vlc_mutex_lock(&sys->cfg.lock);
261 const double variance = __MIN(__MAX(sys->cfg.variance, VARIANCE_MIN), VARIANCE_MAX);
262 vlc_mutex_unlock(&sys->cfg.lock);
// Standard deviation multiplied by 256, truncated to an integer.
264 const int scale = 256 * sqrt(variance);
// The scaled banks are rebuilt only when the scale has changed.
// NOTE(review): the banks are rebuilt from sys->scale, so the line missing
// from this excerpt presumably stores scale into sys->scale first - confirm.
265 if (scale != sys->scale) {
267 Scale(sys->bank_y, sys->bank, sys->scale);
// The second bank gets half the scale of bank_y.
268 Scale(sys->bank_uv, sys->bank, sys->scale / 2);
271 for (int i = 0; i < dst->i_planes; i++) {
272 const plane_t *srcp = &src->p[i];
273 plane_t *dstp = &dst->p[i];
// Plane 0 is always filtered, the other planes only when is_uv_filtered is
// set (Open() sets it to true).
275 if (i == 0 || sys->is_uv_filtered) {
// Plane 0 uses bank_y; the second operand of the conditional is not visible
// in this excerpt (presumably sys->bank_uv).
276 int16_t *bank = i == 0 ? sys->bank_y :
278 PlaneFilter(filter, dstp, srcp, bank, &sys->seed);
// Presumably the else branch: planes that are not filtered are copied as is.
281 plane_CopyPixels(dstp, srcp);
// The output takes the properties of the input, which is then released.
285 picture_CopyProperties(dst, src);
286 picture_Release(src);
291 * Generate filtered Gaussian noise within the [-127, 127] range.
/* Fills bank (BANK_SIZE x BANK_SIZE samples) with band-limited Gaussian
 * noise. A matrix of Gaussian values is drawn from a generator that always
 * starts from URAND_SEED, the coefficients outside the band selected by
 * h_min/h_max (horizontal index) and v_min/v_max (vertical index) are
 * cleared, and the matrix is multiplied on both sides by the cosine basis
 * built below. The results are multiplied by 127, rounded, and clamped to
 * the int16_t range only.
 * When the bounds select no coefficient at all (for instance h_min > h_max),
 * the bank is filled with zeros.
 * NOTE(review): the allocation check, the release of workspace and the
 * return statements are not visible in this excerpt; Open() treats a
 * non-zero result as a failure. */
293 static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max)
295 const int N = BANK_SIZE;
/* One allocation holding three N x N matrices of doubles. */
296 double *workspace = calloc(3 * N * N, sizeof(*workspace));
300 double *gn = &workspace[0 * N * N];
301 double *cij = &workspace[1 * N * N];
302 double *tmp = &workspace[2 * N * N];
304 /* Create a gaussian noise matrix */
/* grand() is given two adjacent entries per call, hence N must be even. */
305 assert((N % 2) == 0);
306 uint32_t seed = URAND_SEED;
307 for (int y = 0; y < N; y++) {
308 for (int x = 0; x < N/2; x++) {
309 grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed);
313 /* Clear the non-selected frequencies.
314 * Only the central band is kept */
316 for (int y = 0; y < N; y++) {
317 for (int x = 0; x < N; x++) {
318 if ((x < h_min && y < v_min) || x > h_max || y > v_max) {
/* Rescale to compensate for the cleared coefficients (zero is presumably
 * their count; its updates are not visible in this excerpt). When every
 * coefficient has been cleared there is nothing to rescale: use 0 instead of
 * dividing by zero, which would turn the samples into NaN and make their
 * conversion to int below undefined. */
324 const double correction = zero < N * N ? sqrt((double)N * N / (N * N - zero)) : 0.0;
326 /* Filter the gaussian noise using an IDCT
327 * The algo is simple/stupid and does C * GN * Ct */
328 for (int i = 0; i < N; i++) {
329 for (int j = 0; j < N; j++) {
/* Cosine basis: constant first row, cosines for the other rows. */
330 cij[i * N + j] = i == 0 ? sqrt(1.0f / N) :
331 sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N);
335 //mtime_t tmul_0 = mdate();
/* First product: rows of gn by columns of cij (presumably stored in tmp). */
336 for (int i = 0; i < N; i++) {
337 for (int j = 0; j < N; j++) {
339 for (int k = 0; k < N; k++)
340 v += gn[i * N + k] * cij[k * N + j];
/* Second product: transpose of cij by tmp. */
344 for (int i = 0; i < N; i++) {
345 for (int j = 0; j < N; j++) {
347 for (int k = 0; k < N; k++)
348 v += cij[k * N + i] * tmp[k * N + j];
349 /* Do not bias when rounding */
/* Round half away from zero in both directions (the sign test selecting one
 * of the two formulas is not visible in this excerpt). */
352 vq = (int)( v * correction * 127 + 0.5);
354 vq = -(int)(-v * correction * 127 + 0.5);
355 bank[i * N + j] = __MIN(__MAX(vq, INT16_MIN), INT16_MAX);
358 //mtime_t mul_duration = mdate() - tmul_0;
359 //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
/* Variable callback for the "variance" option (Open() registers it with
 * var_AddCallback() on CFG_PREFIX "variance"): stores the new value in the
 * filter state under the configuration lock, which Filter() takes before
 * reading it.
 * cmd, oldval and data are unused: the callback is registered for a single
 * variable only, so the variable name does not need to be checked. */
365 static int Callback(vlc_object_t *object, char const *cmd,
366 vlc_value_t oldval, vlc_value_t newval, void *data)
368 filter_t *filter = (filter_t *)object;
369 filter_sys_t *sys = filter->p_sys;
370 VLC_UNUSED(cmd); VLC_UNUSED(oldval); VLC_UNUSED(data);
372 vlc_mutex_lock(&sys->cfg.lock);
374 sys->cfg.variance = newval.f_float;
375 vlc_mutex_unlock(&sys->cfg.lock);
/* Module activation callback: checks the input chroma, allocates and
 * initializes the filter state, generates the noise bank, selects the blend
 * implementation, sets up the "variance" variable and installs Filter() as
 * the video filter callback.
 * NOTE(review): the error paths, the return statements and part of the state
 * initialization are not visible in this excerpt. */
380 static int Open(vlc_object_t *object)
382 filter_t *filter = (filter_t *)object;
384 const vlc_chroma_description_t *chroma =
385 vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma);
/* Chromas without a description or with fewer than 3 planes are rejected.
 * NOTE(review): the blend routines work on uint8_t samples and nothing
 * visible here rejects chromas with more than one byte per sample -
 * confirm. */
386 if (!chroma || chroma->plane_count < 3) {
387 msg_Err(filter, "Unsupported chroma (%4.4s)",
388 (char*)&(filter->fmt_in.video.i_chroma));
392 filter_sys_t *sys = malloc(sizeof(*sys));
395 sys->is_uv_filtered = true;
397 sys->seed = URAND_SEED;
/* Periods are converted to cutoff indexes as BANK_SIZE - period, so the
 * maximal period gives the low cutoff and the minimal period the high one.
 * Both are then clamped to [1, BANK_SIZE - 1].
 * NOTE(review): nothing here enforces period-min <= period-max; when
 * cutoff_low > cutoff_high the selection test in Generate() rejects every
 * coefficient. */
399 int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max");
400 int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min");
401 cutoff_low = __MIN(__MAX(cutoff_low, 1), BANK_SIZE - 1);
402 cutoff_high = __MIN(__MAX(cutoff_high, 1), BANK_SIZE - 1);
/* The same band is used horizontally and vertically; a non-zero result is
 * handled as a failure. */
403 if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) {
/* Plain C blend by default, replaced by the SSE2 one when it was compiled in
 * and the CPU reports SSE2. */
408 sys->blend = BlockBlendC;
410 #if defined(CAN_COMPILE_SSE2) && 1
411 if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
412 sys->blend = BlockBlendSse2;
/* The variance is read once here and then updated by Callback() under
 * cfg.lock.
 * NOTE(review): Callback() dereferences filter->p_sys, whose assignment is
 * not visible in this excerpt - confirm that it is set before the callback
 * can be triggered. */
417 vlc_mutex_init(&sys->cfg.lock);
418 sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance");
419 var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL);
422 filter->pf_video_filter = Filter;
/* Module deactivation callback: destroys the configuration lock created by
 * Open().
 * NOTE(review): the rest of the teardown (presumably removing the variable
 * callback and freeing the filter state) is not visible in this excerpt -
 * confirm. */
426 static void Close(vlc_object_t *object)
428 filter_t *filter = (filter_t *)object;
429 filter_sys_t *sys = filter->p_sys;
431 vlc_mutex_destroy(&sys->cfg.lock);