/*****************************************************************************
- * noise.c : "add grain to image" video filter
+ * grain.c: add film grain
*****************************************************************************
- * Copyright (C) 2000-2007 the VideoLAN team
+ * Copyright (C) 2010 Laurent Aimar
* $Id$
*
- * Authors: Antoine Cellerier <dionoea -at- videolan -dot- org>
+ * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
+#include <assert.h>
+#include <math.h>
#include <vlc_common.h>
#include <vlc_plugin.h>
-
#include <vlc_filter.h>
-#include "filter_picture.h"
+#include <vlc_cpu.h>
+
+#include <vlc_rand.h>
/*****************************************************************************
- * Local prototypes
+ * Module descriptor
*****************************************************************************/
-static int Create ( vlc_object_t * );
-static void Destroy ( vlc_object_t * );
+static int Open (vlc_object_t *);
+static void Close(vlc_object_t *);
+
+#define BANK_SIZE (64)
+
+#define CFG_PREFIX "grain-"
-static picture_t *Filter( filter_t *, picture_t * );
+#define VARIANCE_MIN (0.0)
+#define VARIANCE_MAX (10.0)
+#define VARIANCE_TEXT N_("Variance")
+#define VARIANCE_LONGTEXT N_("Variance of the gaussian noise")
-#define FILTER_PREFIX "grain-"
+#define PERIOD_MIN 1
+#define PERIOD_MAX BANK_SIZE
+#define PERIOD_MIN_TEXT N_("Minimal period")
+#define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
+#define PERIOD_MAX_TEXT N_("Maximal period")
+#define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
+
+/* Plugin registration: declares a "video filter2" module whose callbacks are
+ * Open/Close, with three options named with the CFG_PREFIX ("grain-") prefix:
+ *  - variance:   float,   default 2.0, range [VARIANCE_MIN, VARIANCE_MAX]
+ *  - period-min: integer, default 1, range [PERIOD_MIN, PERIOD_MAX]
+ *  - period-max: integer, default 3*PERIOD_MAX/4, range [PERIOD_MIN, PERIOD_MAX]
+ */
+vlc_module_begin()
+ set_description(N_("Grain video filter"))
+ set_shortname( N_("Grain"))
+ set_help(N_("Adds filtered gaussian noise"))
+ set_capability( "video filter2", 0 )
+ set_category(CAT_VIDEO)
+ set_subcategory(SUBCAT_VIDEO_VFILTER)
+ add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX,
+ NULL, VARIANCE_TEXT, VARIANCE_LONGTEXT, false)
+ add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX,
+ NULL, PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false)
+ add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX,
+ NULL, PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false)
+ set_callbacks(Open, Close)
+vlc_module_end()
-#define NOISE_HELP N_("add grain to image")
/*****************************************************************************
- * Module descriptor
+ * Local prototypes
*****************************************************************************/
-vlc_module_begin ()
- set_description( N_("Grain video filter") )
- set_shortname( N_( "Grain" ))
- set_help(NOISE_HELP)
- set_capability( "video filter2", 0 )
- set_category( CAT_VIDEO )
- set_subcategory( SUBCAT_VIDEO_VFILTER )
- set_callbacks( Create, Destroy )
-vlc_module_end ()
+#define BLEND_SIZE (8)
+/* Private state of one grain filter instance (allocated in Open, freed in
+ * Close). */
+struct filter_sys_t {
+ /* When true, Filter() adds noise to every plane; when false, planes other
+ * than plane 0 are copied unchanged. */
+ bool is_uv_filtered;
+ /* urand() state used by PlaneFilter() to pick offsets into the banks. */
+ uint32_t seed;
+
+ /* Scale last used to build bank_y/bank_uv; Open() sets it to -1 so that
+ * the first Filter() call always rebuilds them. */
+ int scale;
+ /* Unscaled noise produced by Generate(). */
+ int16_t bank[BANK_SIZE * BANK_SIZE];
+ /* bank scaled by `scale`, used for plane 0. */
+ int16_t bank_y[BANK_SIZE * BANK_SIZE];
+ /* bank scaled by `scale / 2`, used for the other planes. */
+ int16_t bank_uv[BANK_SIZE * BANK_SIZE];
+
+ /* Blends one BLEND_SIZE x BLEND_SIZE tile; noise rows are BANK_SIZE
+ * samples apart. Set to BlockBlendC or BlockBlendSse2 in Open(). */
+ void (*blend)(uint8_t *dst, size_t dst_pitch,
+ const uint8_t *src, size_t src_pitch,
+ const int16_t *noise);
+ /* Optional hook called at the end of PlaneFilter(); may be NULL. */
+ void (*emms)(void);
+
+ /* Runtime configuration: `variance` is written by Callback() and read by
+ * Filter(), both while holding `lock`. */
+ struct {
+ vlc_mutex_t lock;
+ double variance;
+ } cfg;
+};
-struct filter_sys_t
+/* Simple and *really fast* RNG (xorshift[13,17,5])
+ *
+ * Advances the 32-bit state stored in *seed by one step and returns the new
+ * state. A state of 0 maps to 0 (every shift/xor of 0 is 0), so the state
+ * must be seeded with a non-zero value such as URAND_SEED. */
+#define URAND_SEED (2463534242)
+static uint32_t urand(uint32_t *seed)
{
- int *p_noise;
-};
+ uint32_t s = *seed;
+ s ^= s << 13;
+ s ^= s >> 17;
+ s ^= s << 5;
+ return *seed = s;
+}
+/* Uniform random value between 0 and 1.
+ *
+ * Consumes one urand() step from *seed and divides the result by UINT32_MAX,
+ * so the value lies in [0, 1]. */
+static double drand(uint32_t *seed)
+{
+ return urand(seed) / (double)UINT32_MAX;
+}
+/* Gaussian random value with a mean of 0 and a variance of 1
+ *
+ * Produces two values at once (*r1 and *r2) with the polar rejection method:
+ * draw (u1, u2) uniformly in [-1, 1]^2 until the point falls strictly inside
+ * the unit disc, then scale both coordinates by sqrt(-2 * ln(s) / s) where
+ * s = u1^2 + u2^2. Each attempt consumes two drand() steps from *seed.
+ *
+ * NOTE(review): s == 0 is not rejected; it would make log(s)/s undefined.
+ * It requires both draws to be exactly 0.5, which urand()/UINT32_MAX should
+ * not be able to produce since UINT32_MAX is odd - confirm. */
+static void grand(double *r1, double *r2, uint32_t *seed)
+{
+ double s;
+ double u1, u2;
+ do {
+ u1 = 2 * drand(seed) - 1;
+ u2 = 2 * drand(seed) - 1;
+ s = u1 * u1 + u2 * u2;
+ } while (s >= 1.0);
+
+ s = sqrt(-2 * log(s) / s);
+ *r1 = u1 * s;
+ *r2 = u2 * s;
+}
+
+/* Adds noise to a w x h block of pixels.
+ *
+ * For every (x, y) in the block, dst = clip_uint8_vlc(src + noise).
+ * dst and src rows are dst_pitch and src_pitch bytes apart; noise rows are
+ * BANK_SIZE samples apart, so the caller must make sure that w and h stay
+ * inside the noise bank starting at `noise`. */
+static void BlockBlend(uint8_t *dst, size_t dst_pitch,
+ const uint8_t *src, size_t src_pitch,
+ const int16_t *noise,
+ int w, int h)
+{
+ for (int y = 0; y < h; y++) {
+ for (int x = 0; x < w; x++) {
+ dst[y * dst_pitch + x] =
+ clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]);
+ }
+ }
+}
-static int Create( vlc_object_t *p_this )
+/* Portable C implementation of the filter_sys_t::blend hook: blends one full
+ * BLEND_SIZE x BLEND_SIZE tile through BlockBlend(). */
+static void BlockBlendC(uint8_t *dst, size_t dst_pitch,
+ const uint8_t *src, size_t src_pitch,
+ const int16_t *noise)
{
- filter_t *p_filter = (filter_t *)p_this;
+ BlockBlend(dst, dst_pitch, src, src_pitch, noise,
+ BLEND_SIZE, BLEND_SIZE);
+}
- switch( p_filter->fmt_in.video.i_chroma )
- {
- CASE_PLANAR_YUV
- break;
+#ifdef CAN_COMPILE_SSE2
+#define _STRING(x) #x
+#define STRING(x) _STRING(x)
+/* SSE2 implementation of the filter_sys_t::blend hook for an 8x8 tile.
+ *
+ * Each loop iteration handles two rows (2*i and 2*i+1): 8 source bytes per
+ * row are loaded and widened to 16 bits against the zeroed xmm0, the matching
+ * 8 noise samples are added with signed saturation (paddsw), and the result
+ * is packed back to bytes with unsigned saturation (packuswb) and stored.
+ * The second noise row is addressed 2*BANK_SIZE bytes after the first, i.e.
+ * one bank row of int16_t samples.
+ *
+ * NOTE(review): xmm0 is cleared in a separate asm statement and no xmm
+ * register is listed as clobbered; this relies on the compiler not touching
+ * xmm0..xmm4 between the statements - confirm. */
+static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
+ const uint8_t *src, size_t src_pitch,
+ const int16_t *noise)
+{
+#if BLEND_SIZE == 8
+ /* TODO It is possible to do the math on 8 bits using
+ * paddusb X and then psubusb -X.
+ */
+ asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
+ for (int i = 0; i < 8/2; i++) {
+ asm volatile (
+ "movq (%[src1]), %%xmm1\n"
+ "movq (%[src2]), %%xmm3\n"
+ "movdqu (%[noise]), %%xmm2\n"
+ "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"
+
+ "punpcklbw %%xmm0, %%xmm1\n"
+ "punpcklbw %%xmm0, %%xmm3\n"
+
+ "paddsw %%xmm2, %%xmm1\n"
+ "paddsw %%xmm4, %%xmm3\n"
+ "packuswb %%xmm1, %%xmm1\n"
+ "packuswb %%xmm3, %%xmm3\n"
+ "movq %%xmm1, (%[dst1])\n"
+ "movq %%xmm3, (%[dst2])\n"
+ : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
+ [dst2]"r"(&dst[(2*i+1) * dst_pitch]),
+ [src1]"r"(&src[(2*i+0) * src_pitch]),
+ [src2]"r"(&src[(2*i+1) * src_pitch]),
+ [noise]"r"(&noise[2*i * BANK_SIZE])
+ : "memory");
+ }
+#else
+# error "BLEND_SIZE unsupported"
+#endif
+}
+/* filter_sys_t::emms hook paired with BlockBlendSse2: executes `emms`.
+ * NOTE(review): the blend code above only uses xmm registers; confirm
+ * whether this is actually required. */
+static void Emms(void)
+{
+ asm volatile ("emms");
+}
+#endif
- default:
- msg_Err( p_filter, "Unsupported input chroma (%4s)",
- (char*)&(p_filter->fmt_in.video.i_chroma) );
- return VLC_EGENERIC;
+/**
+ * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
+ *
+ * Every one of the BANK_SIZE x BANK_SIZE samples of src is multiplied by
+ * scale and divided by 2^(7+8), then stored in dst. The magnitude is rounded
+ * to nearest (exact halves go toward zero) and the sign is restored
+ * afterwards, so positive and negative samples are rounded symmetrically.
+ * With the scale computed in Filter() (256 * sqrt(variance)) this amounts to
+ * dst = src * sqrt(variance) / 128.
+ */
+static void Scale(int16_t *dst, int16_t *src, int scale)
+{
+ const int N = BANK_SIZE;
+ const int shift = 7 + 8;
+
+ for (int y = 0; y < N; y++) {
+ for (int x = 0; x < N; x++) {
+ const int v = src[y * N + x];
+ int vq;
+ if (v >= 0)
+ vq = ( v * scale + (1 << (shift-1)) - 1) >> shift;
+ else
+ vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift);
+ dst[y * N + x] = vq;
+ }
}
+}
- /* Allocate structure */
- p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
- if( p_filter->p_sys == NULL )
- return VLC_ENOMEM;
+/**
+ * Add noise to one plane.
+ *
+ * The visible area of dst is walked in BLEND_SIZE x BLEND_SIZE tiles. For
+ * each tile a random offset is drawn from seed (two urand() steps, x then y)
+ * so that a full tile always fits inside the BANK_SIZE x BANK_SIZE bank.
+ * Full tiles go through the sys->blend hook; partial tiles on the right and
+ * bottom edges go through BlockBlend() with the size clamped to BLEND_SIZE.
+ * sys->emms, when set, is called once after the whole plane is done.
+ */
+static void PlaneFilter(filter_t *filter,
+ plane_t *dst, const plane_t *src,
+ int16_t *bank, uint32_t *seed)
+{
+ filter_sys_t *sys = filter->p_sys;
- p_filter->pf_video_filter = Filter;
+ for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) {
+ for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) {
+ int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
+ int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
+ const int16_t *noise = &bank[by * BANK_SIZE + bx];
- p_filter->p_sys->p_noise = NULL;
+ /* Remaining visible size from this tile's origin; may be much
+ * larger than BLEND_SIZE in one direction on edge tiles. */
+ int w = dst->i_visible_pitch - x;
+ int h = dst->i_visible_lines - y;
- return VLC_SUCCESS;
-}
+ const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x];
+ uint8_t *dstp = &dst->p_pixels[y * dst->i_pitch + x];
-static void Destroy( vlc_object_t *p_this )
-{
- filter_t *p_filter = (filter_t *)p_this;
- free( p_filter->p_sys->p_noise );
- free( p_filter->p_sys );
+ if (w >= BLEND_SIZE && h >= BLEND_SIZE)
+ sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise);
+ else
+ /* Only one of w/h is known to be short here: clamp both so
+ * the tile never reads past the noise bank nor writes over
+ * pixels that belong to other tiles. */
+ BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise,
+ __MIN(w, BLEND_SIZE), __MIN(h, BLEND_SIZE));
+ }
+ }
+ if (sys->emms)
+ sys->emms();
}
-static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
+/**
+ * Video filter callback: returns a new picture made of src plus grain.
+ *
+ * A destination picture is requested with filter_NewPicture(); if that fails
+ * src is released and NULL is returned. The current variance is read under
+ * cfg.lock and clamped to [VARIANCE_MIN, VARIANCE_MAX]; the scaled banks are
+ * rebuilt only when the resulting integer scale differs from the cached one.
+ * Plane 0 is filtered with bank_y; other planes are filtered with bank_uv
+ * when is_uv_filtered is set and copied otherwise. src is always released.
+ */
+static picture_t *Filter(filter_t *filter, picture_t *src)
{
- picture_t *p_outpic;
- filter_sys_t *p_sys = p_filter->p_sys;
- int i_index;
+ filter_sys_t *sys = filter->p_sys;
- if( !p_pic ) return NULL;
-
- p_outpic = filter_NewPicture( p_filter );
- if( !p_outpic )
- {
- picture_Release( p_pic );
+ picture_t *dst = filter_NewPicture(filter);
+ if (!dst) {
+ picture_Release(src);
return NULL;
}
- {
- uint8_t *p_in = p_pic->p[Y_PLANE].p_pixels;
- uint8_t *p_out = p_outpic->p[Y_PLANE].p_pixels;
+ vlc_mutex_lock(&sys->cfg.lock);
+ const double variance = __MIN(__MAX(sys->cfg.variance, VARIANCE_MIN), VARIANCE_MAX);
+ vlc_mutex_unlock(&sys->cfg.lock);
- const int i_num_lines = p_pic->p[Y_PLANE].i_visible_lines;
- const int i_num_cols = p_pic->p[Y_PLANE].i_visible_pitch;
- const int i_pitch = p_pic->p[Y_PLANE].i_pitch;
+ /* Fixed-point gain with 8 fractional bits; the other planes get half of
+ * it. */
+ const int scale = 256 * sqrt(variance);
+ if (scale != sys->scale) {
+ sys->scale = scale;
+ Scale(sys->bank_y, sys->bank, sys->scale);
+ Scale(sys->bank_uv, sys->bank, sys->scale / 2);
+ }
- int i_line, i_col;
+ for (int i = 0; i < dst->i_planes; i++) {
+ const plane_t *srcp = &src->p[i];
+ plane_t *dstp = &dst->p[i];
- int *p_noise = p_sys->p_noise;
- if( !p_noise )
- {
- p_noise = p_sys->p_noise =
- (int*)malloc(i_pitch*i_num_lines*sizeof(int));
+ if (i == 0 || sys->is_uv_filtered) {
+ int16_t *bank = i == 0 ? sys->bank_y :
+ sys->bank_uv;
+ PlaneFilter(filter, dstp, srcp, bank, &sys->seed);
}
-
- for( i_line = 0; i_line < i_num_lines; i_line++ )
- {
- for( i_col = 0; i_col < i_num_cols; i_col++ )
- {
- p_noise[i_line*i_pitch+i_col] = ((rand()&0x1f)-0x0f);
- }
+ else {
+ plane_CopyPixels(dstp, srcp);
}
+ }
- for( i_line = 2/*0*/ ; i_line < i_num_lines-2/**/; i_line++ )
- {
- for( i_col = 2/*0*/; i_col < i_num_cols/2; i_col++ )
- {
- p_out[i_line*i_pitch+i_col] = clip_uint8_vlc(
- p_in[i_line*i_pitch+i_col]
-#if 0
- + p_noise[i_line*i_pitch+i_col] );
-#else
-/* 2 rows up */
- + (( ( p_noise[(i_line-2)*i_pitch+i_col-2]<<1 )
- + ( p_noise[(i_line-2)*i_pitch+i_col-1]<<2 )
- + ( p_noise[(i_line-2)*i_pitch+i_col]<<2 )
- + ( p_noise[(i_line-2)*i_pitch+i_col+1]<<2 )
- + ( p_noise[(i_line-2)*i_pitch+i_col+2]<<1 )
- /* 1 row up */
- + ( p_noise[(i_line-1)*i_pitch+i_col-2]<<2 )
- + ( p_noise[(i_line-1)*i_pitch+i_col-1]<<3 )
- + ( p_noise[(i_line-1)*i_pitch+i_col]*12 )
- + ( p_noise[(i_line-1)*i_pitch+i_col+1]<<3 )
- + ( p_noise[(i_line-1)*i_pitch+i_col+2]<<2 )
- /* */
- + ( p_noise[i_line*i_pitch+i_col-2]<<2 )
- + ( p_noise[i_line*i_pitch+i_col-1]*12 )
- + ( p_noise[i_line*i_pitch+i_col]<<4 )
- + ( p_noise[i_line*i_pitch+i_col+1]*12 )
- + ( p_noise[i_line*i_pitch+i_col+2]<<2 )
- /* 1 row down */
- + ( p_noise[(i_line+1)*i_pitch+i_col-2]<<2 )
- + ( p_noise[(i_line+1)*i_pitch+i_col-1]<<3 )
- + ( p_noise[(i_line+1)*i_pitch+i_col]*12 )
- + ( p_noise[(i_line+1)*i_pitch+i_col+1]<<3 )
- + ( p_noise[(i_line+1)*i_pitch+i_col+2]<<2 )
- /* 2 rows down */
- + ( p_noise[(i_line+2)*i_pitch+i_col-2]<<1 )
- + ( p_noise[(i_line+2)*i_pitch+i_col-1]<<2 )
- + ( p_noise[(i_line+2)*i_pitch+i_col]<<2 )
- + ( p_noise[(i_line+2)*i_pitch+i_col+1]<<2 )
- + ( p_noise[(i_line+2)*i_pitch+i_col+2]<<1 )
- )>>7/*/152*/));
-#endif
+ picture_CopyProperties(dst, src);
+ picture_Release(src);
+ return dst;
+}
+
+/**
+ * Generate a filtered gaussian noise bank of BANK_SIZE x BANK_SIZE samples.
+ *
+ * A matrix of unit-variance gaussian values (fixed seed URAND_SEED, so the
+ * result is deterministic) is treated as frequency coefficients. Coefficients
+ * with (x < h_min and y < v_min), x > h_max or y > v_max are cleared, the
+ * remainder is transformed with a separable DCT basis, and the result is
+ * multiplied by 127 times an energy correction factor before being rounded
+ * (half away from zero) and stored in bank.
+ *
+ * NOTE(review): samples are clamped to the int16_t range only, so they are
+ * not strictly limited to [-127, 127].
+ *
+ * If the limits clear every coefficient, bank is filled with zeros.
+ *
+ * @return VLC_SUCCESS, or VLC_ENOMEM if the workspace cannot be allocated.
+ */
+static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max)
+{
+ const int N = BANK_SIZE;
+ double *workspace = calloc(3 * N * N, sizeof(*workspace));
+ if (!workspace)
+ return VLC_ENOMEM;
+ double *gn = &workspace[0 * N * N];
+ double *cij = &workspace[1 * N * N];
+ double *tmp = &workspace[2 * N * N];
+
+ /* Create a gaussian noise matrix */
+ assert((N % 2) == 0);
+ uint32_t seed = URAND_SEED;
+ for (int y = 0; y < N; y++) {
+ for (int x = 0; x < N/2; x++) {
+ grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed);
+ }
+ }
+
+ /* Clear non selected frequency.
+ * Only the central band is kept */
+ int zero = 0;
+ for (int y = 0; y < N; y++) {
+ for (int x = 0; x < N; x++) {
+ if ((x < h_min && y < v_min) || x > h_max || y > v_max) {
+ gn[y * N + x] = 0.0;
+ zero++;
}
- for( ; i_col < i_num_cols; i_col++ )
- p_out[i_line*i_pitch+i_col] = p_in[i_line*i_pitch+i_col];
}
}
+ /* Compensate for the energy removed with the cleared coefficients. When
+ * nothing is left (e.g. h_max < h_min) avoid dividing by zero: the noise
+ * is all zeros anyway, so any finite factor yields an empty bank. */
+ const int kept = N * N - zero;
+ const double correction = kept > 0 ? sqrt((double)N * N / kept) : 0.0;
+
+ /* Filter the gaussian noise using an IDCT
+ * The algo is simple/stupid and does C * GN * Ct */
+ for (int i = 0; i < N; i++) {
+ for (int j = 0; j < N; j++) {
+ cij[i * N + j] = i == 0 ? sqrt(1.0f / N) :
+ sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N);
+ }
+ }
+
+ //mtime_t tmul_0 = mdate();
+ for (int i = 0; i < N; i++) {
+ for (int j = 0; j < N; j++) {
+ double v = 0.0;
+ for (int k = 0; k < N; k++)
+ v += gn[i * N + k] * cij[k * N + j];
+ tmp[i * N + j] = v;
+ }
+ }
+ for (int i = 0; i < N; i++) {
+ for (int j = 0; j < N; j++) {
+ double v = 0.0;
+ for (int k = 0; k < N; k++)
+ v += cij[k * N + i] * tmp[k * N + j];
+ /* Do not bias when rounding */
+ int vq;
+ if (v >= 0)
+ vq = (int)( v * correction * 127 + 0.5);
+ else
+ vq = -(int)(-v * correction * 127 + 0.5);
+ bank[i * N + j] = __MIN(__MAX(vq, INT16_MIN), INT16_MAX);
+ }
+ }
+ //mtime_t mul_duration = mdate() - tmul_0;
+ //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
- for( i_index = 1; i_index < p_pic->i_planes; i_index++ )
- {
- uint8_t *p_in = p_pic->p[i_index].p_pixels;
- uint8_t *p_out = p_outpic->p[i_index].p_pixels;
+ free(workspace);
+ return VLC_SUCCESS;
+}
- const int i_lines = p_pic->p[i_index].i_lines;
- const int i_pitch = p_pic->p[i_index].i_pitch;
+/* Variable callback registered by Open() on CFG_PREFIX "variance": stores the
+ * new float value in sys->cfg.variance under cfg.lock so that Filter() picks
+ * it up on its next call. The variable name (cmd) is not checked, the
+ * comparison being commented out. Always returns VLC_SUCCESS. */
+static int Callback(vlc_object_t *object, char const *cmd,
+ vlc_value_t oldval, vlc_value_t newval, void *data)
+{
+ filter_t *filter = (filter_t *)object;
+ filter_sys_t *sys = filter->p_sys;
+ VLC_UNUSED(oldval); VLC_UNUSED(data);
- vlc_memcpy( p_out, p_in, i_lines * i_pitch );
+ vlc_mutex_lock(&sys->cfg.lock);
+ //if (!strcmp(cmd, CFG_PREFIX "variance"))
+ sys->cfg.variance = newval.f_float;
+ vlc_mutex_unlock(&sys->cfg.lock);
+ return VLC_SUCCESS;
+}
+
+/**
+ * Module open callback.
+ *
+ * Rejects input chromas that are unknown or have fewer than 3 planes,
+ * allocates the private state, generates the noise bank from the
+ * period-min/period-max options (converted to cutoffs as BANK_SIZE - period
+ * and clamped to [1, BANK_SIZE - 1]), selects the blend implementation and
+ * finally hooks the "variance" variable.
+ *
+ * @return VLC_SUCCESS, VLC_ENOMEM if the state cannot be allocated, or
+ * VLC_EGENERIC for an unsupported chroma or a failed bank generation.
+ */
+static int Open(vlc_object_t *object)
+{
+ filter_t *filter = (filter_t *)object;
+
+ const vlc_chroma_description_t *chroma =
+ vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma);
+ if (!chroma || chroma->plane_count < 3) {
+ msg_Err(filter, "Unsupported chroma (%4.4s)",
+ (char*)&(filter->fmt_in.video.i_chroma));
+ return VLC_EGENERIC;
+ }
+
+ filter_sys_t *sys = malloc(sizeof(*sys));
+ if (!sys)
+ return VLC_ENOMEM;
+ sys->is_uv_filtered = true;
+ sys->scale = -1;
+ sys->seed = URAND_SEED;
+
+ int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max");
+ int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min");
+ cutoff_low = __MIN(__MAX(cutoff_low, 1), BANK_SIZE - 1);
+ cutoff_high = __MIN(__MAX(cutoff_high, 1), BANK_SIZE - 1);
+ if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) {
+ free(sys);
+ return VLC_EGENERIC;
}
- return CopyInfoAndRelease( p_outpic, p_pic );
+ sys->blend = BlockBlendC;
+ sys->emms = NULL;
+#if defined(CAN_COMPILE_SSE2) && 1
+ if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
+ sys->blend = BlockBlendSse2;
+ sys->emms = Emms;
+ }
+#endif
+
+ vlc_mutex_init(&sys->cfg.lock);
+ sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance");
+
+ /* Publish the state before registering the callback: Callback()
+ * dereferences filter->p_sys and may run as soon as it is added. */
+ filter->p_sys = sys;
+ var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL);
+
+ filter->pf_video_filter = Filter;
+ return VLC_SUCCESS;
}
+
+/**
+ * Module close callback: unhooks the "variance" callback installed by Open(),
+ * then destroys the configuration lock and frees the private state.
+ */
+static void Close(vlc_object_t *object)
+{
+ filter_t *filter = (filter_t *)object;
+ filter_sys_t *sys = filter->p_sys;
+
+ /* Must happen first: Callback() locks sys->cfg.lock and writes into sys,
+ * so it must not be able to run once they are gone. */
+ var_DelCallback(filter, CFG_PREFIX "variance", Callback, NULL);
+ vlc_mutex_destroy(&sys->cfg.lock);
+ free(sys);
+}
+