]> git.sesse.net Git - vlc/commitdiff
Rewrote grain from scratch using filtered gaussian noise.
authorLaurent Aimar <fenrir@videolan.org>
Thu, 13 May 2010 22:58:43 +0000 (00:58 +0200)
committerLaurent Aimar <fenrir@videolan.org>
Fri, 14 May 2010 20:33:41 +0000 (22:33 +0200)
It is faster (and makes a bit of use of SSE2), and better looking IMHO.

modules/video_filter/grain.c

index 8aff4cddf3383decdcd42f5758a91b89d4df4e29..4103ff963420e71453e906e04fbdcdcd1a8e0fba 100644 (file)
@@ -1,10 +1,10 @@
 /*****************************************************************************
- * noise.c : "add grain to image" video filter
+ * grain.c: add film grain
  *****************************************************************************
- * Copyright (C) 2000-2007 the VideoLAN team
+ * Copyright (C) 2010 Laurent Aimar
  * $Id$
  *
- * Authors: Antoine Cellerier <dionoea -at- videolan -dot- org>
+ * Authors: Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 #ifdef HAVE_CONFIG_H
 # include "config.h"
 #endif
+#include <assert.h>
+#include <math.h>
 
 #include <vlc_common.h>
 #include <vlc_plugin.h>
-#include <vlc_rand.h>
-
 #include <vlc_filter.h>
-#include "filter_picture.h"
+#include <vlc_cpu.h>
+
+#include <vlc_rand.h>
 
 /*****************************************************************************
- * Local prototypes
+ * Module descriptor
  *****************************************************************************/
-static int  Create    ( vlc_object_t * );
-static void Destroy   ( vlc_object_t * );
+static int  Open (vlc_object_t *);
+static void Close(vlc_object_t *);
 
-static picture_t *Filter( filter_t *, picture_t * );
+#define BANK_SIZE (64)
 
-#define FILTER_PREFIX "grain-"
+#define CFG_PREFIX "grain-"
+
+#define VARIANCE_MIN        (0.0)
+#define VARIANCE_MAX        (10.0)
+#define VARIANCE_TEXT       N_("Variance")
+#define VARIANCE_LONGTEXT   N_("Variance of the gaussian noise")
+
+#define PERIOD_MIN          1
+#define PERIOD_MAX          BANK_SIZE
+#define PERIOD_MIN_TEXT     N_("Minimal period")
+#define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel")
+#define PERIOD_MAX_TEXT     N_("Maximal period")
+#define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel")
+
+vlc_module_begin()
+    set_description(N_("Grain video filter"))
+    set_shortname( N_("Grain"))
+    set_help(N_("Adds filtered gaussian noise"))
+    set_capability( "video filter2", 0 )
+    set_category(CAT_VIDEO)
+    set_subcategory(SUBCAT_VIDEO_VFILTER)
+    add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX,
+                         NULL, VARIANCE_TEXT, VARIANCE_LONGTEXT, false)
+    add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX,
+                           NULL, PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false)
+    add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX,
+                           NULL, PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false)
+    set_callbacks(Open, Close)
+vlc_module_end()
 
-#define NOISE_HELP N_("add grain to image")
 /*****************************************************************************
- * Module descriptor
+ * Local prototypes
  *****************************************************************************/
-vlc_module_begin ()
-    set_description( N_("Grain video filter") )
-    set_shortname( N_( "Grain" ))
-    set_help(NOISE_HELP)
-    set_capability( "video filter2", 0 )
-    set_category( CAT_VIDEO )
-    set_subcategory( SUBCAT_VIDEO_VFILTER )
 
-    set_callbacks( Create, Destroy )
-vlc_module_end ()
+#define BLEND_SIZE (8)
+struct filter_sys_t { /* Private filter state: allocated in Open(), freed in Close(). */
+    bool     is_uv_filtered; /* when false, Filter() copies planes other than plane 0 unchanged */
+    uint32_t seed;           /* urand() state used by PlaneFilter() to pick noise offsets */
+
+    int      scale;          /* scale last used to build bank_y/bank_uv; -1 until the first Filter() */
+    int16_t  bank[BANK_SIZE * BANK_SIZE];    /* unscaled noise written by Generate() in Open() */
+    int16_t  bank_y[BANK_SIZE * BANK_SIZE];  /* bank scaled by scale, used for plane 0 */
+    int16_t  bank_uv[BANK_SIZE * BANK_SIZE]; /* bank scaled by scale / 2, used for the other planes */
+
+    void (*blend)(uint8_t *dst, size_t dst_pitch,
+                  const uint8_t *src, size_t src_pitch,
+                  const int16_t *noise); /* blends one full BLEND_SIZE x BLEND_SIZE block */
+    void (*emms)(void);      /* optional; called at the end of PlaneFilter() when non-NULL */
+
+    struct {
+        vlc_mutex_t lock;     /* protects variance */
+        double      variance; /* written by Callback(), read (and clamped) by Filter() */
+    } cfg;
+};
 
-struct filter_sys_t
+/* Simple and *really fast* RNG (xorshift[13,17,5])*/
+#define URAND_SEED (2463534242)
+static uint32_t urand(uint32_t *seed)
 {
-    int *p_noise;
-};
+    uint32_t s = *seed;
+    s ^= s << 13;
+    s ^= s >> 17;
+    s ^= s << 5;
+    return *seed = s;
+}
+/* Uniform random value between 0 and 1.
+ * Advances *seed through urand() and divides the 32-bit result by
+ * UINT32_MAX. */
+static double drand(uint32_t *seed)
+{
+    return urand(seed) / (double)UINT32_MAX;
+}
+/* Gaussian random value with a mean of 0 and a variance of 1
+ *
+ * Produces two values per call, stored through r1 and r2, using the polar
+ * (rejection) form of the Box-Muller transform: the point (u1, u2) is
+ * redrawn from drand() until it lies strictly inside the unit circle.
+ * seed is the urand() state and is advanced by every draw.
+ * NOTE(review): s == 0 is not rejected and would make log(s) / s
+ * non-finite; presumably unreachable with drand()'s output values - confirm. */
+static void grand(double *r1, double *r2, uint32_t *seed)
+{
+    double s;
+    double u1, u2;
+    do {
+        u1 = 2 * drand(seed) - 1; /* map [0, 1] to [-1, 1] */
+        u2 = 2 * drand(seed) - 1;
+        s = u1 * u1 + u2 * u2;    /* squared distance from the origin */
+    } while (s >= 1.0);           /* retry while outside (or on) the unit circle */
+
+    s = sqrt(-2 * log(s) / s);
+    *r1 = u1 * s;
+    *r2 = u2 * s;
+}
 
-static int Create( vlc_object_t *p_this )
+static void BlockBlend(uint8_t *dst, size_t dst_pitch,
+                       const uint8_t *src, size_t src_pitch,
+                       const int16_t *noise,
+                       int w, int h)
 {
-    filter_t *p_filter = (filter_t *)p_this;
+    for (int y = 0; y < h; y++) {
+        for (int x = 0; x < w; x++) {
+            dst[y * dst_pitch + x] =
+                clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]);
+        }
+    }
+}
 
-    switch( p_filter->fmt_in.video.i_chroma )
-    {
-        CASE_PLANAR_YUV
-            break;
+static void BlockBlendC(uint8_t *dst, size_t dst_pitch,
+                        const uint8_t *src, size_t src_pitch,
+                        const int16_t *noise)
+{ /* Plain C blend of one full BLEND_SIZE x BLEND_SIZE block; the default sys->blend set in Open(). */
+    BlockBlend(dst, dst_pitch, src, src_pitch, noise,
+               BLEND_SIZE, BLEND_SIZE);
+}
 
-        default:
-            msg_Err( p_filter, "Unsupported input chroma (%4.4s)",
-                     (char*)&(p_filter->fmt_in.video.i_chroma) );
-            return VLC_EGENERIC;
+#ifdef CAN_COMPILE_SSE2
+#define _STRING(x) #x
+#define STRING(x) _STRING(x)
+static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch,
+                           const uint8_t *src, size_t src_pitch,
+                           const int16_t *noise)
+{
+#if BLEND_SIZE == 8
+    /* TODO It is possible to do the math on 8 bits using
+     * paddusb X  and then psubusb -X.
+     */
+    asm volatile ("pxor %%xmm0, %%xmm0\n" : :);
+    for (int i = 0; i < 8/2; i++) {
+        asm volatile (
+            "movq       (%[src1]),   %%xmm1\n"
+            "movq       (%[src2]),   %%xmm3\n"
+            "movdqu     (%[noise]), %%xmm2\n"
+            "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n"
+
+            "punpcklbw  %%xmm0,     %%xmm1\n"
+            "punpcklbw  %%xmm0,     %%xmm3\n"
+
+            "paddsw    %%xmm2,     %%xmm1\n"
+            "paddsw    %%xmm4,     %%xmm3\n"
+            "packuswb   %%xmm1,     %%xmm1\n"
+            "packuswb   %%xmm3,     %%xmm3\n"
+            "movq       %%xmm1,     (%[dst1])\n"
+            "movq       %%xmm3,     (%[dst2])\n"
+            : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]),
+                [dst2]"r"(&dst[(2*i+1) * dst_pitch]),
+                [src1]"r"(&src[(2*i+0) * src_pitch]),
+                [src2]"r"(&src[(2*i+1) * src_pitch]),
+                [noise]"r"(&noise[2*i * BANK_SIZE])
+            : "memory");
     }
+#else
+#   error "BLEND_SIZE unsupported"
+#endif
+}
+static void Emms(void)
+{
+    asm volatile ("emms");
+}
+#endif
 
-    /* Allocate structure */
-    p_filter->p_sys = malloc( sizeof( filter_sys_t ) );
-    if( p_filter->p_sys == NULL )
-        return VLC_ENOMEM;
+/**
+ * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits.
+ *
+ * Each of the BANK_SIZE * BANK_SIZE entries of src is multiplied by scale
+ * and shifted right by 7 + 8 bits; the result is written to dst at the
+ * same index. The computation is done on the magnitude and the sign is
+ * restored afterwards, so positive and negative inputs round symmetrically
+ * (to nearest, with exact halves going toward zero).
+ */
+static void Scale(int16_t *dst, int16_t *src, int scale)
+{
+    const int N = BANK_SIZE;
+    const int shift = 7 + 8; /* 7 bits of data + 8 bits of scale */
+
+    for (int y = 0; y < N; y++) {
+        for (int x = 0; x < N; x++) {
+            const int v = src[y * N + x];
+            int vq;
+            if (v >= 0)
+                vq =   ( v * scale + (1 << (shift-1)) - 1) >> shift;
+            else
+                vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift); /* same rounding on |v| */
+            dst[y * N + x] = vq;
+        }
+    }
+}
+
+static void PlaneFilter(filter_t *filter,
+                        plane_t *dst, const plane_t *src,
+                        int16_t *bank, uint32_t *seed)
+{
+    filter_sys_t *sys = filter->p_sys;
 
-    p_filter->pf_video_filter = Filter;
+    for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) {
+        for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) {
+            int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
+            int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1);
+            const int16_t *noise = &bank[by * BANK_SIZE + bx];
 
-    p_filter->p_sys->p_noise = NULL;
+            int w  = dst->i_visible_pitch - x;
+            int h  = dst->i_visible_lines - y;
 
-    return VLC_SUCCESS;
-}
+            const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x];
+            uint8_t       *dstp = &dst->p_pixels[y * dst->i_pitch + x];
 
-static void Destroy( vlc_object_t *p_this )
-{
-    filter_t *p_filter = (filter_t *)p_this;
-    free( p_filter->p_sys->p_noise );
-    free( p_filter->p_sys );
+            if (w >= BLEND_SIZE && h >= BLEND_SIZE)
+                sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise);
+            else
+                BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise, w, h);
+        }
+    }
+    if (sys->emms)
+        sys->emms();
 }
 
-static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
+static picture_t *Filter(filter_t *filter, picture_t *src)
 {
-    picture_t *p_outpic;
-    filter_sys_t *p_sys = p_filter->p_sys;
-    int i_index;
-
-    if( !p_pic ) return NULL;
+    filter_sys_t *sys = filter->p_sys;
 
-    p_outpic = filter_NewPicture( p_filter );
-    if( !p_outpic )
-    {
-        picture_Release( p_pic );
+    picture_t *dst = filter_NewPicture(filter);
+    if (!dst) {
+        picture_Release(src);
         return NULL;
     }
 
-    {
-        uint8_t *p_in = p_pic->p[Y_PLANE].p_pixels;
-        uint8_t *p_out = p_outpic->p[Y_PLANE].p_pixels;
+    vlc_mutex_lock(&sys->cfg.lock);
+    const double variance = __MIN(__MAX(sys->cfg.variance, VARIANCE_MIN), VARIANCE_MAX);
+    vlc_mutex_unlock(&sys->cfg.lock);
 
-        const int i_num_lines = p_pic->p[Y_PLANE].i_visible_lines;
-        const int i_num_cols = p_pic->p[Y_PLANE].i_visible_pitch;
-        const int i_pitch = p_pic->p[Y_PLANE].i_pitch;
+    const int scale = 256 * sqrt(variance);
+    if (scale != sys->scale) {
+        sys->scale = scale;
+        Scale(sys->bank_y,  sys->bank, sys->scale);
+        Scale(sys->bank_uv, sys->bank, sys->scale / 2);
+    }
 
-        int i_line, i_col;
+    for (int i = 0; i < dst->i_planes; i++) {
+        const plane_t *srcp = &src->p[i];
+        plane_t       *dstp = &dst->p[i];
 
-        int *p_noise = p_sys->p_noise;
-        if( !p_noise )
-        {
-            p_noise = p_sys->p_noise =
-                (int*)malloc(i_pitch*i_num_lines*sizeof(int));
+        if (i == 0 || sys->is_uv_filtered) {
+            int16_t *bank = i == 0 ? sys->bank_y :
+                                     sys->bank_uv;
+            PlaneFilter(filter, dstp, srcp, bank, &sys->seed);
         }
-
-        for( i_line = 0; i_line < i_num_lines; i_line++ )
-        {
-            for( i_col = 0; i_col < i_num_cols; i_col++ )
-            {
-                p_noise[i_line*i_pitch+i_col] = ((vlc_mrand48()&0x1f)-0x0f);
-            }
+        else {
+            plane_CopyPixels(dstp, srcp);
         }
+    }
 
-        for( i_line = 2/*0*/ ; i_line < i_num_lines-2/**/; i_line++ )
-        {
-            for( i_col = 2/*0*/; i_col < i_num_cols/2; i_col++ )
-            {
-                p_out[i_line*i_pitch+i_col] = clip_uint8_vlc(
-                          p_in[i_line*i_pitch+i_col]
-#if 0
-                        + p_noise[i_line*i_pitch+i_col] );
-#else
-/* 2 rows up */
-              + ((  ( p_noise[(i_line-2)*i_pitch+i_col-2]<<1 )
-              + ( p_noise[(i_line-2)*i_pitch+i_col-1]<<2 )
-              + ( p_noise[(i_line-2)*i_pitch+i_col]<<2 )
-              + ( p_noise[(i_line-2)*i_pitch+i_col+1]<<2 )
-              + ( p_noise[(i_line-2)*i_pitch+i_col+2]<<1 )
-              /* 1 row up */
-              + ( p_noise[(i_line-1)*i_pitch+i_col-2]<<2 )
-              + ( p_noise[(i_line-1)*i_pitch+i_col-1]<<3 )
-              + ( p_noise[(i_line-1)*i_pitch+i_col]*12 )
-              + ( p_noise[(i_line-1)*i_pitch+i_col+1]<<3 )
-              + ( p_noise[(i_line-1)*i_pitch+i_col+2]<<2 )
-              /* */
-              + ( p_noise[i_line*i_pitch+i_col-2]<<2 )
-              + ( p_noise[i_line*i_pitch+i_col-1]*12 )
-              + ( p_noise[i_line*i_pitch+i_col]<<4 )
-              + ( p_noise[i_line*i_pitch+i_col+1]*12 )
-              + ( p_noise[i_line*i_pitch+i_col+2]<<2 )
-              /* 1 row down */
-              + ( p_noise[(i_line+1)*i_pitch+i_col-2]<<2 )
-              + ( p_noise[(i_line+1)*i_pitch+i_col-1]<<3 )
-              + ( p_noise[(i_line+1)*i_pitch+i_col]*12 )
-              + ( p_noise[(i_line+1)*i_pitch+i_col+1]<<3 )
-              + ( p_noise[(i_line+1)*i_pitch+i_col+2]<<2 )
-              /* 2 rows down */
-              + ( p_noise[(i_line+2)*i_pitch+i_col-2]<<1 )
-              + ( p_noise[(i_line+2)*i_pitch+i_col-1]<<2 )
-              + ( p_noise[(i_line+2)*i_pitch+i_col]<<2 )
-              + ( p_noise[(i_line+2)*i_pitch+i_col+1]<<2 )
-              + ( p_noise[(i_line+2)*i_pitch+i_col+2]<<1 )
-              )>>7/*/152*/));
-#endif
+    picture_CopyProperties(dst, src);
+    picture_Release(src);
+    return dst;
+}
+
+/**
+ * Generate filtered gaussian noise, nominally within the [-127, 127] range
+ * (samples are scaled by 127 and clamped only to the int16_t range).
+ */
+static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max)
+{
+    const int N = BANK_SIZE;
+    double *workspace = calloc(3 * N * N, sizeof(*workspace));
+    if (!workspace)
+        return VLC_ENOMEM;
+
+    double *gn        = &workspace[0 * N * N];
+    double *cij       = &workspace[1 * N * N];
+    double *tmp       = &workspace[2 * N * N];
+
+    /* Create a gaussian noise matrix */
+    assert((N % 2) == 0);
+    uint32_t seed = URAND_SEED;
+    for (int y = 0; y < N; y++) {
+        for (int x = 0; x < N/2; x++) {
+            grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed);
+        }
+    }
 
+    /* Clear non selected frequency.
+     * Only the central band is kept */
+    int zero = 0;
+    for (int y = 0; y < N; y++) {
+        for (int x = 0; x < N; x++) {
+            if ((x < h_min && y < v_min) || x > h_max || y > v_max) {
+                gn[y * N + x] = 0.0;
+                zero++;
             }
-            for( ; i_col < i_num_cols; i_col++ )
-                p_out[i_line*i_pitch+i_col] = p_in[i_line*i_pitch+i_col];
+        }
+    }
+    const double correction = sqrt((double)N * N  / (N * N - zero));
+
+    /* Filter the gaussian noise using an IDCT
+     * The algo is simple/stupid and does C * GN * Ct */
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+            cij[i * N + j] = i == 0 ? sqrt(1.0f / N) :
+                                      sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N);
         }
     }
 
-    for( i_index = 1; i_index < p_pic->i_planes; i_index++ )
-    {
-        uint8_t *p_in = p_pic->p[i_index].p_pixels;
-        uint8_t *p_out = p_outpic->p[i_index].p_pixels;
+    //mtime_t tmul_0 = mdate();
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+            double v = 0.0;
+            for (int k = 0; k < N; k++)
+                v += gn[i * N + k] * cij[k * N + j];
+            tmp[i * N + j] = v;
+        }
+    }
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+            double v = 0.0;
+            for (int k = 0; k < N; k++)
+                v += cij[k * N + i] * tmp[k * N + j];
+            /* Do not bias when rounding */
+            int vq;
+            if (v >= 0)
+                vq =  (int)( v * correction * 127 + 0.5);
+            else
+                vq = -(int)(-v * correction * 127 + 0.5);
+            bank[i * N + j] = __MIN(__MAX(vq, INT16_MIN), INT16_MAX);
+        }
+    }
+    //mtime_t mul_duration = mdate() - tmul_0;
+    //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000));
 
-        const int i_lines = p_pic->p[i_index].i_lines;
-        const int i_pitch = p_pic->p[i_index].i_pitch;
+    free(workspace);
+    return VLC_SUCCESS;
+}
+
+static int Callback(vlc_object_t *object, char const *cmd,
+                    vlc_value_t oldval, vlc_value_t newval, void *data)
+{
+    filter_t     *filter = (filter_t *)object;
+    filter_sys_t *sys = filter->p_sys;
+    VLC_UNUSED(oldval); VLC_UNUSED(data);
 
-        vlc_memcpy( p_out, p_in, i_lines * i_pitch );
+    vlc_mutex_lock(&sys->cfg.lock);
+    //if (!strcmp(cmd, CFG_PREFIX "variance"))
+        sys->cfg.variance = newval.f_float;
+    vlc_mutex_unlock(&sys->cfg.lock);
 
+    return VLC_SUCCESS;
+}
+
+static int Open(vlc_object_t *object)
+{
+    filter_t *filter = (filter_t *)object;
+
+    const vlc_chroma_description_t *chroma =
+        vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma);
+    if (!chroma || chroma->plane_count < 3) {
+        msg_Err(filter, "Unsupported chroma (%4.4s)",
+                (char*)&(filter->fmt_in.video.i_chroma));
+        return VLC_EGENERIC;
+    }
+
+    filter_sys_t *sys = malloc(sizeof(*sys));
+    if (!sys)
+        return VLC_ENOMEM;
+    sys->is_uv_filtered = true;
+    sys->scale          = -1;
+    sys->seed           = URAND_SEED;
+
+    int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max");
+    int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min");
+    cutoff_low  = __MIN(__MAX(cutoff_low,  1), BANK_SIZE - 1);
+    cutoff_high = __MIN(__MAX(cutoff_high, 1), BANK_SIZE - 1);
+    if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) {
+        free(sys);
+        return VLC_EGENERIC;
     }
 
-    return CopyInfoAndRelease( p_outpic, p_pic );
+    sys->blend = BlockBlendC;
+    sys->emms  = NULL;
+#if defined(CAN_COMPILE_SSE2) && 1
+    if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
+        sys->blend = BlockBlendSse2;
+        sys->emms  = Emms;
+    }
+#endif
+
+    vlc_mutex_init(&sys->cfg.lock);
+    sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance");
+    var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL);
+
+    filter->p_sys           = sys;
+    filter->pf_video_filter = Filter;
+    return VLC_SUCCESS;
 }
+
+/**
+ * Release everything set up by Open().
+ *
+ * The "grain-variance" callback registered in Open() is removed first, so
+ * that Callback() can no longer be invoked with a filter whose p_sys has
+ * been freed; then the configuration mutex is destroyed and the private
+ * state is freed.
+ */
+static void Close(vlc_object_t *object)
+{
+    filter_t     *filter = (filter_t *)object;
+    filter_sys_t *sys    = filter->p_sys;
+
+    /* Must mirror the var_AddCallback() done in Open() (same arguments). */
+    var_DelCallback(filter, CFG_PREFIX "variance", Callback, NULL);
+    vlc_mutex_destroy(&sys->cfg.lock);
+    free(sys);
+}
+