X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=modules%2Fvideo_filter%2Fgrain.c;h=4103ff963420e71453e906e04fbdcdcd1a8e0fba;hb=e2ee87feb6ed480068d8a44b8b51b13d09ee663e;hp=57a740e7acdd7bdc9317e409aea3a765a58f8ee4;hpb=14f37b2101842fa6e427f962f689db74eff6faba;p=vlc diff --git a/modules/video_filter/grain.c b/modules/video_filter/grain.c index 57a740e7ac..4103ff9634 100644 --- a/modules/video_filter/grain.c +++ b/modules/video_filter/grain.c @@ -1,10 +1,10 @@ /***************************************************************************** - * noise.c : "add grain to image" video filter + * grain.c: add film grain ***************************************************************************** - * Copyright (C) 2000-2007 the VideoLAN team + * Copyright (C) 2010 Laurent Aimar * $Id$ * - * Authors: Antoine Cellerier + * Authors: Laurent Aimar * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -28,174 +28,407 @@ #ifdef HAVE_CONFIG_H # include "config.h" #endif +#include +#include #include #include +#include +#include -#include "vlc_filter.h" -#include "filter_picture.h" +#include /***************************************************************************** - * Local prototypes + * Module descriptor *****************************************************************************/ -static int Create ( vlc_object_t * ); -static void Destroy ( vlc_object_t * ); +static int Open (vlc_object_t *); +static void Close(vlc_object_t *); + +#define BANK_SIZE (64) -static picture_t *Filter( filter_t *, picture_t * ); +#define CFG_PREFIX "grain-" -#define FILTER_PREFIX "grain-" +#define VARIANCE_MIN (0.0) +#define VARIANCE_MAX (10.0) +#define VARIANCE_TEXT N_("Variance") +#define VARIANCE_LONGTEXT N_("Variance of the gaussian noise") + +#define PERIOD_MIN 1 +#define PERIOD_MAX BANK_SIZE +#define PERIOD_MIN_TEXT N_("Minimal period") +#define PERIOD_MIN_LONGTEXT N_("Minimal period of the noise grain in pixel") +#define PERIOD_MAX_TEXT N_("Maximal period") +#define PERIOD_MAX_LONGTEXT N_("Maximal period of the noise grain in pixel") + +vlc_module_begin() + set_description(N_("Grain video filter")) + set_shortname( N_("Grain")) + set_help(N_("Adds filtered gaussian noise")) + set_capability( "video filter2", 0 ) + set_category(CAT_VIDEO) + set_subcategory(SUBCAT_VIDEO_VFILTER) + add_float_with_range(CFG_PREFIX "variance", 2.0, VARIANCE_MIN, VARIANCE_MAX, + NULL, VARIANCE_TEXT, VARIANCE_LONGTEXT, false) + add_integer_with_range(CFG_PREFIX "period-min", 1, PERIOD_MIN, PERIOD_MAX, + NULL, PERIOD_MIN_TEXT, PERIOD_MIN_LONGTEXT, false) + add_integer_with_range(CFG_PREFIX "period-max", 3*PERIOD_MAX/4, PERIOD_MIN, PERIOD_MAX, + NULL, PERIOD_MAX_TEXT, PERIOD_MAX_LONGTEXT, false) + set_callbacks(Open, Close) +vlc_module_end() /***************************************************************************** - * Module descriptor + * Local prototypes *****************************************************************************/ -vlc_module_begin () - set_description( N_("Grain video filter") ) - set_shortname( N_( "Grain" )) - set_capability( "video filter2", 0 ) - set_category( CAT_VIDEO ) - set_subcategory( SUBCAT_VIDEO_VFILTER ) - set_callbacks( Create, Destroy ) -vlc_module_end () +#define BLEND_SIZE (8) +struct filter_sys_t { + bool is_uv_filtered; + uint32_t seed; + + int scale; + int16_t bank[BANK_SIZE * BANK_SIZE]; + int16_t bank_y[BANK_SIZE * BANK_SIZE]; + int16_t bank_uv[BANK_SIZE * BANK_SIZE]; + + void (*blend)(uint8_t *dst, size_t dst_pitch, + const uint8_t *src, size_t src_pitch, + const int16_t *noise); + void (*emms)(void); + + struct { + vlc_mutex_t lock; + double variance; + } cfg; +}; -struct filter_sys_t +/* Simple and *really fast* RNG (xorshift[13,17,5])*/ +#define URAND_SEED (2463534242) +static uint32_t urand(uint32_t *seed) { - int *p_noise; -}; + uint32_t s = *seed; + s ^= s << 13; + s ^= s >> 17; + s ^= s << 5; + return *seed = s; +} +/* Uniform random value between 0 and 1 */ +static double drand(uint32_t *seed) +{ + return urand(seed) / (double)UINT32_MAX; +} +/* Gaussian random value with a mean of 0 and a variance of 1 */ +static void grand(double *r1, double *r2, uint32_t *seed) +{ + double s; + double u1, u2; + do { + u1 = 2 * drand(seed) - 1; + u2 = 2 * drand(seed) - 1; + s = u1 * u1 + u2 * u2; + } while (s >= 1.0); + + s = sqrt(-2 * log(s) / s); + *r1 = u1 * s; + *r2 = u2 * s; +} -static int Create( vlc_object_t *p_this ) +static void BlockBlend(uint8_t *dst, size_t dst_pitch, + const uint8_t *src, size_t src_pitch, + const int16_t *noise, + int w, int h) { - filter_t *p_filter = (filter_t *)p_this; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + dst[y * dst_pitch + x] = + clip_uint8_vlc(src[y * src_pitch + x] + noise[y * BANK_SIZE +x]); + } + } +} - switch( p_filter->fmt_in.video.i_chroma ) - { - CASE_PLANAR_YUV - break; +static void BlockBlendC(uint8_t *dst, size_t dst_pitch, + const uint8_t *src, size_t src_pitch, + const int16_t *noise) +{ + BlockBlend(dst, dst_pitch, src, src_pitch, noise, + BLEND_SIZE, BLEND_SIZE); +} - default: - msg_Err( p_filter, "Unsupported input chroma (%4s)", - (char*)&(p_filter->fmt_in.video.i_chroma) ); - return VLC_EGENERIC; +#ifdef CAN_COMPILE_SSE2 +#define _STRING(x) #x +#define STRING(x) _STRING(x) +static void BlockBlendSse2(uint8_t *dst, size_t dst_pitch, + const uint8_t *src, size_t src_pitch, + const int16_t *noise) +{ +#if BLEND_SIZE == 8 + /* TODO It is possible to do the math on 8 bits using + * paddusb X and then psubusb -X. + */ + asm volatile ("pxor %%xmm0, %%xmm0\n" : :); + for (int i = 0; i < 8/2; i++) { + asm volatile ( + "movq (%[src1]), %%xmm1\n" + "movq (%[src2]), %%xmm3\n" + "movdqu (%[noise]), %%xmm2\n" + "movdqu 2*"STRING(BANK_SIZE)"(%[noise]), %%xmm4\n" + + "punpcklbw %%xmm0, %%xmm1\n" + "punpcklbw %%xmm0, %%xmm3\n" + + "paddsw %%xmm2, %%xmm1\n" + "paddsw %%xmm4, %%xmm3\n" + "packuswb %%xmm1, %%xmm1\n" + "packuswb %%xmm3, %%xmm3\n" + "movq %%xmm1, (%[dst1])\n" + "movq %%xmm3, (%[dst2])\n" + : : [dst1]"r"(&dst[(2*i+0) * dst_pitch]), + [dst2]"r"(&dst[(2*i+1) * dst_pitch]), + [src1]"r"(&src[(2*i+0) * src_pitch]), + [src2]"r"(&src[(2*i+1) * src_pitch]), + [noise]"r"(&noise[2*i * BANK_SIZE]) + : "memory"); } +#else +# error "BLEND_SIZE unsupported" +#endif +} +static void Emms(void) +{ + asm volatile ("emms"); +} +#endif - /* Allocate structure */ - p_filter->p_sys = malloc( sizeof( filter_sys_t ) ); - if( p_filter->p_sys == NULL ) - return VLC_ENOMEM; +/** + * Scale the given signed data (on 7 bits + 1 for sign) using scale on 8 bits. + */ +static void Scale(int16_t *dst, int16_t *src, int scale) +{ + const int N = BANK_SIZE; + const int shift = 7 + 8; + + for (int y = 0; y < N; y++) { + for (int x = 0; x < N; x++) { + const int v = src[y * N + x]; + int vq; + if (v >= 0) + vq = ( v * scale + (1 << (shift-1)) - 1) >> shift; + else + vq = -((-v * scale + (1 << (shift-1)) - 1) >> shift); + dst[y * N + x] = vq; + } + } +} - p_filter->pf_video_filter = Filter; +static void PlaneFilter(filter_t *filter, + plane_t *dst, const plane_t *src, + int16_t *bank, uint32_t *seed) +{ + filter_sys_t *sys = filter->p_sys; - p_filter->p_sys->p_noise = NULL; + for (int y = 0; y < dst->i_visible_lines; y += BLEND_SIZE) { + for (int x = 0; x < dst->i_visible_pitch; x += BLEND_SIZE) { + int bx = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1); + int by = urand(seed) % (BANK_SIZE - BLEND_SIZE + 1); + const int16_t *noise = &bank[by * BANK_SIZE + bx]; - return VLC_SUCCESS; -} + int w = dst->i_visible_pitch - x; + int h = dst->i_visible_lines - y; -static void Destroy( vlc_object_t *p_this ) -{ - filter_t *p_filter = (filter_t *)p_this; - free( p_filter->p_sys->p_noise ); - free( p_filter->p_sys ); + const uint8_t *srcp = &src->p_pixels[y * src->i_pitch + x]; + uint8_t *dstp = &dst->p_pixels[y * dst->i_pitch + x]; + + if (w >= BLEND_SIZE && h >= BLEND_SIZE) + sys->blend(dstp, dst->i_pitch, srcp, src->i_pitch, noise); + else + BlockBlend(dstp, dst->i_pitch, srcp, src->i_pitch, noise, w, h); + } + } + if (sys->emms) + sys->emms(); } -static picture_t *Filter( filter_t *p_filter, picture_t *p_pic ) +static picture_t *Filter(filter_t *filter, picture_t *src) { - picture_t *p_outpic; - filter_sys_t *p_sys = p_filter->p_sys; - int i_index; - - if( !p_pic ) return NULL; + filter_sys_t *sys = filter->p_sys; - p_outpic = filter_NewPicture( p_filter ); - if( !p_outpic ) - { - picture_Release( p_pic ); + picture_t *dst = filter_NewPicture(filter); + if (!dst) { + picture_Release(src); return NULL; } - { - uint8_t *p_in = p_pic->p[Y_PLANE].p_pixels; - uint8_t *p_out = p_outpic->p[Y_PLANE].p_pixels; + vlc_mutex_lock(&sys->cfg.lock); + const double variance = __MIN(__MAX(sys->cfg.variance, VARIANCE_MIN), VARIANCE_MAX); + vlc_mutex_unlock(&sys->cfg.lock); - const int i_num_lines = p_pic->p[Y_PLANE].i_visible_lines; - const int i_num_cols = p_pic->p[Y_PLANE].i_visible_pitch; - const int i_pitch = p_pic->p[Y_PLANE].i_pitch; + const int scale = 256 * sqrt(variance); + if (scale != sys->scale) { + sys->scale = scale; + Scale(sys->bank_y, sys->bank, sys->scale); + Scale(sys->bank_uv, sys->bank, sys->scale / 2); + } - int i_line, i_col; + for (int i = 0; i < dst->i_planes; i++) { + const plane_t *srcp = &src->p[i]; + plane_t *dstp = &dst->p[i]; - int *p_noise = p_sys->p_noise; - if( !p_noise ) - { - p_noise = p_sys->p_noise = - (int*)malloc(i_pitch*i_num_lines*sizeof(int)); + if (i == 0 || sys->is_uv_filtered) { + int16_t *bank = i == 0 ? sys->bank_y : + sys->bank_uv; + PlaneFilter(filter, dstp, srcp, bank, &sys->seed); } - - for( i_line = 0; i_line < i_num_lines; i_line++ ) - { - for( i_col = 0; i_col < i_num_cols; i_col++ ) - { - p_noise[i_line*i_pitch+i_col] = ((rand()&0x1f)-0x0f); - } + else { + plane_CopyPixels(dstp, srcp); } + } - for( i_line = 2/*0*/ ; i_line < i_num_lines-2/**/; i_line++ ) - { - for( i_col = 2/*0*/; i_col < i_num_cols/2; i_col++ ) - { - p_out[i_line*i_pitch+i_col] = clip_uint8_vlc( - p_in[i_line*i_pitch+i_col] -#if 0 - + p_noise[i_line*i_pitch+i_col] ); -#else -/* 2 rows up */ - + (( ( p_noise[(i_line-2)*i_pitch+i_col-2]<<1 ) - + ( p_noise[(i_line-2)*i_pitch+i_col-1]<<2 ) - + ( p_noise[(i_line-2)*i_pitch+i_col]<<2 ) - + ( p_noise[(i_line-2)*i_pitch+i_col+1]<<2 ) - + ( p_noise[(i_line-2)*i_pitch+i_col+2]<<1 ) - /* 1 row up */ - + ( p_noise[(i_line-1)*i_pitch+i_col-2]<<2 ) - + ( p_noise[(i_line-1)*i_pitch+i_col-1]<<3 ) - + ( p_noise[(i_line-1)*i_pitch+i_col]*12 ) - + ( p_noise[(i_line-1)*i_pitch+i_col+1]<<3 ) - + ( p_noise[(i_line-1)*i_pitch+i_col+2]<<2 ) - /* */ - + ( p_noise[i_line*i_pitch+i_col-2]<<2 ) - + ( p_noise[i_line*i_pitch+i_col-1]*12 ) - + ( p_noise[i_line*i_pitch+i_col]<<4 ) - + ( p_noise[i_line*i_pitch+i_col+1]*12 ) - + ( p_noise[i_line*i_pitch+i_col+2]<<2 ) - /* 1 row down */ - + ( p_noise[(i_line+1)*i_pitch+i_col-2]<<2 ) - + ( p_noise[(i_line+1)*i_pitch+i_col-1]<<3 ) - + ( p_noise[(i_line+1)*i_pitch+i_col]*12 ) - + ( p_noise[(i_line+1)*i_pitch+i_col+1]<<3 ) - + ( p_noise[(i_line+1)*i_pitch+i_col+2]<<2 ) - /* 2 rows down */ - + ( p_noise[(i_line+2)*i_pitch+i_col-2]<<1 ) - + ( p_noise[(i_line+2)*i_pitch+i_col-1]<<2 ) - + ( p_noise[(i_line+2)*i_pitch+i_col]<<2 ) - + ( p_noise[(i_line+2)*i_pitch+i_col+1]<<2 ) - + ( p_noise[(i_line+2)*i_pitch+i_col+2]<<1 ) - )>>7/*/152*/)); -#endif + picture_CopyProperties(dst, src); + picture_Release(src); + return dst; +} + +/** + * Generate a filteried gaussian noise within [-127, 127] range. + */ +static int Generate(int16_t *bank, int h_min, int h_max, int v_min, int v_max) +{ + const int N = BANK_SIZE; + double *workspace = calloc(3 * N * N, sizeof(*workspace)); + if (!workspace) + return VLC_ENOMEM; + + double *gn = &workspace[0 * N * N]; + double *cij = &workspace[1 * N * N]; + double *tmp = &workspace[2 * N * N]; + /* Create a gaussian noise matrix */ + assert((N % 2) == 0); + uint32_t seed = URAND_SEED; + for (int y = 0; y < N; y++) { + for (int x = 0; x < N/2; x++) { + grand(&gn[y * N + 2 * x + 0], &gn[y * N + 2 * x + 1], &seed); + } + } + + /* Clear non selected frequency. + * Only the central band is kept */ + int zero = 0; + for (int y = 0; y < N; y++) { + for (int x = 0; x < N; x++) { + if ((x < h_min && y < v_min) || x > h_max || y > v_max) { + gn[y * N + x] = 0.0; + zero++; } - for( ; i_col < i_num_cols; i_col++ ) - p_out[i_line*i_pitch+i_col] = p_in[i_line*i_pitch+i_col]; + } + } + const double correction = sqrt((double)N * N / (N * N - zero)); + + /* Filter the gaussian noise using an IDCT + * The algo is simple/stupid and does C * GN * Ct */ + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + cij[i * N + j] = i == 0 ? sqrt(1.0f / N) : + sqrt(2.0f / N) * cos((2 * j + 1) * i * M_PI / 2 / N); } } - for( i_index = 1; i_index < p_pic->i_planes; i_index++ ) - { - uint8_t *p_in = p_pic->p[i_index].p_pixels; - uint8_t *p_out = p_outpic->p[i_index].p_pixels; + //mtime_t tmul_0 = mdate(); + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + double v = 0.0; + for (int k = 0; k < N; k++) + v += gn[i * N + k] * cij[k * N + j]; + tmp[i * N + j] = v; + } + } + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + double v = 0.0; + for (int k = 0; k < N; k++) + v += cij[k * N + i] * tmp[k * N + j]; + /* Do not bias when rounding */ + int vq; + if (v >= 0) + vq = (int)( v * correction * 127 + 0.5); + else + vq = -(int)(-v * correction * 127 + 0.5); + bank[i * N + j] = __MIN(__MAX(vq, INT16_MIN), INT16_MAX); + } + } + //mtime_t mul_duration = mdate() - tmul_0; + //fprintf(stderr, "IDCT took %d ms\n", (int)(mul_duration / 1000)); - const int i_lines = p_pic->p[i_index].i_lines; - const int i_pitch = p_pic->p[i_index].i_pitch; + free(workspace); + return VLC_SUCCESS; +} + +static int Callback(vlc_object_t *object, char const *cmd, + vlc_value_t oldval, vlc_value_t newval, void *data) +{ + filter_t *filter = (filter_t *)object; + filter_sys_t *sys = filter->p_sys; + VLC_UNUSED(oldval); VLC_UNUSED(data); + + vlc_mutex_lock(&sys->cfg.lock); + //if (!strcmp(cmd, CFG_PREFIX "variance")) + sys->cfg.variance = newval.f_float; + vlc_mutex_unlock(&sys->cfg.lock); + + return VLC_SUCCESS; +} + +static int Open(vlc_object_t *object) +{ + filter_t *filter = (filter_t *)object; + + const vlc_chroma_description_t *chroma = + vlc_fourcc_GetChromaDescription(filter->fmt_in.video.i_chroma); + if (!chroma || chroma->plane_count < 3) { + msg_Err(filter, "Unsupported chroma (%4.4s)", + (char*)&(filter->fmt_in.video.i_chroma)); + return VLC_EGENERIC; + } - vlc_memcpy( p_out, p_in, i_lines * i_pitch ); + filter_sys_t *sys = malloc(sizeof(*sys)); + if (!sys) + return VLC_ENOMEM; + sys->is_uv_filtered = true; + sys->scale = -1; + sys->seed = URAND_SEED; + + int cutoff_low = BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-max"); + int cutoff_high= BANK_SIZE - var_InheritInteger(filter, CFG_PREFIX "period-min"); + cutoff_low = __MIN(__MAX(cutoff_low, 1), BANK_SIZE - 1); + cutoff_high = __MIN(__MAX(cutoff_high, 1), BANK_SIZE - 1); + if (Generate(sys->bank, cutoff_low, cutoff_high, cutoff_low, cutoff_high)) { + free(sys); + return VLC_EGENERIC; + } + sys->blend = BlockBlendC; + sys->emms = NULL; +#if defined(CAN_COMPILE_SSE2) && 1 + if (vlc_CPU() & CPU_CAPABILITY_SSE2) { + sys->blend = BlockBlendSse2; + sys->emms = Emms; } +#endif + + vlc_mutex_init(&sys->cfg.lock); + sys->cfg.variance = var_CreateGetFloatCommand(filter, CFG_PREFIX "variance"); + var_AddCallback(filter, CFG_PREFIX "variance", Callback, NULL); + + filter->p_sys = sys; + filter->pf_video_filter = Filter; + return VLC_SUCCESS; +} - return CopyInfoAndRelease( p_outpic, p_pic ); +static void Close(vlc_object_t *object) +{ + filter_t *filter = (filter_t *)object; + filter_sys_t *sys = filter->p_sys; + + vlc_mutex_destroy(&sys->cfg.lock); + free(sys); } +