From: Rov Juvano Date: Sun, 22 Jun 2008 16:09:27 +0000 (-0400) Subject: Add Scaletempo audio filter X-Git-Tag: 0.9.0-test1~95 X-Git-Url: https://git.sesse.net/?p=vlc;a=commitdiff_plain;h=3c5f3a24ced2212136529fb8637072070d5ca58f Add Scaletempo audio filter Scaletempo maintains the audio pitch when playback rate != 1.0 (i.e. no chipmunk effect). This fixes the pitch scaling caused by using the resampler to handle playback rate. Ported from GStreamer. Inspired by SoundTouch library by Olli Parviainen. Signed-off-by: Rémi Denis-Courmont --- diff --git a/configure.ac b/configure.ac index f079a08cd8..e9deddce72 100644 --- a/configure.ac +++ b/configure.ac @@ -1216,6 +1216,7 @@ if test "${SYS}" != "mingwce"; then VLC_ADD_PLUGIN([normvol]) VLC_ADD_PLUGIN([equalizer]) VLC_ADD_PLUGIN([param_eq]) + VLC_ADD_PLUGIN([scaletempo]) VLC_ADD_PLUGIN([converter_float]) VLC_ADD_PLUGIN([a52tospdif]) VLC_ADD_PLUGIN([dtstospdif]) diff --git a/modules/audio_filter/Modules.am b/modules/audio_filter/Modules.am index eade69d939..5255a3b5a5 100644 --- a/modules/audio_filter/Modules.am +++ b/modules/audio_filter/Modules.am @@ -3,3 +3,4 @@ SOURCES_equalizer = equalizer.c equalizer_presets.h SOURCES_normvol = normvol.c SOURCES_audio_format = format.c SOURCES_param_eq = param_eq.c +SOURCES_scaletempo = scaletempo.c diff --git a/modules/audio_filter/scaletempo.c b/modules/audio_filter/scaletempo.c new file mode 100644 index 0000000000..a15bf8a49c --- /dev/null +++ b/modules/audio_filter/scaletempo.c @@ -0,0 +1,512 @@ +/***************************************************************************** + * scaletempo.c: Scale audio tempo while maintaining pitch + ***************************************************************************** + * Copyright © 2008 the VideoLAN team + * $Id$ + * + * Authors: Rov Juvano + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. + *****************************************************************************/ + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include + +#include /* for memset */ +#include /* form INT_MIN */ + +/***************************************************************************** + * Module descriptor + *****************************************************************************/ +static int Open( vlc_object_t * ); +static void Close( vlc_object_t * ); +static void DoWork( aout_instance_t *, aout_filter_t *, + aout_buffer_t *, aout_buffer_t * ); + +vlc_module_begin(); + set_description( N_("Scale audio tempo in sync with playback rate") ); + set_shortname( N_("Scaletempo") ); + set_capability( "audio filter", 0 ); + set_category( CAT_AUDIO ); + set_subcategory( SUBCAT_AUDIO_AFILTER ); + + add_integer_with_range( "scaletempo-stride", 30, 1, 2000, NULL, + N_("Stride Length"), N_("Length in milliseconds to output each stride"), true ); + add_float_with_range( "scaletempo-overlap", .20, 0.0, 1.0, NULL, + N_("Overlap Length"), N_("Percentage of stride to overlap"), true ); + add_integer_with_range( "scaletempo-search", 14, 0, 200, NULL, + N_("Search Length"), N_("Length in milliseconds to search for best overlap position"), true ); + + set_callbacks( Open, Close ); +vlc_module_end(); + +/* + * Scaletempo works by producing audio in constant sized chunks (a "stride") but + * consuming chunks proportional to the playback rate. + * + * Scaletempo then smooths the output by blending the end of one stride with + * the next ("overlap"). + * + * Scaletempo smooths the overlap further by searching within the input buffer + * for the best overlap position. Scaletempo uses a statistical cross correlation + * (roughly a dot-product). Scaletempo consumes most of its CPU cycles here. + * + * NOTE: + * sample: a single audio sample for one channel + * frame: a single set of samples, one for each channel + * VLC uses these terms differently + */ +typedef struct aout_filter_sys_t +{ + /* Filter static config */ + double scale; + /* parameters */ + uint ms_stride; + double percent_overlap; + uint ms_search; + /* audio format */ + uint samples_per_frame; /* AKA number of channels */ + uint bytes_per_sample; + uint bytes_per_frame; + uint sample_rate; + /* stride */ + double frames_stride_scaled; + double frames_stride_error; + uint bytes_stride; + double bytes_stride_scaled; + uint bytes_queue_max; + uint bytes_queued; + uint bytes_to_slide; + uint8_t *buf_queue; + /* overlap */ + uint samples_overlap; + uint samples_standing; + uint bytes_overlap; + uint bytes_standing; + void *buf_overlap; + void *table_blend; + void (*output_overlap)( aout_filter_t *p_filter, void *p_out_buf, uint bytes_off ); + /* best overlap */ + uint frames_search; + void *buf_pre_corr; + void *table_window; + uint (*best_overlap_offset)( aout_filter_t *p_filter ); + /* for "audio filter" only, manage own buffers */ + int i_buf; + uint8_t *p_buffers[2]; +} aout_filter_sys_t; + +/***************************************************************************** + * best_overlap_offset: calculate best offset for overlap + *****************************************************************************/ +static uint best_overlap_offset_float( aout_filter_t *p_filter ) +{ + aout_filter_sys_t *p = p_filter->p_sys; + float *pw, *po, *ppc, *search_start; + float best_corr = INT_MIN; + uint best_off = 0; + uint i, off; + + pw = p->table_window; + po = p->buf_overlap; + po += p->samples_per_frame; + ppc = p->buf_pre_corr; + for( i = p->samples_per_frame; i < p->samples_overlap; i++ ) { + *ppc++ = *pw++ * *po++; + } + + search_start = (float *)p->buf_queue + p->samples_per_frame; + for( off = 0; off < p->frames_search; off++ ) { + float corr = 0; + float *ps = search_start; + ppc = p->buf_pre_corr; + for( i = p->samples_per_frame; i < p->samples_overlap; i++ ) { + corr += *ppc++ * *ps++; + } + if( corr > best_corr ) { + best_corr = corr; + best_off = off; + } + search_start += p->samples_per_frame; + } + + return best_off * p->bytes_per_frame; +} + +/***************************************************************************** + * output_overlap: blend end of previous stride with beginning of current stride + *****************************************************************************/ +static void output_overlap_float( aout_filter_t *p_filter, + void *buf_out, + uint bytes_off ) +{ + aout_filter_sys_t *p = p_filter->p_sys; + float *pout = buf_out; + float *pb = p->table_blend; + float *po = p->buf_overlap; + float *pin = (float *)( p->buf_queue + bytes_off ); + uint i; + for( i = 0; i < p->samples_overlap; i++ ) { + *pout++ = *po - *pb++ * ( *po - *pin++ ); po++; + } +} + +/***************************************************************************** + * fill_queue: fill p_sys->buf_queue as much possible, skipping samples as needed + *****************************************************************************/ +static size_t fill_queue( aout_filter_t *p_filter, + uint8_t *p_buffer, + size_t i_buffer, + size_t offset ) +{ + aout_filter_sys_t *p = p_filter->p_sys; + uint bytes_in = i_buffer - offset; + size_t offset_unchanged = offset; + + if( p->bytes_to_slide > 0 ) { + if( p->bytes_to_slide < p->bytes_queued ) { + uint bytes_in_move = p->bytes_queued - p->bytes_to_slide; + memmove( p->buf_queue, + p->buf_queue + p->bytes_to_slide, + bytes_in_move ); + p->bytes_to_slide = 0; + p->bytes_queued = bytes_in_move; + } else { + uint bytes_in_skip; + p->bytes_to_slide -= p->bytes_queued; + bytes_in_skip = __MIN( p->bytes_to_slide, bytes_in ); + p->bytes_queued = 0; + p->bytes_to_slide -= bytes_in_skip; + offset += bytes_in_skip; + bytes_in -= bytes_in_skip; + } + } + + if( bytes_in > 0 ) { + uint bytes_in_copy = __MIN( p->bytes_queue_max - p->bytes_queued, bytes_in ); + memcpy( p->buf_queue + p->bytes_queued, + p_buffer + offset, + bytes_in_copy ); + p->bytes_queued += bytes_in_copy; + offset += bytes_in_copy; + } + + return offset - offset_unchanged; +} + +/***************************************************************************** + * transform_buffer: main filter loop + *****************************************************************************/ +static size_t transform_buffer( aout_filter_t *p_filter, + uint8_t *p_buffer, + size_t i_buffer, + uint8_t *pout ) +{ + aout_filter_sys_t *p = p_filter->p_sys; + + size_t offset_in = fill_queue( p_filter, p_buffer, i_buffer, 0 ); + uint bytes_out = 0; + while( p->bytes_queued >= p->bytes_queue_max ) { + uint bytes_off = 0; + + // output stride + if( p->output_overlap ) { + if( p->best_overlap_offset ) { + bytes_off = p->best_overlap_offset( p_filter ); + } + p->output_overlap( p_filter, pout, bytes_off ); + } + memcpy( pout + p->bytes_overlap, + p->buf_queue + bytes_off + p->bytes_overlap, + p->bytes_standing ); + pout += p->bytes_stride; + bytes_out += p->bytes_stride; + + // input stride + memcpy( p->buf_overlap, + p->buf_queue + bytes_off + p->bytes_stride, + p->bytes_overlap ); + double frames_to_slide = p->frames_stride_scaled + p->frames_stride_error; + uint frames_to_stride_whole = (int)frames_to_slide; + p->bytes_to_slide = frames_to_stride_whole * p->bytes_per_frame; + p->frames_stride_error = frames_to_slide - frames_to_stride_whole; + + offset_in += fill_queue( p_filter, p_buffer, i_buffer, offset_in ); + } + + return bytes_out; +} + +/***************************************************************************** + * calculate_output_buffer_size + *****************************************************************************/ +static size_t calculate_output_buffer_size( aout_filter_t *p_filter, + size_t bytes_in ) +{ + aout_filter_sys_t *p = p_filter->p_sys; + size_t bytes_out = 0; + int bytes_to_out = bytes_in + p->bytes_queued - p->bytes_to_slide; + if( bytes_to_out >= (int)p->bytes_queue_max ) { + /* while (total_buffered - stride_length * n >= queue_max) n++ */ + bytes_out = p->bytes_stride * ( (uint)( + ( bytes_to_out - p->bytes_queue_max + /* rounding protection */ p->bytes_per_frame ) + / p->bytes_stride_scaled ) + 1 ); + } + return bytes_out; +} + +/***************************************************************************** + * reinit_buffers: reinitializes buffers in p_filter->p_sys + *****************************************************************************/ +static int reinit_buffers( aout_filter_t *p_filter ) +{ + aout_filter_sys_t *p = p_filter->p_sys; + uint i,j; + + uint frames_stride = p->ms_stride * p->sample_rate / 1000.0; + p->bytes_stride = frames_stride * p->bytes_per_frame; + + /* overlap */ + uint frames_overlap = frames_stride * p->percent_overlap; + if( frames_overlap < 1 ) { /* if no overlap */ + p->bytes_overlap = 0; + p->bytes_standing = p->bytes_stride; + p->samples_standing = p->bytes_standing / p->bytes_per_sample; + p->output_overlap = NULL; + } else { + uint prev_overlap = p->bytes_overlap; + p->bytes_overlap = frames_overlap * p->bytes_per_frame; + p->samples_overlap = frames_overlap * p->samples_per_frame; + p->bytes_standing = p->bytes_stride - p->bytes_overlap; + p->samples_standing = p->bytes_standing / p->bytes_per_sample; + p->buf_overlap = malloc( p->bytes_overlap ); + p->table_blend = malloc( p->samples_overlap * 4 ); /* sizeof (int32|float) */ + if( ! p->buf_overlap || ! p->table_blend ) { + return VLC_ENOMEM; + } + if( p->bytes_overlap > prev_overlap ) { + memset( (uint8_t *)p->buf_overlap + prev_overlap, 0, p->bytes_overlap - prev_overlap ); + } + float *pb = p->table_blend; + float t = (float)frames_overlap; + for( i = 0; isamples_per_frame; j++ ) { + *pb++ = v; + } + } + p->output_overlap = output_overlap_float; + } + + /* best overlap */ + p->frames_search = ( frames_overlap <= 1 ) ? 0 : p->ms_search * p->sample_rate / 1000.0; + if( p->frames_search < 1 ) { /* if no search */ + p->best_overlap_offset = NULL; + } else { + uint bytes_pre_corr = ( p->samples_overlap - p->samples_per_frame ) * 4; /* sizeof (int32|float) */ + p->buf_pre_corr = malloc( bytes_pre_corr ); + p->table_window = malloc( bytes_pre_corr ); + if( ! p->buf_pre_corr || ! p->table_window ) { + return VLC_ENOMEM; + } + float *pw = p->table_window; + for( i = 1; isamples_per_frame; j++ ) { + *pw++ = v; + } + } + p->best_overlap_offset = best_overlap_offset_float; + } + + uint new_size = ( p->frames_search + frames_stride + frames_overlap ) * p->bytes_per_frame; + if( p->bytes_queued > new_size ) { + if( p->bytes_to_slide > p->bytes_queued ) { + p->bytes_to_slide -= p->bytes_queued; + p->bytes_queued = 0; + } else { + uint new_queued = __MIN( p->bytes_queued - p->bytes_to_slide, new_size ); + memmove( p->buf_queue, + p->buf_queue + p->bytes_queued - new_queued, + new_queued ); + p->bytes_to_slide = 0; + p->bytes_queued = new_queued; + } + } + p->bytes_queue_max = new_size; + p->buf_queue = malloc( p->bytes_queue_max ); + if( ! p->buf_queue ) { + return VLC_ENOMEM; + } + + p->bytes_stride_scaled = p->bytes_stride * p->scale; + p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame; + + msg_Dbg( VLC_OBJECT(p_filter), + "%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode", + p->scale, + p->frames_stride_scaled, + (int)( p->bytes_stride / p->bytes_per_frame ), + (int)( p->bytes_standing / p->bytes_per_frame ), + (int)( p->bytes_overlap / p->bytes_per_frame ), + p->frames_search, + (int)( p->bytes_queue_max / p->bytes_per_frame ), + "fl32"); + + return VLC_SUCCESS; +} + +/***************************************************************************** + * Open: initialize as "audio filter" + *****************************************************************************/ +static int Open( vlc_object_t *p_this ) +{ + aout_filter_t *p_filter = (aout_filter_t *)p_this; + aout_filter_sys_t *p_sys; + bool b_fit = true; + + if( p_filter->input.i_format != VLC_FOURCC('f','l','3','2' ) || + p_filter->output.i_format != VLC_FOURCC('f','l','3','2') ) + { + b_fit = false; + p_filter->input.i_format = p_filter->output.i_format = VLC_FOURCC('f','l','3','2'); + msg_Warn( p_filter, "bad input or output format" ); + } + if( ! AOUT_FMTS_SIMILAR( &p_filter->input, &p_filter->output ) ) + { + b_fit = false; + memcpy( &p_filter->output, &p_filter->input, sizeof(audio_sample_format_t) ); + msg_Warn( p_filter, "input and output formats are not similar" ); + } + + if( ! b_fit ) + { + return VLC_EGENERIC; + } + + p_filter->pf_do_work = DoWork; + p_filter->b_in_place = false; + + /* Allocate structure */ + p_sys = p_filter->p_sys = malloc( sizeof(aout_filter_sys_t) ); + if( ! p_sys ) + { + return VLC_ENOMEM; + } + + p_sys->scale = 1.0; + p_sys->sample_rate = p_filter->input.i_rate; + p_sys->samples_per_frame = aout_FormatNbChannels( &p_filter->input ); + p_sys->bytes_per_sample = 4; + p_sys->bytes_per_frame = p_sys->samples_per_frame * p_sys->bytes_per_sample; + + msg_Dbg( p_this, "format: %5i rate, %i nch, %i bps, %s", + p_sys->sample_rate, + p_sys->samples_per_frame, + p_sys->bytes_per_sample, + "fl32" ); + + p_sys->ms_stride = config_GetInt( p_this, "scaletempo-stride" ); + p_sys->percent_overlap = config_GetFloat( p_this, "scaletempo-overlap" ); + p_sys->ms_search = config_GetInt( p_this, "scaletempo-search" ); + + msg_Dbg( p_this, "params: %i stride, %.3f overlap, %i search", + p_sys->ms_stride, p_sys->percent_overlap, p_sys->ms_search ); + + p_sys->i_buf = 0; + p_sys->p_buffers[0] = NULL; + p_sys->p_buffers[1] = NULL; + + p_sys->buf_queue = NULL; + p_sys->buf_overlap = NULL; + p_sys->table_blend = NULL; + p_sys->buf_pre_corr = NULL; + p_sys->table_window = NULL; + p_sys->bytes_overlap = 0; + p_sys->bytes_queued = 0; + p_sys->bytes_to_slide = 0; + p_sys->frames_stride_error = 0; + return reinit_buffers( p_filter ); +} + +static void Close( vlc_object_t *p_this ) +{ + aout_filter_t *p_filter = (aout_filter_t *)p_this; + aout_filter_sys_t *p_sys = p_filter->p_sys; + free( p_sys->buf_queue ); + free( p_sys->buf_overlap ); + free( p_sys->table_blend ); + free( p_sys->buf_pre_corr ); + free( p_sys->table_window ); + free( p_sys->p_buffers[0] ); + free( p_sys->p_buffers[1] ); + free( p_filter->p_sys ); +} + +/***************************************************************************** + * DoWork: aout_filter wrapper for transform_buffer + *****************************************************************************/ +static void DoWork( aout_instance_t * p_aout, aout_filter_t * p_filter, + aout_buffer_t * p_in_buf, aout_buffer_t * p_out_buf ) +{ + VLC_UNUSED(p_aout); + aout_filter_sys_t *p = p_filter->p_sys; + + if( p_filter->input.i_rate == p->sample_rate ) { + memcpy( p_out_buf->p_buffer, p_in_buf->p_buffer, p_in_buf->i_nb_bytes ); + p_out_buf->i_nb_bytes = p_in_buf->i_nb_bytes; + p_out_buf->i_nb_samples = p_in_buf->i_nb_samples; + return; + } + + double scale = p_filter->input.i_rate / (double)p->sample_rate; + if( scale != p->scale ) { + p->scale = scale; + p->bytes_stride_scaled = p->bytes_stride * p->scale; + p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame; + p->bytes_to_slide = 0; + msg_Dbg( p_filter, "%.3f scale, %.3f stride_in, %i stride_out", + p->scale, + p->frames_stride_scaled, + (int)( p->bytes_stride / p->bytes_per_frame ) ); + } + + size_t i_outsize = calculate_output_buffer_size ( p_filter, p_in_buf->i_nb_bytes ); + if( i_outsize > p_out_buf->i_size ) { + void *temp = realloc( p->p_buffers[ p->i_buf ], i_outsize ); + if( temp == NULL ) + { + return; + } + p->p_buffers[ p->i_buf ] = temp; + p_out_buf->p_buffer = p->p_buffers[ p->i_buf ]; + p->i_buf = ! p->i_buf; + } + + size_t bytes_out = transform_buffer( p_filter, + p_in_buf->p_buffer, p_in_buf->i_nb_bytes, + p_out_buf->p_buffer ); + + p_out_buf->i_nb_bytes = bytes_out; + p_out_buf->i_nb_samples = bytes_out / p->bytes_per_frame; +}