1 /*****************************************************************************
2 * scaletempo.c: Scale audio tempo while maintaining pitch
3 *****************************************************************************
4 * Copyright © 2008 the VideoLAN team
7 * Authors: Rov Juvano <rovjuvano@users.sourceforge.net>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 /*****************************************************************************
26 *****************************************************************************/
31 #include <vlc_common.h>
32 #include <vlc_plugin.h>
35 #include <string.h> /* for memset */
36 #include <limits.h> /* for INT_MIN */
38 /*****************************************************************************
40 *****************************************************************************/
41 static int Open( vlc_object_t * );
42 static void Close( vlc_object_t * );
43 static void DoWork( aout_instance_t *, aout_filter_t *,
44 aout_buffer_t *, aout_buffer_t * );
47 set_description( N_("Scale audio tempo in sync with playback rate") );
48 set_shortname( N_("Scaletempo") );
49 set_capability( "audio filter", 0 );
50 set_category( CAT_AUDIO );
51 set_subcategory( SUBCAT_AUDIO_AFILTER );
53 add_integer_with_range( "scaletempo-stride", 30, 1, 2000, NULL,
54 N_("Stride Length"), N_("Length in milliseconds to output each stride"), true );
55 add_float_with_range( "scaletempo-overlap", .20, 0.0, 1.0, NULL,
56 N_("Overlap Length"), N_("Percentage of stride to overlap"), true );
57 add_integer_with_range( "scaletempo-search", 14, 0, 200, NULL,
58 N_("Search Length"), N_("Length in milliseconds to search for best overlap position"), true );
60 set_callbacks( Open, Close );
64 * Scaletempo works by producing audio in constant sized chunks (a "stride") but
65 * consuming chunks proportional to the playback rate.
67 * Scaletempo then smooths the output by blending the end of one stride with
68 * the next ("overlap").
70 * Scaletempo smooths the overlap further by searching within the input buffer
71 * for the best overlap position. Scaletempo uses a statistical cross correlation
72 * (roughly a dot-product). Scaletempo consumes most of its CPU cycles here.
75 * sample: a single audio sample for one channel
76 * frame: a single set of samples, one for each channel
77 * VLC uses these terms differently
79 typedef struct aout_filter_sys_t
81 /* Filter static config */
85 double percent_overlap;
88 unsigned samples_per_frame; /* AKA number of channels */
89 unsigned bytes_per_sample;
90 unsigned bytes_per_frame;
93 double frames_stride_scaled;
94 double frames_stride_error;
95 unsigned bytes_stride;
96 double bytes_stride_scaled;
97 unsigned bytes_queue_max;
98 unsigned bytes_queued;
99 unsigned bytes_to_slide;
102 unsigned samples_overlap;
103 unsigned samples_standing;
104 unsigned bytes_overlap;
105 unsigned bytes_standing;
108 void (*output_overlap)( aout_filter_t *p_filter, void *p_out_buf, unsigned bytes_off );
110 unsigned frames_search;
113 unsigned(*best_overlap_offset)( aout_filter_t *p_filter );
114 /* for "audio filter" only, manage own buffers */
116 uint8_t *p_buffers[2];
119 /*****************************************************************************
120 * best_overlap_offset: calculate best offset for overlap
121 *****************************************************************************/
122 static unsigned best_overlap_offset_float( aout_filter_t *p_filter )
124 aout_filter_sys_t *p = p_filter->p_sys;
125 float *pw, *po, *ppc, *search_start;
126 float best_corr = INT_MIN;
127 unsigned best_off = 0;
130 pw = p->table_window;
132 po += p->samples_per_frame;
133 ppc = p->buf_pre_corr;
134 for( i = p->samples_per_frame; i < p->samples_overlap; i++ ) {
135 *ppc++ = *pw++ * *po++;
138 search_start = (float *)p->buf_queue + p->samples_per_frame;
139 for( off = 0; off < p->frames_search; off++ ) {
141 float *ps = search_start;
142 ppc = p->buf_pre_corr;
143 for( i = p->samples_per_frame; i < p->samples_overlap; i++ ) {
144 corr += *ppc++ * *ps++;
146 if( corr > best_corr ) {
150 search_start += p->samples_per_frame;
153 return best_off * p->bytes_per_frame;
156 /*****************************************************************************
157 * output_overlap: blend end of previous stride with beginning of current stride
158 *****************************************************************************/
159 static void output_overlap_float( aout_filter_t *p_filter,
163 aout_filter_sys_t *p = p_filter->p_sys;
164 float *pout = buf_out;
165 float *pb = p->table_blend;
166 float *po = p->buf_overlap;
167 float *pin = (float *)( p->buf_queue + bytes_off );
169 for( i = 0; i < p->samples_overlap; i++ ) {
170 *pout++ = *po - *pb++ * ( *po - *pin++ ); po++;
174 /*****************************************************************************
175 * fill_queue: fill p_sys->buf_queue as much possible, skipping samples as needed
176 *****************************************************************************/
177 static size_t fill_queue( aout_filter_t *p_filter,
182 aout_filter_sys_t *p = p_filter->p_sys;
183 unsigned bytes_in = i_buffer - offset;
184 size_t offset_unchanged = offset;
186 if( p->bytes_to_slide > 0 ) {
187 if( p->bytes_to_slide < p->bytes_queued ) {
188 unsigned bytes_in_move = p->bytes_queued - p->bytes_to_slide;
189 memmove( p->buf_queue,
190 p->buf_queue + p->bytes_to_slide,
192 p->bytes_to_slide = 0;
193 p->bytes_queued = bytes_in_move;
195 unsigned bytes_in_skip;
196 p->bytes_to_slide -= p->bytes_queued;
197 bytes_in_skip = __MIN( p->bytes_to_slide, bytes_in );
199 p->bytes_to_slide -= bytes_in_skip;
200 offset += bytes_in_skip;
201 bytes_in -= bytes_in_skip;
206 unsigned bytes_in_copy = __MIN( p->bytes_queue_max - p->bytes_queued, bytes_in );
207 memcpy( p->buf_queue + p->bytes_queued,
210 p->bytes_queued += bytes_in_copy;
211 offset += bytes_in_copy;
214 return offset - offset_unchanged;
217 /*****************************************************************************
218 * transform_buffer: main filter loop
219 *****************************************************************************/
220 static size_t transform_buffer( aout_filter_t *p_filter,
225 aout_filter_sys_t *p = p_filter->p_sys;
227 size_t offset_in = fill_queue( p_filter, p_buffer, i_buffer, 0 );
228 unsigned bytes_out = 0;
229 while( p->bytes_queued >= p->bytes_queue_max ) {
230 unsigned bytes_off = 0;
233 if( p->output_overlap ) {
234 if( p->best_overlap_offset ) {
235 bytes_off = p->best_overlap_offset( p_filter );
237 p->output_overlap( p_filter, pout, bytes_off );
239 memcpy( pout + p->bytes_overlap,
240 p->buf_queue + bytes_off + p->bytes_overlap,
242 pout += p->bytes_stride;
243 bytes_out += p->bytes_stride;
246 memcpy( p->buf_overlap,
247 p->buf_queue + bytes_off + p->bytes_stride,
249 double frames_to_slide = p->frames_stride_scaled + p->frames_stride_error;
250 unsigned frames_to_stride_whole = (int)frames_to_slide;
251 p->bytes_to_slide = frames_to_stride_whole * p->bytes_per_frame;
252 p->frames_stride_error = frames_to_slide - frames_to_stride_whole;
254 offset_in += fill_queue( p_filter, p_buffer, i_buffer, offset_in );
260 /*****************************************************************************
261 * calculate_output_buffer_size
262 *****************************************************************************/
263 static size_t calculate_output_buffer_size( aout_filter_t *p_filter,
266 aout_filter_sys_t *p = p_filter->p_sys;
267 size_t bytes_out = 0;
268 int bytes_to_out = bytes_in + p->bytes_queued - p->bytes_to_slide;
269 if( bytes_to_out >= (int)p->bytes_queue_max ) {
270 /* while (total_buffered - stride_length * n >= queue_max) n++ */
271 bytes_out = p->bytes_stride * ( (unsigned)(
272 ( bytes_to_out - p->bytes_queue_max + /* rounding protection */ p->bytes_per_frame )
273 / p->bytes_stride_scaled ) + 1 );
278 /*****************************************************************************
279 * reinit_buffers: reinitializes buffers in p_filter->p_sys
280 *****************************************************************************/
281 static int reinit_buffers( aout_filter_t *p_filter )
283 aout_filter_sys_t *p = p_filter->p_sys;
286 unsigned frames_stride = p->ms_stride * p->sample_rate / 1000.0;
287 p->bytes_stride = frames_stride * p->bytes_per_frame;
290 unsigned frames_overlap = frames_stride * p->percent_overlap;
291 if( frames_overlap < 1 ) { /* if no overlap */
292 p->bytes_overlap = 0;
293 p->bytes_standing = p->bytes_stride;
294 p->samples_standing = p->bytes_standing / p->bytes_per_sample;
295 p->output_overlap = NULL;
297 unsigned prev_overlap = p->bytes_overlap;
298 p->bytes_overlap = frames_overlap * p->bytes_per_frame;
299 p->samples_overlap = frames_overlap * p->samples_per_frame;
300 p->bytes_standing = p->bytes_stride - p->bytes_overlap;
301 p->samples_standing = p->bytes_standing / p->bytes_per_sample;
302 p->buf_overlap = malloc( p->bytes_overlap );
303 p->table_blend = malloc( p->samples_overlap * 4 ); /* sizeof (int32|float) */
304 if( ! p->buf_overlap || ! p->table_blend ) {
307 if( p->bytes_overlap > prev_overlap ) {
308 memset( (uint8_t *)p->buf_overlap + prev_overlap, 0, p->bytes_overlap - prev_overlap );
310 float *pb = p->table_blend;
311 float t = (float)frames_overlap;
312 for( i = 0; i<frames_overlap; i++ ) {
314 for( j = 0; j < p->samples_per_frame; j++ ) {
318 p->output_overlap = output_overlap_float;
322 p->frames_search = ( frames_overlap <= 1 ) ? 0 : p->ms_search * p->sample_rate / 1000.0;
323 if( p->frames_search < 1 ) { /* if no search */
324 p->best_overlap_offset = NULL;
326 unsigned bytes_pre_corr = ( p->samples_overlap - p->samples_per_frame ) * 4; /* sizeof (int32|float) */
327 p->buf_pre_corr = malloc( bytes_pre_corr );
328 p->table_window = malloc( bytes_pre_corr );
329 if( ! p->buf_pre_corr || ! p->table_window ) {
332 float *pw = p->table_window;
333 for( i = 1; i<frames_overlap; i++ ) {
334 float v = i * ( frames_overlap - i );
335 for( j = 0; j < p->samples_per_frame; j++ ) {
339 p->best_overlap_offset = best_overlap_offset_float;
342 unsigned new_size = ( p->frames_search + frames_stride + frames_overlap ) * p->bytes_per_frame;
343 if( p->bytes_queued > new_size ) {
344 if( p->bytes_to_slide > p->bytes_queued ) {
345 p->bytes_to_slide -= p->bytes_queued;
348 unsigned new_queued = __MIN( p->bytes_queued - p->bytes_to_slide, new_size );
349 memmove( p->buf_queue,
350 p->buf_queue + p->bytes_queued - new_queued,
352 p->bytes_to_slide = 0;
353 p->bytes_queued = new_queued;
356 p->bytes_queue_max = new_size;
357 p->buf_queue = malloc( p->bytes_queue_max );
358 if( ! p->buf_queue ) {
362 p->bytes_stride_scaled = p->bytes_stride * p->scale;
363 p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame;
365 msg_Dbg( VLC_OBJECT(p_filter),
366 "%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode",
368 p->frames_stride_scaled,
369 (int)( p->bytes_stride / p->bytes_per_frame ),
370 (int)( p->bytes_standing / p->bytes_per_frame ),
371 (int)( p->bytes_overlap / p->bytes_per_frame ),
373 (int)( p->bytes_queue_max / p->bytes_per_frame ),
379 /*****************************************************************************
380 * Open: initialize as "audio filter"
381 *****************************************************************************/
382 static int Open( vlc_object_t *p_this )
384 aout_filter_t *p_filter = (aout_filter_t *)p_this;
385 aout_filter_sys_t *p_sys;
388 if( p_filter->input.i_format != VLC_FOURCC('f','l','3','2' ) ||
389 p_filter->output.i_format != VLC_FOURCC('f','l','3','2') )
392 p_filter->input.i_format = p_filter->output.i_format = VLC_FOURCC('f','l','3','2');
393 msg_Warn( p_filter, "bad input or output format" );
395 if( ! AOUT_FMTS_SIMILAR( &p_filter->input, &p_filter->output ) )
398 memcpy( &p_filter->output, &p_filter->input, sizeof(audio_sample_format_t) );
399 msg_Warn( p_filter, "input and output formats are not similar" );
407 p_filter->pf_do_work = DoWork;
408 p_filter->b_in_place = false;
410 /* Allocate structure */
411 p_sys = p_filter->p_sys = malloc( sizeof(aout_filter_sys_t) );
418 p_sys->sample_rate = p_filter->input.i_rate;
419 p_sys->samples_per_frame = aout_FormatNbChannels( &p_filter->input );
420 p_sys->bytes_per_sample = 4;
421 p_sys->bytes_per_frame = p_sys->samples_per_frame * p_sys->bytes_per_sample;
423 msg_Dbg( p_this, "format: %5i rate, %i nch, %i bps, %s",
425 p_sys->samples_per_frame,
426 p_sys->bytes_per_sample,
429 p_sys->ms_stride = config_GetInt( p_this, "scaletempo-stride" );
430 p_sys->percent_overlap = config_GetFloat( p_this, "scaletempo-overlap" );
431 p_sys->ms_search = config_GetInt( p_this, "scaletempo-search" );
433 msg_Dbg( p_this, "params: %i stride, %.3f overlap, %i search",
434 p_sys->ms_stride, p_sys->percent_overlap, p_sys->ms_search );
437 p_sys->p_buffers[0] = NULL;
438 p_sys->p_buffers[1] = NULL;
440 p_sys->buf_queue = NULL;
441 p_sys->buf_overlap = NULL;
442 p_sys->table_blend = NULL;
443 p_sys->buf_pre_corr = NULL;
444 p_sys->table_window = NULL;
445 p_sys->bytes_overlap = 0;
446 p_sys->bytes_queued = 0;
447 p_sys->bytes_to_slide = 0;
448 p_sys->frames_stride_error = 0;
449 return reinit_buffers( p_filter );
452 static void Close( vlc_object_t *p_this )
454 aout_filter_t *p_filter = (aout_filter_t *)p_this;
455 aout_filter_sys_t *p_sys = p_filter->p_sys;
456 free( p_sys->buf_queue );
457 free( p_sys->buf_overlap );
458 free( p_sys->table_blend );
459 free( p_sys->buf_pre_corr );
460 free( p_sys->table_window );
461 free( p_sys->p_buffers[0] );
462 free( p_sys->p_buffers[1] );
463 free( p_filter->p_sys );
466 /*****************************************************************************
467 * DoWork: aout_filter wrapper for transform_buffer
468 *****************************************************************************/
469 static void DoWork( aout_instance_t * p_aout, aout_filter_t * p_filter,
470 aout_buffer_t * p_in_buf, aout_buffer_t * p_out_buf )
473 aout_filter_sys_t *p = p_filter->p_sys;
475 if( p_filter->input.i_rate == p->sample_rate ) {
476 memcpy( p_out_buf->p_buffer, p_in_buf->p_buffer, p_in_buf->i_nb_bytes );
477 p_out_buf->i_nb_bytes = p_in_buf->i_nb_bytes;
478 p_out_buf->i_nb_samples = p_in_buf->i_nb_samples;
482 double scale = p_filter->input.i_rate / (double)p->sample_rate;
483 if( scale != p->scale ) {
485 p->bytes_stride_scaled = p->bytes_stride * p->scale;
486 p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame;
487 p->bytes_to_slide = 0;
488 msg_Dbg( p_filter, "%.3f scale, %.3f stride_in, %i stride_out",
490 p->frames_stride_scaled,
491 (int)( p->bytes_stride / p->bytes_per_frame ) );
494 size_t i_outsize = calculate_output_buffer_size ( p_filter, p_in_buf->i_nb_bytes );
495 if( i_outsize > p_out_buf->i_size ) {
496 void *temp = realloc( p->p_buffers[ p->i_buf ], i_outsize );
501 p->p_buffers[ p->i_buf ] = temp;
502 p_out_buf->p_buffer = p->p_buffers[ p->i_buf ];
503 p->i_buf = ! p->i_buf;
506 size_t bytes_out = transform_buffer( p_filter,
507 p_in_buf->p_buffer, p_in_buf->i_nb_bytes,
508 p_out_buf->p_buffer );
510 p_out_buf->i_nb_bytes = bytes_out;
511 p_out_buf->i_nb_samples = bytes_out / p->bytes_per_frame;