]> git.sesse.net Git - vlc/blob - modules/audio_filter/scaletempo.c
aout_filter_t.(in|out)put -> aout_filter_t.fmt_(in|out).audio
[vlc] / modules / audio_filter / scaletempo.c
1 /*****************************************************************************
2  * scaletempo.c: Scale audio tempo while maintaining pitch
3  *****************************************************************************
4  * Copyright © 2008 the VideoLAN team
5  * $Id$
6  *
7  * Authors: Rov Juvano <rovjuvano@users.sourceforge.net>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22  *****************************************************************************/
23
24 /*****************************************************************************
25  * Preamble
26  *****************************************************************************/
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30
31 #include <vlc_common.h>
32 #include <vlc_plugin.h>
33 #include <vlc_aout.h>
34
35 #include <string.h> /* for memset */
36 #include <limits.h> /* for INT_MIN */
37
38 /*****************************************************************************
39  * Module descriptor
40  *****************************************************************************/
41 static int  Open( vlc_object_t * );
42 static void Close( vlc_object_t * );
43 static void DoWork( aout_instance_t *, aout_filter_t *,
44                     aout_buffer_t *, aout_buffer_t * );
45
46 vlc_module_begin ()
47     set_description( N_("Audio tempo scaler synched with rate") )
48     set_shortname( N_("Scaletempo") )
49     set_capability( "audio filter", 0 )
50     set_category( CAT_AUDIO )
51     set_subcategory( SUBCAT_AUDIO_AFILTER )
52
53     add_integer_with_range( "scaletempo-stride", 30, 1, 2000, NULL,
54         N_("Stride Length"), N_("Length in milliseconds to output each stride"), true )
55     add_float_with_range( "scaletempo-overlap", .20, 0.0, 1.0, NULL,
56         N_("Overlap Length"), N_("Percentage of stride to overlap"), true )
57     add_integer_with_range( "scaletempo-search", 14, 0, 200, NULL,
58         N_("Search Length"), N_("Length in milliseconds to search for best overlap position"), true )
59
60     set_callbacks( Open, Close )
61 vlc_module_end ()
62
63 /*
64  * Scaletempo works by producing audio in constant sized chunks (a "stride") but
65  * consuming chunks proportional to the playback rate.
66  *
67  * Scaletempo then smooths the output by blending the end of one stride with
68  * the next ("overlap").
69  *
70  * Scaletempo smooths the overlap further by searching within the input buffer
71  * for the best overlap position.  Scaletempo uses a statistical cross correlation
72  * (roughly a dot-product).  Scaletempo consumes most of its CPU cycles here.
73  *
74  * NOTE:
75  * sample: a single audio sample for one channel
76  * frame: a single set of samples, one for each channel
77  * VLC uses these terms differently
78  */
79 struct aout_filter_sys_t
80 {
81     /* Filter static config */
82     double    scale;
83     /* parameters */
84     unsigned  ms_stride;
85     double    percent_overlap;
86     unsigned  ms_search;
87     /* audio format */
88     unsigned  samples_per_frame;  /* AKA number of channels */
89     unsigned  bytes_per_sample;
90     unsigned  bytes_per_frame;
91     unsigned  sample_rate;
92     /* stride */
93     double    frames_stride_scaled;
94     double    frames_stride_error;
95     unsigned  bytes_stride;
96     double    bytes_stride_scaled;
97     unsigned  bytes_queue_max;
98     unsigned  bytes_queued;
99     unsigned  bytes_to_slide;
100     uint8_t  *buf_queue;
101     /* overlap */
102     unsigned  samples_overlap;
103     unsigned  samples_standing;
104     unsigned  bytes_overlap;
105     unsigned  bytes_standing;
106     void     *buf_overlap;
107     void     *table_blend;
108     void    (*output_overlap)( aout_filter_t *p_filter, void *p_out_buf, unsigned bytes_off );
109     /* best overlap */
110     unsigned  frames_search;
111     void     *buf_pre_corr;
112     void     *table_window;
113     unsigned(*best_overlap_offset)( aout_filter_t *p_filter );
114     /* for "audio filter" only, manage own buffers */
115     int       i_buf;
116     uint8_t  *p_buffers[2];
117 };
118
119 /*****************************************************************************
120  * best_overlap_offset: calculate best offset for overlap
121  *****************************************************************************/
122 static unsigned best_overlap_offset_float( aout_filter_t *p_filter )
123 {
124     aout_filter_sys_t *p = p_filter->p_sys;
125     float *pw, *po, *ppc, *search_start;
126     float best_corr = INT_MIN;
127     unsigned best_off = 0;
128     unsigned i, off;
129
130     pw  = p->table_window;
131     po  = p->buf_overlap;
132     po += p->samples_per_frame;
133     ppc = p->buf_pre_corr;
134     for( i = p->samples_per_frame; i < p->samples_overlap; i++ ) {
135       *ppc++ = *pw++ * *po++;
136     }
137
138     search_start = (float *)p->buf_queue + p->samples_per_frame;
139     for( off = 0; off < p->frames_search; off++ ) {
140       float corr = 0;
141       float *ps = search_start;
142       ppc = p->buf_pre_corr;
143       for( i = p->samples_per_frame; i < p->samples_overlap; i++ ) {
144         corr += *ppc++ * *ps++;
145       }
146       if( corr > best_corr ) {
147         best_corr = corr;
148         best_off  = off;
149       }
150       search_start += p->samples_per_frame;
151     }
152
153     return best_off * p->bytes_per_frame;
154 }
155
156 /*****************************************************************************
157  * output_overlap: blend end of previous stride with beginning of current stride
158  *****************************************************************************/
159 static void output_overlap_float( aout_filter_t   *p_filter,
160                                   void            *buf_out,
161                                   unsigned         bytes_off )
162 {
163     aout_filter_sys_t *p = p_filter->p_sys;
164     float *pout = buf_out;
165     float *pb   = p->table_blend;
166     float *po   = p->buf_overlap;
167     float *pin  = (float *)( p->buf_queue + bytes_off );
168     unsigned i;
169     for( i = 0; i < p->samples_overlap; i++ ) {
170         *pout++ = *po - *pb++ * ( *po - *pin++ ); po++;
171     }
172 }
173
174 /*****************************************************************************
175  * fill_queue: fill p_sys->buf_queue as much possible, skipping samples as needed
176  *****************************************************************************/
177 static size_t fill_queue( aout_filter_t *p_filter,
178                           uint8_t       *p_buffer,
179                           size_t         i_buffer,
180                           size_t         offset )
181 {
182     aout_filter_sys_t *p = p_filter->p_sys;
183     unsigned bytes_in = i_buffer - offset;
184     size_t offset_unchanged = offset;
185
186     if( p->bytes_to_slide > 0 ) {
187         if( p->bytes_to_slide < p->bytes_queued ) {
188             unsigned bytes_in_move = p->bytes_queued - p->bytes_to_slide;
189             memmove( p->buf_queue,
190                      p->buf_queue + p->bytes_to_slide,
191                      bytes_in_move );
192             p->bytes_to_slide = 0;
193             p->bytes_queued   = bytes_in_move;
194         } else {
195             unsigned bytes_in_skip;
196             p->bytes_to_slide -= p->bytes_queued;
197             bytes_in_skip      = __MIN( p->bytes_to_slide, bytes_in );
198             p->bytes_queued    = 0;
199             p->bytes_to_slide -= bytes_in_skip;
200             offset            += bytes_in_skip;
201             bytes_in          -= bytes_in_skip;
202         }
203     }
204
205     if( bytes_in > 0 ) {
206         unsigned bytes_in_copy = __MIN( p->bytes_queue_max - p->bytes_queued, bytes_in );
207         memcpy( p->buf_queue + p->bytes_queued,
208                 p_buffer + offset,
209                 bytes_in_copy );
210         p->bytes_queued += bytes_in_copy;
211         offset          += bytes_in_copy;
212     }
213
214     return offset - offset_unchanged;
215 }
216
217 /*****************************************************************************
218  * transform_buffer: main filter loop
219  *****************************************************************************/
220 static size_t transform_buffer( aout_filter_t   *p_filter,
221                                 uint8_t         *p_buffer,
222                                 size_t           i_buffer,
223                                 uint8_t         *pout )
224 {
225     aout_filter_sys_t *p = p_filter->p_sys;
226
227     size_t offset_in = fill_queue( p_filter, p_buffer, i_buffer, 0 );
228     unsigned bytes_out = 0;
229     while( p->bytes_queued >= p->bytes_queue_max ) {
230         unsigned bytes_off = 0;
231
232         // output stride
233         if( p->output_overlap ) {
234             if( p->best_overlap_offset ) {
235                 bytes_off = p->best_overlap_offset( p_filter );
236             }
237             p->output_overlap( p_filter, pout, bytes_off );
238         }
239         memcpy( pout + p->bytes_overlap,
240                 p->buf_queue + bytes_off + p->bytes_overlap,
241                 p->bytes_standing );
242         pout += p->bytes_stride;
243         bytes_out += p->bytes_stride;
244
245         // input stride
246         memcpy( p->buf_overlap,
247                 p->buf_queue + bytes_off + p->bytes_stride,
248                 p->bytes_overlap );
249         double frames_to_slide = p->frames_stride_scaled + p->frames_stride_error;
250         unsigned frames_to_stride_whole = (int)frames_to_slide;
251         p->bytes_to_slide       = frames_to_stride_whole * p->bytes_per_frame;
252         p->frames_stride_error  = frames_to_slide - frames_to_stride_whole;
253
254         offset_in += fill_queue( p_filter, p_buffer, i_buffer, offset_in );
255     }
256
257     return bytes_out;
258 }
259
260 /*****************************************************************************
261  * calculate_output_buffer_size
262  *****************************************************************************/
263 static size_t calculate_output_buffer_size( aout_filter_t   *p_filter,
264                                             size_t           bytes_in )
265 {
266     aout_filter_sys_t *p = p_filter->p_sys;
267     size_t bytes_out = 0;
268     int bytes_to_out = bytes_in + p->bytes_queued - p->bytes_to_slide;
269     if( bytes_to_out >= (int)p->bytes_queue_max ) {
270       /* while (total_buffered - stride_length * n >= queue_max) n++ */
271       bytes_out = p->bytes_stride * ( (unsigned)(
272           ( bytes_to_out - p->bytes_queue_max + /* rounding protection */ p->bytes_per_frame )
273           / p->bytes_stride_scaled ) + 1 );
274     }
275     return bytes_out;
276 }
277
278 /*****************************************************************************
279  * reinit_buffers: reinitializes buffers in p_filter->p_sys
280  *****************************************************************************/
281 static int reinit_buffers( aout_filter_t *p_filter )
282 {
283     aout_filter_sys_t *p = p_filter->p_sys;
284     unsigned i,j;
285
286     unsigned frames_stride = p->ms_stride * p->sample_rate / 1000.0;
287     p->bytes_stride = frames_stride * p->bytes_per_frame;
288
289     /* overlap */
290     unsigned frames_overlap = frames_stride * p->percent_overlap;
291     if( frames_overlap < 1 )
292     { /* if no overlap */
293         p->bytes_overlap    = 0;
294         p->bytes_standing   = p->bytes_stride;
295         p->samples_standing = p->bytes_standing / p->bytes_per_sample;
296         p->output_overlap   = NULL;
297     }
298     else
299     {
300         unsigned prev_overlap   = p->bytes_overlap;
301         p->bytes_overlap    = frames_overlap * p->bytes_per_frame;
302         p->samples_overlap  = frames_overlap * p->samples_per_frame;
303         p->bytes_standing   = p->bytes_stride - p->bytes_overlap;
304         p->samples_standing = p->bytes_standing / p->bytes_per_sample;
305         p->buf_overlap      = malloc( p->bytes_overlap );
306         p->table_blend      = malloc( p->samples_overlap * 4 ); /* sizeof (int32|float) */
307         if( !p->buf_overlap || !p->table_blend )
308             return VLC_ENOMEM;
309         if( p->bytes_overlap > prev_overlap )
310             memset( (uint8_t *)p->buf_overlap + prev_overlap, 0, p->bytes_overlap - prev_overlap );
311
312         float *pb = p->table_blend;
313         float t = (float)frames_overlap;
314         for( i = 0; i<frames_overlap; i++ )
315         {
316             float v = i / t;
317             for( j = 0; j < p->samples_per_frame; j++ )
318                 *pb++ = v;
319         }
320         p->output_overlap = output_overlap_float;
321     }
322
323     /* best overlap */
324     p->frames_search = ( frames_overlap <= 1 ) ? 0 : p->ms_search * p->sample_rate / 1000.0;
325     if( p->frames_search < 1 )
326     { /* if no search */
327         p->best_overlap_offset = NULL;
328     }
329     else
330     {
331         unsigned bytes_pre_corr = ( p->samples_overlap - p->samples_per_frame ) * 4; /* sizeof (int32|float) */
332         p->buf_pre_corr = malloc( bytes_pre_corr );
333         p->table_window = malloc( bytes_pre_corr );
334         if( ! p->buf_pre_corr || ! p->table_window )
335             return VLC_ENOMEM;
336         float *pw = p->table_window;
337         for( i = 1; i<frames_overlap; i++ )
338         {
339             float v = i * ( frames_overlap - i );
340             for( j = 0; j < p->samples_per_frame; j++ )
341                 *pw++ = v;
342         }
343         p->best_overlap_offset = best_overlap_offset_float;
344     }
345
346     unsigned new_size = ( p->frames_search + frames_stride + frames_overlap ) * p->bytes_per_frame;
347     if( p->bytes_queued > new_size )
348     {
349         if( p->bytes_to_slide > p->bytes_queued )
350         {
351           p->bytes_to_slide -= p->bytes_queued;
352           p->bytes_queued    = 0;
353         }
354         else
355         {
356             unsigned new_queued = __MIN( p->bytes_queued - p->bytes_to_slide, new_size );
357             memmove( p->buf_queue,
358                      p->buf_queue + p->bytes_queued - new_queued,
359                      new_queued );
360             p->bytes_to_slide = 0;
361             p->bytes_queued   = new_queued;
362         }
363     }
364     p->bytes_queue_max = new_size;
365     p->buf_queue = malloc( p->bytes_queue_max );
366     if( ! p->buf_queue )
367         return VLC_ENOMEM;
368
369     p->bytes_stride_scaled  = p->bytes_stride * p->scale;
370     p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame;
371
372     msg_Dbg( VLC_OBJECT(p_filter),
373              "%.3f scale, %.3f stride_in, %i stride_out, %i standing, %i overlap, %i search, %i queue, %s mode",
374              p->scale,
375              p->frames_stride_scaled,
376              (int)( p->bytes_stride / p->bytes_per_frame ),
377              (int)( p->bytes_standing / p->bytes_per_frame ),
378              (int)( p->bytes_overlap / p->bytes_per_frame ),
379              p->frames_search,
380              (int)( p->bytes_queue_max / p->bytes_per_frame ),
381              "fl32");
382
383     return VLC_SUCCESS;
384 }
385
386 /*****************************************************************************
387  * Open: initialize as "audio filter"
388  *****************************************************************************/
389 static int Open( vlc_object_t *p_this )
390 {
391     aout_filter_t     *p_filter = (aout_filter_t *)p_this;
392     aout_filter_sys_t *p_sys;
393     bool b_fit = true;
394
395     if( p_filter->fmt_in.audio.i_format != VLC_CODEC_FL32 ||
396         p_filter->fmt_out.audio.i_format != VLC_CODEC_FL32 )
397     {
398         b_fit = false;
399         p_filter->fmt_in.audio.i_format = p_filter->fmt_out.audio.i_format = VLC_CODEC_FL32;
400         msg_Warn( p_filter, "bad input or output format" );
401     }
402     if( ! AOUT_FMTS_SIMILAR( &p_filter->fmt_in.audio, &p_filter->fmt_out.audio ) )
403     {
404         b_fit = false;
405         memcpy( &p_filter->fmt_out.audio, &p_filter->fmt_in.audio, sizeof(audio_sample_format_t) );
406         msg_Warn( p_filter, "input and output formats are not similar" );
407     }
408
409     if( ! b_fit )
410         return VLC_EGENERIC;
411
412     p_filter->pf_do_work = DoWork;
413     p_filter->b_in_place = false;
414
415     /* Allocate structure */
416     p_sys = p_filter->p_sys = malloc( sizeof(aout_filter_sys_t) );
417     if( ! p_sys )
418         return VLC_ENOMEM;
419
420     p_sys->scale             = 1.0;
421     p_sys->sample_rate       = p_filter->fmt_in.audio.i_rate;
422     p_sys->samples_per_frame = aout_FormatNbChannels( &p_filter->fmt_in.audio );
423     p_sys->bytes_per_sample  = 4;
424     p_sys->bytes_per_frame   = p_sys->samples_per_frame * p_sys->bytes_per_sample;
425
426     msg_Dbg( p_this, "format: %5i rate, %i nch, %i bps, %s",
427              p_sys->sample_rate,
428              p_sys->samples_per_frame,
429              p_sys->bytes_per_sample,
430              "fl32" );
431
432     p_sys->ms_stride       = config_GetInt(   p_this, "scaletempo-stride" );
433     p_sys->percent_overlap = config_GetFloat( p_this, "scaletempo-overlap" );
434     p_sys->ms_search       = config_GetInt(   p_this, "scaletempo-search" );
435
436     msg_Dbg( p_this, "params: %i stride, %.3f overlap, %i search",
437              p_sys->ms_stride, p_sys->percent_overlap, p_sys->ms_search );
438
439     p_sys->i_buf = 0;
440     p_sys->p_buffers[0] = NULL;
441     p_sys->p_buffers[1] = NULL;
442
443     p_sys->buf_queue      = NULL;
444     p_sys->buf_overlap    = NULL;
445     p_sys->table_blend    = NULL;
446     p_sys->buf_pre_corr   = NULL;
447     p_sys->table_window   = NULL;
448     p_sys->bytes_overlap  = 0;
449     p_sys->bytes_queued   = 0;
450     p_sys->bytes_to_slide = 0;
451     p_sys->frames_stride_error = 0;
452
453     if( reinit_buffers( p_filter ) != VLC_SUCCESS )
454     {
455         Close( p_this );
456         return VLC_EGENERIC;
457     }
458     return VLC_SUCCESS;
459 }
460
461 static void Close( vlc_object_t *p_this )
462 {
463     aout_filter_t *p_filter = (aout_filter_t *)p_this;
464     aout_filter_sys_t *p_sys = p_filter->p_sys;
465     free( p_sys->buf_queue );
466     free( p_sys->buf_overlap );
467     free( p_sys->table_blend );
468     free( p_sys->buf_pre_corr );
469     free( p_sys->table_window );
470     free( p_sys->p_buffers[0] );
471     free( p_sys->p_buffers[1] );
472     free( p_filter->p_sys );
473 }
474
475 /*****************************************************************************
476  * DoWork: aout_filter wrapper for transform_buffer
477  *****************************************************************************/
478 static void DoWork( aout_instance_t * p_aout, aout_filter_t * p_filter,
479                     aout_buffer_t * p_in_buf, aout_buffer_t * p_out_buf )
480 {
481     VLC_UNUSED(p_aout);
482     aout_filter_sys_t *p = p_filter->p_sys;
483
484     if( p_filter->fmt_in.audio.i_rate == p->sample_rate ) {
485       memcpy( p_out_buf->p_buffer, p_in_buf->p_buffer, p_in_buf->i_buffer );
486       p_out_buf->i_buffer   = p_in_buf->i_buffer;
487       p_out_buf->i_nb_samples = p_in_buf->i_nb_samples;
488       return;
489     }
490
491     double scale = p_filter->fmt_in.audio.i_rate / (double)p->sample_rate;
492     if( scale != p->scale ) {
493       p->scale = scale;
494       p->bytes_stride_scaled  = p->bytes_stride * p->scale;
495       p->frames_stride_scaled = p->bytes_stride_scaled / p->bytes_per_frame;
496       p->bytes_to_slide = 0;
497       msg_Dbg( p_filter, "%.3f scale, %.3f stride_in, %i stride_out",
498                p->scale,
499                p->frames_stride_scaled,
500                (int)( p->bytes_stride / p->bytes_per_frame ) );
501     }
502
503     size_t i_outsize = calculate_output_buffer_size ( p_filter, p_in_buf->i_buffer );
504     if( i_outsize > p_out_buf->i_buffer ) {
505 #if 0   /* FIXME: This requires audio filter2 to work */
506         p_out_buf = block_Realloc( p_out_buf, i_outsize, 0 );
507         if( p_out_buf == NULL )
508             abort();
509 #else   /* This fails horribly if we have more than two buffers in the
510          * pipeline, or if the buffer is passed to another thread... XXX */
511         void *temp = realloc( p->p_buffers[ p->i_buf ], i_outsize );
512         if( temp == NULL )
513         {
514             return;
515         }
516         p->p_buffers[ p->i_buf ] = temp;
517         p_out_buf->p_buffer = p->p_buffers[ p->i_buf ];
518         p->i_buf = ! p->i_buf;
519 #endif
520     }
521
522     size_t bytes_out = transform_buffer( p_filter,
523         p_in_buf->p_buffer, p_in_buf->i_buffer,
524         p_out_buf->p_buffer );
525
526     p_out_buf->i_buffer   = bytes_out;
527     p_out_buf->i_nb_samples = bytes_out / p->bytes_per_frame;
528 }