]> git.sesse.net Git - vlc/blob - modules/codec/avcodec/audio.c
PGS subtitles: use original frame size (fix #6324)
[vlc] / modules / codec / avcodec / audio.c
1 /*****************************************************************************
2  * audio.c: audio decoder using libavcodec library
3  *****************************************************************************
4  * Copyright (C) 1999-2003 VLC authors and VideoLAN
5  * $Id$
6  *
7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8  *          Gildas Bazin <gbazin@videolan.org>
9  *
10  * This program is free software; you can redistribute it and/or modify it
11  * under the terms of the GNU Lesser General Public License as published by
12  * the Free Software Foundation; either version 2.1 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this program; if not, write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23  *****************************************************************************/
24
25 /*****************************************************************************
26  * Preamble
27  *****************************************************************************/
28 #ifdef HAVE_CONFIG_H
29 # include "config.h"
30 #endif
31
32 #include <assert.h>
33
34 #include <vlc_common.h>
35 #include <vlc_aout.h>
36 #include <vlc_codec.h>
37 #include <vlc_avcodec.h>
38
39 #include <libavcodec/avcodec.h>
40 #include <libavutil/mem.h>
41
42 #include <libavutil/audioconvert.h>
43
44 #include "avcodec.h"
45
46 /*****************************************************************************
47  * decoder_sys_t : decoder descriptor
48  *****************************************************************************/
49 struct decoder_sys_t
50 {
51     AVCODEC_COMMON_MEMBERS
52
53     /*
54      * Output properties
55      */
56     audio_sample_format_t aout_format;
57     date_t                end_date;
58
59     /* */
60     int     i_reject_count;
61
62     /* */
63     bool    b_extract;
64     int     pi_extraction[AOUT_CHAN_MAX];
65     int     i_previous_channels;
66     uint64_t i_previous_layout;
67 };
68
69 #define BLOCK_FLAG_PRIVATE_REALLOCATED (1 << BLOCK_FLAG_PRIVATE_SHIFT)
70
71 static void SetupOutputFormat( decoder_t *p_dec, bool b_trust );
72
73 static void InitDecoderConfig( decoder_t *p_dec, AVCodecContext *p_context )
74 {
75     if( p_dec->fmt_in.i_extra > 0 )
76     {
77         const uint8_t * const p_src = p_dec->fmt_in.p_extra;
78
79         int i_offset = 0;
80         int i_size = p_dec->fmt_in.i_extra;
81
82         if( p_dec->fmt_in.i_codec == VLC_CODEC_ALAC )
83         {
84             static const uint8_t p_pattern[] = { 0, 0, 0, 36, 'a', 'l', 'a', 'c' };
85             /* Find alac atom XXX it is a bit ugly */
86             for( i_offset = 0; i_offset < i_size - (int)sizeof(p_pattern); i_offset++ )
87             {
88                 if( !memcmp( &p_src[i_offset], p_pattern, sizeof(p_pattern) ) )
89                     break;
90             }
91             i_size = __MIN( p_dec->fmt_in.i_extra - i_offset, 36 );
92             if( i_size < 36 )
93                 i_size = 0;
94         }
95
96         if( i_size > 0 )
97         {
98             p_context->extradata =
99                 av_malloc( i_size + FF_INPUT_BUFFER_PADDING_SIZE );
100             if( p_context->extradata )
101             {
102                 uint8_t *p_dst = p_context->extradata;
103
104                 p_context->extradata_size = i_size;
105
106                 memcpy( &p_dst[0],            &p_src[i_offset], i_size );
107                 memset( &p_dst[i_size], 0, FF_INPUT_BUFFER_PADDING_SIZE );
108             }
109         }
110     }
111     else
112     {
113         p_context->extradata_size = 0;
114         p_context->extradata = NULL;
115     }
116 }
117
118 /**
119  * Allocates decoded audio buffer for libavcodec to use.
120  */
121 #if (LIBAVCODEC_VERSION_MAJOR >= 55)
122 typedef struct
123 {
124     block_t self;
125     AVFrame *frame;
126 } vlc_av_frame_t;
127
128 static void vlc_av_frame_Release(block_t *block)
129 {
130     vlc_av_frame_t *b = (void *)block;
131
132     av_frame_free(&b->frame);
133     free(b);
134 }
135
136 static block_t *vlc_av_frame_Wrap(AVFrame *frame)
137 {
138     for (unsigned i = 1; i < AV_NUM_DATA_POINTERS; i++)
139         assert(frame->linesize[i] == 0); /* only packed frame supported */
140
141     if (av_frame_make_writable(frame)) /* TODO: read-only block_t */
142         return NULL;
143
144     vlc_av_frame_t *b = malloc(sizeof (*b));
145     if (unlikely(b == NULL))
146         return NULL;
147
148     block_t *block = &b->self;
149
150     block_Init(block, frame->extended_data[0], frame->linesize[0]);
151     block->i_nb_samples = frame->nb_samples;
152     block->pf_release = vlc_av_frame_Release;
153     b->frame = frame;
154     return block;
155 }
156 #else
157 static int GetAudioBuf( AVCodecContext *ctx, AVFrame *buf )
158 {
159     block_t *block;
160     bool planar = av_sample_fmt_is_planar( ctx->sample_fmt );
161     unsigned channels = planar ? 1 : ctx->channels;
162     unsigned planes = planar ? ctx->channels : 1;
163
164     int bytes = av_samples_get_buffer_size( &buf->linesize[0], channels,
165                                             buf->nb_samples, ctx->sample_fmt,
166                                             16 );
167     assert( bytes >= 0 );
168     block = block_Alloc( bytes * planes );
169     if( unlikely(block == NULL) )
170         return AVERROR(ENOMEM);
171
172     block->i_nb_samples = buf->nb_samples;
173     buf->opaque = block;
174
175     if( planes > AV_NUM_DATA_POINTERS )
176     {
177         uint8_t **ext = malloc( sizeof( *ext ) * planes );
178         if( unlikely(ext == NULL) )
179         {
180             block_Release( block );
181             return AVERROR(ENOMEM);
182         }
183         buf->extended_data = ext;
184     }
185     else
186         buf->extended_data = buf->data;
187
188     uint8_t *buffer = block->p_buffer;
189     for( unsigned i = 0; i < planes; i++ )
190     {
191         buf->linesize[i] = buf->linesize[0];
192         buf->extended_data[i] = buffer;
193         buffer += bytes;
194     }
195
196     return 0;
197 }
198 #endif
199
200 /*****************************************************************************
201  * InitAudioDec: initialize audio decoder
202  *****************************************************************************
203  * The avcodec codec will be opened, some memory allocated.
204  *****************************************************************************/
205 int InitAudioDec( decoder_t *p_dec, AVCodecContext *p_context,
206                       AVCodec *p_codec, int i_codec_id, const char *psz_namecodec )
207 {
208     decoder_sys_t *p_sys;
209
210     /* Allocate the memory needed to store the decoder's structure */
211     if( ( p_dec->p_sys = p_sys = malloc(sizeof(*p_sys)) ) == NULL )
212     {
213         return VLC_ENOMEM;
214     }
215
216     p_codec->type = AVMEDIA_TYPE_AUDIO;
217     p_context->codec_type = AVMEDIA_TYPE_AUDIO;
218     p_context->codec_id = i_codec_id;
219 #if (LIBAVCODEC_VERSION_MAJOR >= 55)
220     p_context->refcounted_frames = true;
221 #else
222     p_context->get_buffer = GetAudioBuf;
223 #endif
224     p_sys->p_context = p_context;
225     p_sys->p_codec = p_codec;
226     p_sys->i_codec_id = i_codec_id;
227     p_sys->psz_namecodec = psz_namecodec;
228     p_sys->b_delayed_open = true;
229
230     // Initialize decoder extradata
231     InitDecoderConfig( p_dec, p_context);
232
233     /* ***** Open the codec ***** */
234     if( ffmpeg_OpenCodec( p_dec ) < 0 )
235     {
236         msg_Err( p_dec, "cannot open codec (%s)", p_sys->psz_namecodec );
237         av_free( p_sys->p_context->extradata );
238         free( p_sys );
239         return VLC_EGENERIC;
240     }
241
242     p_sys->i_reject_count = 0;
243     p_sys->b_extract = false;
244     p_sys->i_previous_channels = 0;
245     p_sys->i_previous_layout = 0;
246
247     /* */
248     p_dec->fmt_out.i_cat = AUDIO_ES;
249     /* Try to set as much information as possible but do not trust it */
250     SetupOutputFormat( p_dec, false );
251
252     date_Set( &p_sys->end_date, 0 );
253     if( p_dec->fmt_out.audio.i_rate )
254         date_Init( &p_sys->end_date, p_dec->fmt_out.audio.i_rate, 1 );
255     else if( p_dec->fmt_in.audio.i_rate )
256         date_Init( &p_sys->end_date, p_dec->fmt_in.audio.i_rate, 1 );
257
258     return VLC_SUCCESS;
259 }
260
261 /*****************************************************************************
262  * DecodeAudio: Called to decode one frame
263  *****************************************************************************/
264 block_t * DecodeAudio ( decoder_t *p_dec, block_t **pp_block )
265 {
266     decoder_sys_t *p_sys = p_dec->p_sys;
267     AVCodecContext *ctx = p_sys->p_context;
268
269     if( !pp_block || !*pp_block )
270         return NULL;
271
272     block_t *p_block = *pp_block;
273
274     if( !ctx->extradata_size && p_dec->fmt_in.i_extra && p_sys->b_delayed_open)
275     {
276         InitDecoderConfig( p_dec, ctx );
277         if( ffmpeg_OpenCodec( p_dec ) )
278             msg_Err( p_dec, "Cannot open decoder %s", p_sys->psz_namecodec );
279     }
280
281     if( p_sys->b_delayed_open )
282         goto end;
283
284     if( p_block->i_flags & (BLOCK_FLAG_DISCONTINUITY|BLOCK_FLAG_CORRUPTED) )
285     {
286         avcodec_flush_buffers( ctx );
287         date_Set( &p_sys->end_date, 0 );
288
289         if( p_sys->i_codec_id == AV_CODEC_ID_MP2 || p_sys->i_codec_id == AV_CODEC_ID_MP3 )
290             p_sys->i_reject_count = 3;
291
292         goto end;
293     }
294
295     /* We've just started the stream, wait for the first PTS. */
296     if( !date_Get( &p_sys->end_date ) && p_block->i_pts <= VLC_TS_INVALID )
297         goto end;
298
299     if( p_block->i_buffer <= 0 )
300         goto end;
301
302     if( (p_block->i_flags & BLOCK_FLAG_PRIVATE_REALLOCATED) == 0 )
303     {
304         p_block = block_Realloc( p_block, 0, p_block->i_buffer + FF_INPUT_BUFFER_PADDING_SIZE );
305         if( !p_block )
306             return NULL;
307         *pp_block = p_block;
308         p_block->i_buffer -= FF_INPUT_BUFFER_PADDING_SIZE;
309         memset( &p_block->p_buffer[p_block->i_buffer], 0, FF_INPUT_BUFFER_PADDING_SIZE );
310
311         p_block->i_flags |= BLOCK_FLAG_PRIVATE_REALLOCATED;
312     }
313
314 #if (LIBAVCODEC_VERSION_MAJOR >= 55)
315     AVFrame *frame = av_frame_alloc();
316     if (unlikely(frame == NULL))
317         goto end;
318 #else
319     AVFrame *frame = &(AVFrame) { };
320 #endif
321
322     for( int got_frame = 0; !got_frame; )
323     {
324         if( p_block->i_buffer == 0 )
325             goto end;
326
327         AVPacket pkt;
328         av_init_packet( &pkt );
329         pkt.data = p_block->p_buffer;
330         pkt.size = p_block->i_buffer;
331
332         int used = avcodec_decode_audio4( ctx, frame, &got_frame, &pkt );
333         if( used < 0 )
334         {
335             msg_Warn( p_dec, "cannot decode one frame (%zu bytes)",
336                       p_block->i_buffer );
337             goto end;
338         }
339
340         assert( p_block->i_buffer >= (unsigned)used );
341         p_block->p_buffer += used;
342         p_block->i_buffer -= used;
343     }
344
345     if( ctx->channels <= 0 || ctx->channels > 8 || ctx->sample_rate <= 0 )
346     {
347         msg_Warn( p_dec, "invalid audio properties channels count %d, sample rate %d",
348                   ctx->channels, ctx->sample_rate );
349         goto end;
350     }
351
352     if( p_dec->fmt_out.audio.i_rate != (unsigned int)ctx->sample_rate )
353         date_Init( &p_sys->end_date, ctx->sample_rate, 1 );
354
355     if( p_block->i_pts > VLC_TS_INVALID &&
356         p_block->i_pts > date_Get( &p_sys->end_date ) )
357     {
358         date_Set( &p_sys->end_date, p_block->i_pts );
359     }
360
361     if( p_block->i_buffer == 0 )
362     {   /* Done with this buffer */
363         block_Release( p_block );
364         *pp_block = NULL;
365     }
366
367 #if (LIBAVCODEC_VERSION_MAJOR < 55)
368     /* NOTE WELL: Beyond this point, p_block refers to the DECODED block! */
369     p_block = frame->opaque;
370 #endif
371     SetupOutputFormat( p_dec, true );
372     if( decoder_UpdateAudioFormat( p_dec ) )
373         goto drop;
374
375     /* Interleave audio if required */
376     if( av_sample_fmt_is_planar( ctx->sample_fmt ) )
377 #if (LIBAVCODEC_VERSION_MAJOR >= 55)
378     {
379         p_block = block_Alloc(frame->linesize[0] * ctx->channels);
380         if (unlikely(p_block == NULL))
381             goto drop;
382
383         const void *planes[ctx->channels];
384         for (int i = 0; i < ctx->channels; i++)
385             planes[i] = frame->extended_data[i];
386
387         aout_Interleave(p_block->p_buffer, planes, frame->nb_samples,
388                         ctx->channels, p_dec->fmt_out.audio.i_format);
389         p_block->i_nb_samples = frame->nb_samples;
390         av_frame_free(&frame);
391     }
392     else
393     {
394         p_block = vlc_av_frame_Wrap(frame);
395         if (unlikely(p_block == NULL))
396             goto drop;
397     }
398 #else
399     {
400         block_t *p_buffer = block_Alloc( p_block->i_buffer );
401         if( unlikely(p_buffer == NULL) )
402             goto drop;
403
404         const void *planes[ctx->channels];
405         for( int i = 0; i < ctx->channels; i++)
406             planes[i] = frame->extended_data[i];
407
408         aout_Interleave( p_buffer->p_buffer, planes, frame->nb_samples,
409                          ctx->channels, p_dec->fmt_out.audio.i_format );
410         if( ctx->channels > AV_NUM_DATA_POINTERS )
411             free( frame->extended_data );
412         block_Release( p_block );
413         p_block = p_buffer;
414     }
415     p_block->i_nb_samples = frame->nb_samples;
416 #endif
417
418     if (p_sys->b_extract)
419     {   /* TODO: do not drop channels... at least not here */
420         block_t *p_buffer = block_Alloc( p_dec->fmt_out.audio.i_bytes_per_frame
421                                          * p_block->i_nb_samples );
422         if( unlikely(p_buffer == NULL) )
423             goto drop;
424         aout_ChannelExtract( p_buffer->p_buffer,
425                              p_dec->fmt_out.audio.i_channels,
426                              p_block->p_buffer, ctx->channels,
427                              p_block->i_nb_samples, p_sys->pi_extraction,
428                              p_dec->fmt_out.audio.i_bitspersample );
429         p_buffer->i_nb_samples = p_block->i_nb_samples;
430         block_Release( p_block );
431         p_block = p_buffer;
432     }
433
434     /* Silent unwanted samples */
435     if( p_sys->i_reject_count > 0 )
436     {
437         memset( p_block->p_buffer, 0, p_block->i_buffer );
438         p_sys->i_reject_count--;
439     }
440
441     p_block->i_buffer = p_block->i_nb_samples
442                         * p_dec->fmt_out.audio.i_bytes_per_frame;
443     p_block->i_pts = date_Get( &p_sys->end_date );
444     p_block->i_length = date_Increment( &p_sys->end_date,
445                                       p_block->i_nb_samples ) - p_block->i_pts;
446     return p_block;
447
448 end:
449     *pp_block = NULL;
450 drop:
451     block_Release(p_block);
452     return NULL;
453 }
454
455 /*****************************************************************************
456  *
457  *****************************************************************************/
458
459 vlc_fourcc_t GetVlcAudioFormat( int fmt )
460 {
461     static const vlc_fourcc_t fcc[] = {
462         [AV_SAMPLE_FMT_U8]    = VLC_CODEC_U8,
463         [AV_SAMPLE_FMT_S16]   = VLC_CODEC_S16N,
464         [AV_SAMPLE_FMT_S32]   = VLC_CODEC_S32N,
465         [AV_SAMPLE_FMT_FLT]   = VLC_CODEC_FL32,
466         [AV_SAMPLE_FMT_DBL]   = VLC_CODEC_FL64,
467         [AV_SAMPLE_FMT_U8P]   = VLC_CODEC_U8,
468         [AV_SAMPLE_FMT_S16P]  = VLC_CODEC_S16N,
469         [AV_SAMPLE_FMT_S32P]  = VLC_CODEC_S32N,
470         [AV_SAMPLE_FMT_FLTP]  = VLC_CODEC_FL32,
471         [AV_SAMPLE_FMT_DBLP]  = VLC_CODEC_FL64,
472     };
473     if( (sizeof(fcc) / sizeof(fcc[0])) > (unsigned)fmt )
474         return fcc[fmt];
475     return VLC_CODEC_S16N;
476 }
477
478 static const uint64_t pi_channels_map[][2] =
479 {
480     { AV_CH_FRONT_LEFT,        AOUT_CHAN_LEFT },
481     { AV_CH_FRONT_RIGHT,       AOUT_CHAN_RIGHT },
482     { AV_CH_FRONT_CENTER,      AOUT_CHAN_CENTER },
483     { AV_CH_LOW_FREQUENCY,     AOUT_CHAN_LFE },
484     { AV_CH_BACK_LEFT,         AOUT_CHAN_REARLEFT },
485     { AV_CH_BACK_RIGHT,        AOUT_CHAN_REARRIGHT },
486     { AV_CH_FRONT_LEFT_OF_CENTER, 0 },
487     { AV_CH_FRONT_RIGHT_OF_CENTER, 0 },
488     { AV_CH_BACK_CENTER,       AOUT_CHAN_REARCENTER },
489     { AV_CH_SIDE_LEFT,         AOUT_CHAN_MIDDLELEFT },
490     { AV_CH_SIDE_RIGHT,        AOUT_CHAN_MIDDLERIGHT },
491     { AV_CH_TOP_CENTER,        0 },
492     { AV_CH_TOP_FRONT_LEFT,    0 },
493     { AV_CH_TOP_FRONT_CENTER,  0 },
494     { AV_CH_TOP_FRONT_RIGHT,   0 },
495     { AV_CH_TOP_BACK_LEFT,     0 },
496     { AV_CH_TOP_BACK_CENTER,   0 },
497     { AV_CH_TOP_BACK_RIGHT,    0 },
498     { AV_CH_STEREO_LEFT,       0 },
499     { AV_CH_STEREO_RIGHT,      0 },
500 };
501
502 static void SetupOutputFormat( decoder_t *p_dec, bool b_trust )
503 {
504     decoder_sys_t *p_sys = p_dec->p_sys;
505
506     p_dec->fmt_out.i_codec = GetVlcAudioFormat( p_sys->p_context->sample_fmt );
507     p_dec->fmt_out.audio.i_format = p_dec->fmt_out.i_codec;
508     p_dec->fmt_out.audio.i_rate = p_sys->p_context->sample_rate;
509
510     /* */
511     if( p_sys->i_previous_channels == p_sys->p_context->channels &&
512         p_sys->i_previous_layout == p_sys->p_context->channel_layout )
513         return;
514     if( b_trust )
515     {
516         p_sys->i_previous_channels = p_sys->p_context->channels;
517         p_sys->i_previous_layout = p_sys->p_context->channel_layout;
518     }
519
520     /* Specified order
521      * FIXME should we use fmt_in.audio.i_physical_channels or not ?
522      */
523     const unsigned i_order_max = 8 * sizeof(p_sys->p_context->channel_layout);
524     uint32_t pi_order_src[i_order_max];
525     int i_channels_src = 0;
526
527     if( p_sys->p_context->channel_layout )
528     {
529         for( unsigned i = 0; i < sizeof(pi_channels_map)/sizeof(*pi_channels_map); i++ )
530         {
531             if( p_sys->p_context->channel_layout & pi_channels_map[i][0] )
532                 pi_order_src[i_channels_src++] = pi_channels_map[i][1];
533         }
534     }
535     else
536     {
537         /* Create default order  */
538         if( b_trust )
539             msg_Warn( p_dec, "Physical channel configuration not set : guessing" );
540         for( unsigned int i = 0; i < __MIN( i_order_max, (unsigned)p_sys->p_context->channels ); i++ )
541         {
542             if( i < sizeof(pi_channels_map)/sizeof(*pi_channels_map) )
543                 pi_order_src[i_channels_src++] = pi_channels_map[i][1];
544         }
545     }
546     if( i_channels_src != p_sys->p_context->channels && b_trust )
547         msg_Err( p_dec, "Channel layout not understood" );
548
549     uint32_t i_layout_dst;
550     int      i_channels_dst;
551     p_sys->b_extract = aout_CheckChannelExtraction( p_sys->pi_extraction,
552                                                     &i_layout_dst, &i_channels_dst,
553                                                     NULL, pi_order_src, i_channels_src );
554     if( i_channels_dst != i_channels_src && b_trust )
555         msg_Warn( p_dec, "%d channels are dropped", i_channels_src - i_channels_dst );
556
557     p_dec->fmt_out.audio.i_physical_channels =
558     p_dec->fmt_out.audio.i_original_channels = i_layout_dst;
559     aout_FormatPrepare( &p_dec->fmt_out.audio );
560 }
561