1 /*****************************************************************************
2 * aac.c: FDK-AAC Encoder plugin for vlc.
3 *****************************************************************************
4 * Copyright (C) 2012 Sergio Ammirata
6 * Authors: Sergio Ammirata <sergio@ammirata.net>
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 *****************************************************************************/
23 /*****************************************************************************
25 *****************************************************************************/
30 #include <fdk-aac/aacenc_lib.h>
32 #include <vlc_common.h>
33 #include <vlc_plugin.h>
34 #include <vlc_codec.h>
/* Module entry points handed to set_callbacks() in the module descriptor. */
36 static int OpenEncoder( vlc_object_t * );
37 static void CloseEncoder( vlc_object_t * );
/* Prefix shared by the names of all configuration options of this module. */
39 #define ENC_CFG_PREFIX "sout-fdkaac-"
/* Short and long help texts for the configuration options declared in the
 * module descriptor. */
41 #define AOT_TEXT N_("Encoder Profile")
42 #define AOT_LONGTEXT N_( "Encoder Algorithm to use" )
44 #define SIDEBAND_TEXT N_("Enable spectral band replication")
45 #define SIDEBAND_LONGTEXT N_( "This is an optional feature only for the AAC-ELD profile" )
/* NOTE(review): the long text below opens a parenthesis that is never closed. */
47 #define VBR_QUALITY_TEXT N_("VBR Quality")
48 #define VBR_QUALITY_LONGTEXT N_( "Quality of the VBR Encoding (0=cbr, 1-5 constant quality vbr, 5 is best" )
50 #define AFTERBURNER_TEXT N_("Enable afterburner library")
51 #define AFTERBURNER_LONGTEXT N_( "This library will produce higher quality audio at the expense of additional CPU usage (default is enabled)" )
53 #define SIGNALING_TEXT N_("Signaling mode of the extension AOT")
54 #define SIGNALING_LONGTEXT N_( "1 is explicit for SBR and implicit for PS (default), 2 is explicit hierarchical" )
/* Channel ordering values; OpenEncoder() passes CH_ORDER_WG4 as the
 * AACENC_CHANNELORDER parameter. */
56 #define CH_ORDER_MPEG 0 /*!< MPEG channel ordering (e. g. 5.1: C, L, R, SL, SR, LFE) */
57 #define CH_ORDER_WAV 1 /*!< WAV fileformat channel ordering (e. g. 5.1: L, R, C, LFE, SL, SR) */
58 #define CH_ORDER_WG4 2 /*!< WG4 fileformat channel ordering (e. g. 5.1: L, R, SL, SR, C, LFE) */
/* AAC profiles offered by this module; the numeric value is what
 * OpenEncoder() passes as the AACENC_AOT parameter. */
60 #define PROFILE_AAC_LC 2
61 #define PROFILE_AAC_HE 5
62 #define PROFILE_AAC_HE_v2 29
63 #define PROFILE_AAC_LD 23
64 #define PROFILE_AAC_ELD 39
/* Values of the "signaling" option; SIGNALING_COMPATIBLE is its default. */
66 #define SIGNALING_COMPATIBLE 1
67 #define SIGNALING_HIERARCHICAL 2
/* Parallel tables for the "profile" option: selectable values and the label
 * shown for each of them (same index in both arrays). */
69 static const int pi_aot_values[] = { PROFILE_AAC_LC, PROFILE_AAC_HE, PROFILE_AAC_HE_v2, PROFILE_AAC_LD, PROFILE_AAC_ELD };
70 static const char *const ppsz_aot_descriptions[] =
71 { N_("AAC-LC"), N_("HE-AAC"), N_("HE-AAC-v2"), N_("AAC-LD"), N_("AAC-ELD") };
/* Module descriptor: names, capability, callbacks and the sout-fdkaac-*
 * configuration options. */
74 set_shortname( N_("FDKAAC") )
75 set_description( N_("FDK-AAC Audio encoder") )
/* Registered for the "encoder" capability with a score of 50. */
76 set_capability( "encoder", 50 )
77 set_callbacks( OpenEncoder, CloseEncoder )
78 add_shortcut( "fdkaac" )
79 set_category( CAT_INPUT )
80 set_subcategory( SUBCAT_INPUT_ACODEC )
/* "profile": defaults to AAC-LC; the choices come from pi_aot_values /
 * ppsz_aot_descriptions. */
81 add_integer( ENC_CFG_PREFIX "profile", PROFILE_AAC_LC, AOT_TEXT,
83 change_integer_list( pi_aot_values, ppsz_aot_descriptions );
/* "sbr": disabled by default. */
84 add_bool( ENC_CFG_PREFIX "sbr", false, SIDEBAND_TEXT,
85 SIDEBAND_LONGTEXT, false )
/* "vbr": default 0, accepted range 0..5. */
86 add_integer( ENC_CFG_PREFIX "vbr", 0, VBR_QUALITY_TEXT,
87 VBR_QUALITY_LONGTEXT, false )
88 change_integer_range (0, 5)
/* "afterburner": enabled by default. */
89 add_bool( ENC_CFG_PREFIX "afterburner", true, AFTERBURNER_TEXT,
90 AFTERBURNER_LONGTEXT, true )
/* "signaling": default SIGNALING_COMPATIBLE, accepted range 0..2.
 * NOTE(review): the help text only describes the values 1 and 2 - confirm
 * what 0 is meant to select. */
91 add_integer( ENC_CFG_PREFIX "signaling", SIGNALING_COMPATIBLE, SIGNALING_TEXT,
92 SIGNALING_LONGTEXT, true )
93 change_integer_range (0, 2)
96 /*****************************************************************************
98 *****************************************************************************/
/* Encoding callback; OpenEncoder() installs it as p_enc->pf_encode_audio. */
99 static block_t *EncodeAudio( encoder_t *p_enc, block_t *p_buf );
/* NULL-terminated list of option names (without ENC_CFG_PREFIX) that
 * OpenEncoder() hands to config_ChainParse(). */
101 static const char *const ppsz_enc_options[] = {
102 "profile", "sbr", "vbr", "afterburner", "signaling", NULL
105 /*****************************************************************************
106 * encoder_sys_t : aac encoder descriptor
107 *****************************************************************************/
/* Ratio i_length / numOutBytes of the last block encoded from real input;
 * EncodeAudio() multiplies it by the output size to estimate durations when
 * no input samples are supplied. */
110 double d_compression_ratio;
112 int i_aot; /* This stores the aac profile chosen */
113 int i_vbr; /* cbr or vbr-quality value chosen */
114 int i_signaling; /* Library feature for backwards compatibility */
115 int i_encoderdelay; /* Samples delay introduced by the profile */
117 int i_maxoutputsize; /* Maximum buffer size for encoded output */
/* FDK-AAC encoder instance: opened with aacEncOpen(), closed with
 * aacEncClose(). */
118 HANDLE_AACENCODER handle;
119 bool b_afterburner; /* Library feature for additional quality */
120 bool b_eld_sbr; /* Spectral band replication option for ELD profile */
/*
 * Maps an FDK-AAC encoder error code to a constant, untranslated English
 * description; the callers in this file embed it in their log messages.
 * The final return yields "Unknown error".
 */
123 static const char *aac_get_errorstring(AACENC_ERROR erraac)
128 case AACENC_INVALID_HANDLE:
129 return "Invalid handle";
130 case AACENC_MEMORY_ERROR:
131 return "Memory allocation error";
132 case AACENC_UNSUPPORTED_PARAMETER:
133 return "Unsupported parameter";
134 case AACENC_INVALID_CONFIG:
135 return "Invalid config";
136 case AACENC_INIT_ERROR:
137 return "Initialization error";
138 case AACENC_INIT_AAC_ERROR:
139 return "AAC library initialization error";
140 case AACENC_INIT_SBR_ERROR:
141 return "SBR library initialization error";
142 case AACENC_INIT_TP_ERROR:
143 return "Transport library initialization error";
144 case AACENC_INIT_META_ERROR:
145 return "Metadata library initialization error";
146 case AACENC_ENCODE_ERROR:
147 return "Encoding error";
148 case AACENC_ENCODE_EOF:
149 return "End of file";
/* Fallback text for any code not listed above. */
151 return "Unknown error";
155 /*****************************************************************************
156 * OpenEncoder: open the encoder.
157 *****************************************************************************/
/*
 * Module open callback.
 *
 * Checks the requested output codec and the input channel count, allocates
 * the encoder state, reads the sout-fdkaac-* options, configures an FDK-AAC
 * encoder instance parameter by parameter and copies the codec configuration
 * reported by the library into fmt_out.p_extra.
 *
 * p_this: the VLC object being opened; it is used as an encoder_t.
 * Returns an int status to the module loader.
 */
158 static int OpenEncoder( vlc_object_t *p_this )
161 encoder_sys_t *p_sys;
164 bool b_profile_selected;
170 p_enc = (encoder_t *)p_this;
171 b_profile_selected = false;
/* Only the 'laac', 'haac' and 'saac' fourccs and VLC_CODEC_MP4A are handled.
 * The three fourccs additionally force the profile (LC, HE, HE v2). */
175 if( p_enc->fmt_out.i_codec != VLC_FOURCC( 'l', 'a', 'a', 'c' ) &&
176 p_enc->fmt_out.i_codec != VLC_FOURCC( 'h', 'a', 'a', 'c' ) &&
177 p_enc->fmt_out.i_codec != VLC_FOURCC( 's', 'a', 'a', 'c' ) &&
178 p_enc->fmt_out.i_codec != VLC_CODEC_MP4A )
182 else if ( p_enc->fmt_out.i_codec == VLC_FOURCC( 'l', 'a', 'a', 'c' ) )
184 b_profile_selected = true;
185 i_profile = PROFILE_AAC_LC;
187 else if ( p_enc->fmt_out.i_codec == VLC_FOURCC( 'h', 'a', 'a', 'c' ) )
189 b_profile_selected = true;
190 i_profile = PROFILE_AAC_HE;
192 else if ( p_enc->fmt_out.i_codec == VLC_FOURCC( 's', 'a', 'a', 'c' ) )
194 b_profile_selected = true;
195 i_profile = PROFILE_AAC_HE_v2;
/* Translate the input channel count into an FDK channel mode plus the sce and
 * cpe counts that feed the default bitrate formula further down (presumably
 * the numbers of single-channel and channel-pair elements - TODO confirm). */
198 switch (p_enc->fmt_in.audio.i_channels) {
199 case 1: mode = MODE_1; sce = 1; cpe = 0; break;
200 case 2: mode = MODE_2; sce = 0; cpe = 1; break;
201 case 3: mode = MODE_1_2; sce = 1; cpe = 1; break;
202 case 4: mode = MODE_1_2_1; sce = 2; cpe = 1; break;
203 case 5: mode = MODE_1_2_2; sce = 1; cpe = 2; break;
204 case 6: mode = MODE_1_2_2_1; sce = 2; cpe = 2; break;
205 case 8: mode = MODE_1_2_2_2_1; sce = 2; cpe = 3; break;
/* NOTE(review): there is no case for 7 channels, so that count presumably
 * ends up at this message as well although the text says "> 8" - confirm the
 * intended wording. */
207 msg_Err( p_enc, "we do not support > 8 input channels, this input has %i",
208 p_enc->fmt_in.audio.i_channels );
212 msg_Info(p_enc, "Initializing AAC Encoder, %i channels", p_enc->fmt_in.audio.i_channels);
214 /* Allocate the memory needed to store the encoder's structure */
/* NOTE(review): malloc() does not zero the structure, and no assignment to
 * d_compression_ratio is visible in this function - confirm it is initialized
 * before EncodeAudio() can read it. */
215 p_sys = (encoder_sys_t *)malloc(sizeof(encoder_sys_t));
216 if( unlikely( !p_sys ) )
218 p_enc->p_sys = p_sys;
/* Input is requested as VLC_CODEC_S16N; the output codec is always rewritten
 * to VLC_CODEC_MP4A, whichever of the accepted codecs was asked for. */
219 p_enc->fmt_in.i_codec = VLC_CODEC_S16N;
220 p_enc->fmt_out.i_cat = AUDIO_ES;
221 p_enc->fmt_out.i_codec = VLC_CODEC_MP4A;
223 config_ChainParse( p_enc, ENC_CFG_PREFIX, ppsz_enc_options, p_enc->p_cfg );
/* The "profile" option is only consulted when the fourcc did not already
 * select a profile. */
225 if ( b_profile_selected == false )
226 p_sys->i_aot = var_InheritInteger( p_enc, ENC_CFG_PREFIX "profile" );
228 p_sys->i_aot = i_profile;
229 p_sys->b_eld_sbr = var_InheritBool( p_enc, ENC_CFG_PREFIX "sbr" );
230 p_sys->i_vbr = var_InheritInteger( p_enc, ENC_CFG_PREFIX "vbr" );
231 p_sys->b_afterburner = var_InheritBool( p_enc, ENC_CFG_PREFIX "afterburner" );
232 p_sys->i_signaling = var_InheritInteger( p_enc, ENC_CFG_PREFIX "signaling" );
/* 0 serves as the "no block seen yet" marker tested by EncodeAudio(). */
233 p_sys->i_pts_last = 0;
/* The HE-AAC and HE-AAC v2 profiles are limited to VBR quality 3. */
235 if ((p_sys->i_aot == PROFILE_AAC_HE || p_sys->i_aot == PROFILE_AAC_HE_v2) && p_sys->i_vbr > 3)
237 msg_Warn(p_enc, "Maximum VBR quality for this profile is 3, setting vbr=3");
/* Open the FDK-AAC encoder instance for the input channel count. */
240 if ((erraac = aacEncOpen(&p_sys->handle, 0, p_enc->fmt_in.audio.i_channels)) != AACENC_OK) {
241 msg_Err(p_enc, "Unable to open encoder: %s", aac_get_errorstring(erraac));
/* Profile restrictions: HE-AAC v2 and AAC-ELD are only accepted with exactly
 * two input channels. */
245 if ( p_sys->i_aot == PROFILE_AAC_HE_v2 && p_enc->fmt_in.audio.i_channels != 2 )
247 msg_Err(p_enc, "The HE-AAC-v2 profile can only be used with stereo sources");
250 if ( p_sys->i_aot == PROFILE_AAC_ELD && p_enc->fmt_in.audio.i_channels != 2 )
252 msg_Err(p_enc, "The ELD-AAC profile can only be used with stereo sources");
/* Configure the encoder one parameter at a time; every failure is logged
 * together with the FDK error description. */
255 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_AOT, p_sys->i_aot)) != AACENC_OK) {
256 msg_Err(p_enc, "Unable to set the Profile %i: %s", p_sys->i_aot, aac_get_errorstring(erraac));
/* SBR mode is only switched on for the ELD profile when "sbr" is set. */
259 if (p_sys->i_aot == PROFILE_AAC_ELD && p_sys->b_eld_sbr) {
260 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_SBR_MODE, 1)) != AACENC_OK) {
261 msg_Err(p_enc, "Unable to set SBR mode for ELD: %s", aac_get_errorstring(erraac));
265 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_SAMPLERATE,
266 p_enc->fmt_out.audio.i_rate)) != AACENC_OK) {
267 msg_Err(p_enc, "Unable to set the sample rate %i: %s",p_enc->fmt_out.audio.i_rate,
268 aac_get_errorstring(erraac));
271 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_CHANNELMODE, mode)) != AACENC_OK) {
272 msg_Err(p_enc, "Unable to set the channel mode: %s", aac_get_errorstring(erraac));
275 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_CHANNELORDER, CH_ORDER_WG4)) != AACENC_OK) {
276 msg_Err(p_enc, "Unable to set the sound channel order: %s", aac_get_errorstring(erraac));
/* The bitrate mode is only set when a VBR quality (non-zero) was chosen. */
279 if (p_sys->i_vbr != 0) {
280 if ((erraac = aacEncoder_SetParam(p_sys->handle,
281 AACENC_BITRATEMODE, p_sys->i_vbr)) != AACENC_OK) {
282 msg_Err(p_enc, "Unable to set the VBR bitrate mode: %s", aac_get_errorstring(erraac));
/* No output bitrate requested: derive one from the sce/cpe counts and the
 * output sample rate, store it in fmt_out.i_bitrate and log it. */
286 if (p_enc->fmt_out.i_bitrate == 0) {
287 if (p_sys->i_aot == PROFILE_AAC_HE_v2) {
291 i_bitrate = (96*sce + 128*cpe) * p_enc->fmt_out.audio.i_rate / 44;
292 if (p_sys->i_aot == PROFILE_AAC_HE ||
293 p_sys->i_aot == PROFILE_AAC_HE_v2 ||
296 p_enc->fmt_out.i_bitrate = i_bitrate;
297 msg_Info(p_enc, "Setting optimal bitrate of %i", i_bitrate);
/* Use the output bitrate that was requested. */
301 i_bitrate = p_enc->fmt_out.i_bitrate;
303 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_BITRATE,
304 i_bitrate)) != AACENC_OK) {
305 msg_Err(p_enc, "Unable to set the bitrate %i: %s", i_bitrate,
306 aac_get_errorstring(erraac));
/* NOTE(review): the value passed is 0 while the error message talks about
 * ADTS - confirm against the AACENC_TRANSMUX documentation which transport
 * format 0 selects. */
310 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_TRANSMUX, 0)) != AACENC_OK) {
311 msg_Err(p_enc, "Unable to set the ADTS transmux: %s", aac_get_errorstring(erraac));
314 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_SIGNALING_MODE,
315 (int)p_sys->i_signaling)) != AACENC_OK) {
316 /* use explicit backward compatible =1 */
317 /* use explicit hierarchical signaling =2 */
318 msg_Err(p_enc, "Unable to set signaling mode: %s", aac_get_errorstring(erraac));
321 if ((erraac = aacEncoder_SetParam(p_sys->handle, AACENC_AFTERBURNER,
322 (int)p_sys->b_afterburner)) != AACENC_OK) {
323 msg_Err(p_enc, "Unable to set the afterburner mode: %s", aac_get_errorstring(erraac));
/* A call with all-NULL buffers and arguments is used to initialize the
 * encoder with the parameters set above. */
326 if ((erraac = aacEncEncode(p_sys->handle, NULL, NULL, NULL, NULL)) != AACENC_OK) {
327 msg_Err(p_enc, "Unable to initialize the encoder: %s", aac_get_errorstring(erraac));
/* Query the resulting frame length, encoder delay and configuration data. */
330 AACENC_InfoStruct info = { 0 };
331 if ((erraac = aacEncInfo(p_sys->handle, &info)) != AACENC_OK) {
332 msg_Err(p_enc, "Unable to get the encoder info: %s", aac_get_errorstring(erraac));
336 /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
337 p_sys->i_maxoutputsize = 768*p_enc->fmt_in.audio.i_channels;
338 p_enc->fmt_in.audio.i_bitspersample = 16;
339 p_sys->i_frame_size = info.frameLength;
340 p_sys->i_encoderdelay = info.encoderDelay;
/* Copy the info.confSize bytes of info.confBuf into a freshly allocated
 * fmt_out.p_extra. */
342 p_enc->fmt_out.i_extra = info.confSize;
343 if( p_enc->fmt_out.i_extra )
345 p_enc->fmt_out.p_extra = malloc( p_enc->fmt_out.i_extra );
346 if ( p_enc->fmt_out.p_extra == NULL )
348 msg_Err(p_enc, "Unable to allocate fmt_out.p_extra");
351 memcpy( p_enc->fmt_out.p_extra, info.confBuf,
352 p_enc->fmt_out.i_extra );
355 p_enc->pf_encode_audio = EncodeAudio;
358 // TODO: Add more debug info to this config printout
/* NOTE(review): the label says p_extra but the value printed is i_extra,
 * i.e. the size of the extra data. */
359 msg_Dbg(p_enc, "fmt_out.p_extra = %i", p_enc->fmt_out.i_extra);
/* Closes the FDK-AAC encoder instance (presumably on the error-cleanup
 * path - TODO confirm). */
365 aacEncClose(&p_sys->handle);
370 /****************************************************************************
371 * EncodeAudio: feed input samples to the FDK-AAC encoder and collect the output blocks
372 ****************************************************************************/
/*
 * Encoding callback.
 *
 * Hands the 16-bit samples of p_aout_buf to aacEncEncode(), possibly over
 * several loop iterations, and appends every output block that contains data
 * to a block chain after filling in its pts, dts and length. When the sample
 * count is 0 the encoder is asked to purge its internal buffer instead.
 *
 * p_enc:      the encoder; its p_sys holds the FDK-AAC handle and the timing
 *             state carried from one call to the next.
 * p_aout_buf: input block with interleaved samples.
 * Returns a block_t pointer (presumably the head of the chain built here -
 * TODO confirm).
 */
373 static block_t *EncodeAudio( encoder_t *p_enc, block_t *p_aout_buf )
375 encoder_sys_t *p_sys;
384 p_sys = p_enc->p_sys;
/* With an input block: the output pts is the input pts moved back by the
 * encoder delay, converted from samples to CLOCK_FREQ units. */
387 if ( likely( p_aout_buf ) )
389 p_buffer = (int16_t *)p_aout_buf->p_buffer;
390 i_samples = p_aout_buf->i_nb_samples;
391 i_pts_out = p_aout_buf->i_pts - (mtime_t)((double)CLOCK_FREQ *
392 (double)p_sys->i_encoderdelay /
393 (double)p_enc->fmt_out.audio.i_rate);
/* First block ever (i_pts_last still 0): seed the previous timestamp one
 * frame duration before the current one. */
394 if (p_sys->i_pts_last == 0)
395 p_sys->i_pts_last = i_pts_out - (mtime_t)((double)CLOCK_FREQ *
396 (double)(p_sys->i_frame_size) /
397 (double)p_enc->fmt_out.audio.i_rate);
/* Continue from the last emitted timestamp (presumably the branch taken when
 * there is no input block - TODO confirm). */
402 i_pts_out = p_sys->i_pts_last;
405 i_samples_left = i_samples;
/* NOTE(review): the condition is >= 0, so the loop body also runs with zero
 * samples left; that is the case handled through i_samples == 0 below. */
408 while ( i_samples_left >= 0 )
410 AACENC_BufDesc in_buf = { 0 }, out_buf = { 0 };
411 AACENC_InArgs in_args = { 0 };
412 AACENC_OutArgs out_args = { 0 };
413 int in_identifier = IN_AUDIO_DATA;
414 int in_size, in_elem_size;
415 int out_identifier = OUT_BITSTREAM_DATA;
416 int out_size, out_elem_size;
417 void *in_ptr, *out_ptr;
419 if ( unlikely(i_samples == 0) ) {
420 // this forces the encoder to purge whatever is left in the internal buffer
421 in_args.numInSamples = -1;
/* Offer everything that has not been consumed yet: skip the
 * (i_samples - i_samples_left) sample frames already taken. in_size is
 * computed with 2 bytes per 16-bit sample, and numInSamples counts samples
 * across all channels. */
423 in_ptr = p_buffer + (i_samples - i_samples_left)*p_enc->fmt_in.audio.i_channels;
424 in_size = 2*p_enc->fmt_in.audio.i_channels*i_samples_left;
426 in_args.numInSamples = p_enc->fmt_in.audio.i_channels*i_samples_left;
428 in_buf.bufs = &in_ptr;
429 in_buf.bufferIdentifiers = &in_identifier;
430 in_buf.bufSizes = &in_size;
431 in_buf.bufElSizes = &in_elem_size;
/* One output block of the maximum packet size per encoder call.
 * NOTE(review): the block_Alloc() result is dereferenced on the next line
 * without a NULL check. */
434 p_block = block_Alloc( p_sys->i_maxoutputsize );
435 p_block->i_buffer = p_sys->i_maxoutputsize;
436 out_ptr = p_block->p_buffer;
437 out_size = p_block->i_buffer;
440 out_buf.bufs = &out_ptr;
441 out_buf.bufferIdentifiers = &out_identifier;
442 out_buf.bufSizes = &out_size;
443 out_buf.bufElSizes = &out_elem_size;
/* AACENC_ENCODE_EOF is only reported as information; any other error is
 * logged as a failure and the output block is released. */
445 if ((erraac = aacEncEncode(p_sys->handle, &in_buf, &out_buf, &in_args, &out_args)) != AACENC_OK) {
446 if (erraac == AACENC_ENCODE_EOF) {
447 msg_Info( p_enc, "Encoding final bytes (EOF)");
451 msg_Err( p_enc, "Encoding failed: %s", aac_get_errorstring(erraac));
452 block_Release(p_block);
/* The encoder produced data: shrink the block to the bytes actually written
 * and work out its timestamps. */
456 if ( out_args.numOutBytes > 0 )
458 p_block->i_buffer = out_args.numOutBytes;
459 if ( unlikely(i_samples == 0) )
461 // I only have the numOutBytes so approximate based on compression factor
462 double d_samples_forward = p_sys->d_compression_ratio*(double)out_args.numOutBytes;
463 i_pts_out += (mtime_t)d_samples_forward;
464 p_block->i_length = (mtime_t)d_samples_forward;
465 // TODO: It would be more precise (a few microseconds) to use d_samples_forward =
466 // (mtime_t)CLOCK_FREQ * (mtime_t)p_sys->i_frame_size/(mtime_t)p_enc->fmt_out.audio.i_rate
467 // but I am not sure if the lib always outputs a full frame when
468 // emptying the internal buffer in the EOF scenario
/* First iteration for this input block: move the pts back by the part of the
 * frame that was not taken from this call's input, and use the distance to
 * the previous timestamp as the block length. */
472 if ( i_loop_count == 0 )
474 // There can be an implicit delay in the first loop cycle because of leftover bytes
475 // in the library buffer from the prior block
476 double d_samples_delay = (double)p_sys->i_frame_size - (double)out_args.numInSamples /
477 (double)p_enc->fmt_in.audio.i_channels;
478 i_pts_out -= (mtime_t)((double)CLOCK_FREQ * d_samples_delay /
479 (double)p_enc->fmt_out.audio.i_rate);
480 //p_block->i_length = (mtime_t)((double)CLOCK_FREQ * (double)p_sys->i_frame_size /
481 // (double)p_enc->fmt_out.audio.i_rate);
482 p_block->i_length = i_pts_out - p_sys->i_pts_last;
/* Later iterations: advance the pts by the duration of the samples the
 * encoder consumed in this call. */
486 double d_samples_forward = (double)out_args.numInSamples/(double)p_enc->fmt_in.audio.i_channels;
487 double d_length = ((double)CLOCK_FREQ * d_samples_forward /
488 (double)p_enc->fmt_out.audio.i_rate);
489 i_pts_out += (mtime_t) d_length;
490 p_block->i_length = (mtime_t) d_length;
493 p_block->i_dts = p_block->i_pts = i_pts_out;
494 block_ChainAppend( &p_chain, p_block );
495 //msg_Dbg( p_enc, "p_block->i_dts %llu, p_block->i_length %llu, p_sys->i_pts_last %llu\n\
496 // out_args.numOutBytes = %i, out_args.numInSamples = %i, i_samples %i, i_loop_count %i",
497 // p_block->i_dts , p_block->i_length,p_sys->i_pts_last,
498 // out_args.numOutBytes, out_args.numInSamples, i_samples, i_loop_count);
/* Bookkeeping: remember the length-per-output-byte ratio for later estimates,
 * subtract the consumed sample frames and store the last emitted pts. */
499 if ( likely(i_samples > 0) )
501 p_sys->d_compression_ratio = (double)p_block->i_length / (double)out_args.numOutBytes;
502 i_samples_left -= out_args.numInSamples/p_enc->fmt_in.audio.i_channels;
503 p_sys->i_pts_last = i_pts_out;
/* No output was produced in this call: the unused block is released. */
508 block_Release(p_block);
509 //msg_Dbg( p_enc, "aac_encode_audio: not enough data yet");
/* Safety check against a runaway loop. */
512 if ( unlikely(i_loop_count++ > 100) )
514 msg_Err( p_enc, "Loop count greater than 100!!!, something must be wrong with the encoder library");
523 /*****************************************************************************
524 * CloseEncoder: encoder destruction
525 *****************************************************************************/
/*
 * Module close callback: closes the FDK-AAC encoder instance stored in the
 * encoder's private data.
 *
 * p_this: the VLC object being closed; it is used as an encoder_t.
 */
526 static void CloseEncoder( vlc_object_t *p_this )
528 encoder_t *p_enc = (encoder_t *)p_this;
529 encoder_sys_t *p_sys = p_enc->p_sys;
531 aacEncClose(&p_sys->handle);