2 * Audio Toolbox system codecs
4 * copyright (c) 2016 Rodger Combs
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <AudioToolbox/AudioToolbox.h>
26 #include "audio_frame_queue.h"
28 #include "bytestream.h"
30 #include "libavformat/isom.h"
31 #include "libavutil/avassert.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/log.h"
/*
 * Private state of one AudioToolbox encoder instance (stored in
 * avctx->priv_data). Only the converter member is visible in this view;
 * other code in this file also accesses mode, quality, pkt_size,
 * frame_size, afq, in_frame and new_in_frame through this struct.
 * NOTE(review): the type is named "Decode" although every user in this
 * file is an encoder function -- confirm whether the name is intentional.
 */
35 typedef struct ATDecodeContext {
/* Converter handle created by AudioConverterNew() in ffat_init_encoder. */
40 AudioConverterRef converter;
/*
 * Translate an FFmpeg codec ID (and an AAC profile) into the matching
 * AudioToolbox kAudioFormat constant.
 * NOTE(review): the switch headers and default labels of this function
 * are not present in this view; confirm the fallback for unknown AAC
 * profiles against the full file.
 */
50 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
/* AAC profile -> AAC object type variants. */
55 case FF_PROFILE_AAC_LOW:
57 return kAudioFormatMPEG4AAC;
58 case FF_PROFILE_AAC_HE:
59 return kAudioFormatMPEG4AAC_HE;
60 case FF_PROFILE_AAC_HE_V2:
61 return kAudioFormatMPEG4AAC_HE_V2;
62 case FF_PROFILE_AAC_LD:
63 return kAudioFormatMPEG4AAC_LD;
64 case FF_PROFILE_AAC_ELD:
65 return kAudioFormatMPEG4AAC_ELD;
/* Remaining codecs map one-to-one, independent of the profile. */
67 case AV_CODEC_ID_ADPCM_IMA_QT:
68 return kAudioFormatAppleIMA4;
69 case AV_CODEC_ID_ALAC:
70 return kAudioFormatAppleLossless;
71 case AV_CODEC_ID_ILBC:
72 return kAudioFormatiLBC;
73 case AV_CODEC_ID_PCM_ALAW:
74 return kAudioFormatALaw;
75 case AV_CODEC_ID_PCM_MULAW:
76 return kAudioFormatULaw;
/* The negated string literal is always false, so this assertion always fires. */
78 av_assert0(!"Invalid codec ID!");
/*
 * Refresh the codec context from properties queried on at->converter:
 * the maximum output packet size goes to at->pkt_size, the priming info
 * to avctx->initial_padding, and the current output stream description
 * to avctx->frame_size (plus avctx->block_align for iLBC).
 */
83 static void ffat_update_ctx(AVCodecContext *avctx)
85 ATDecodeContext *at = avctx->priv_data;
86 UInt32 size = sizeof(unsigned);
87 AudioConverterPrimeInfo prime_info;
88 AudioStreamBasicDescription out_format;
/* NOTE(review): the status of this query is not checked; only the
 * resulting pkt_size value is inspected below. */
90 AudioConverterGetProperty(at->converter,
91 kAudioConverterPropertyMaximumOutputPacketSize,
92 &size, &at->pkt_size);
/* Fall back to a 50 KiB packet buffer when no usable size was reported. */
94 if (at->pkt_size <= 0)
95 at->pkt_size = 1024 * 50;
97 size = sizeof(prime_info);
/* A zero status means success; leadingFrames becomes the initial padding. */
99 if (!AudioConverterGetProperty(at->converter,
100 kAudioConverterPrimeInfo,
101 &size, &prime_info)) {
102 avctx->initial_padding = prime_info.leadingFrames;
105 size = sizeof(out_format);
106 if (!AudioConverterGetProperty(at->converter,
107 kAudioConverterCurrentOutputStreamDescription,
108 &size, &out_format)) {
/* Only overwrite the context fields when the converter reports non-zero values. */
109 if (out_format.mFramesPerPacket)
110 avctx->frame_size = out_format.mFramesPerPacket;
111 if (out_format.mBytesPerPacket && avctx->codec_id == AV_CODEC_ID_ILBC)
112 avctx->block_align = out_format.mBytesPerPacket;
/* Remember the unscaled frame size: ffat_encode divides avctx->frame_size
 * by this value to obtain the packet count it requests per call. */
115 at->frame_size = avctx->frame_size;
/* For A-law / mu-law, enlarge both the packet buffer and the frame size
 * by a factor of 1024. */
116 if (avctx->codec_id == AV_CODEC_ID_PCM_MULAW ||
117 avctx->codec_id == AV_CODEC_ID_PCM_ALAW) {
118 at->pkt_size *= 1024;
119 avctx->frame_size *= 1024;
/*
 * Read a descriptor header from gb: one tag byte stored through *tag,
 * followed by a length accumulated from the low 7 bits of successive bytes.
 * NOTE(review): the loop bound and the return statement are not visible
 * here; callers in this file use the return value as the descriptor length.
 */
123 static int read_descr(GetByteContext *gb, int *tag)
127 *tag = bytestream2_get_byte(gb);
129 int c = bytestream2_get_byte(gb);
/* Shift in 7 payload bits per byte, most significant group first. */
130 len = (len << 7) | (c & 0x7f);
/*
 * Choose the iLBC mode: block_align of 38 or 50 decides directly,
 * otherwise a positive bit rate decides (<= 14000 gives 30, above gives 20).
 * NOTE(review): the values returned for the block_align branches and the
 * final fallback are not visible here. ffat_init_encoder maps mode 20 to
 * 38 bytes per packet and every other mode to 50, so presumably 38 -> 20
 * and 50 -> 30; confirm against the full file.
 */
137 static int get_ilbc_mode(AVCodecContext *avctx)
139 if (avctx->block_align == 38)
141 else if (avctx->block_align == 50)
143 else if (avctx->bit_rate > 0)
144 return avctx->bit_rate <= 14000 ? 30 : 20;
/*
 * Convert an FFmpeg channel bit index into an AudioToolbox channel label
 * by locating which range of AV_CH_ masks the bit falls into.
 * NOTE(review): the value returned by most branches is not visible in
 * this view; only the AV_CH_LOW_FREQUENCY_2 -> kAudioChannelLabel_LFE2
 * mapping is.
 */
149 static av_cold int get_channel_label(int channel)
/* NOTE(review): "1 << channel" is evaluated as int and only then widened
 * to uint64_t. remap_layout calls this with indices up to 63, for which
 * the int shift is undefined behaviour, so the comparisons against masks
 * at bit 31 and above cannot work as written. A 64-bit constant
 * (e.g. UINT64_C(1) << channel) looks intended -- confirm and fix. */
151 uint64_t map = 1 << channel;
152 if (map <= AV_CH_LOW_FREQUENCY)
154 else if (map <= AV_CH_BACK_RIGHT)
156 else if (map <= AV_CH_BACK_CENTER)
158 else if (map <= AV_CH_SIDE_RIGHT)
160 else if (map <= AV_CH_TOP_BACK_RIGHT)
162 else if (map <= AV_CH_STEREO_RIGHT)
164 else if (map <= AV_CH_WIDE_RIGHT)
166 else if (map <= AV_CH_SURROUND_DIRECT_RIGHT)
168 else if (map == AV_CH_LOW_FREQUENCY_2)
169 return kAudioChannelLabel_LFE2;
/*
 * Fill an AudioChannelLayout that describes in_layout with one explicit
 * channel description per channel.
 *
 * layout     destination; the caller must have room for count descriptions
 * in_layout  FFmpeg channel mask to translate
 * count      number of channels / descriptions to write
 *
 * Returns AVERROR(EINVAL) when the mask runs out of set bits or a channel
 * has no label. The success return is not visible in this view; the caller
 * treats a negative value as failure.
 */
174 static int remap_layout(AudioChannelLayout *layout, uint64_t in_layout, int count)
178 layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
179 layout->mNumberChannelDescriptions = count;
180 for (i = 0; i < count; i++) {
/* Advance c to the next set bit of in_layout.
 * NOTE(review): "1 << c" is an int shift, so bits 31..63 of in_layout are
 * never tested correctly, and the shift is evaluated before the c < 64
 * bound check. Both look like defects; a 64-bit shift with the bound
 * tested first appears to be the intent -- confirm and fix. */
182 while (!(in_layout & (1 << c)) && c < 64)
185 return AVERROR(EINVAL); // This should never happen
186 label = get_channel_label(c);
/* The label is stored before any validity check on it. */
187 layout->mChannelDescriptions[i].mChannelLabel = label;
189 return AVERROR(EINVAL);
/*
 * Map an FFmpeg channel layout mask to the AudioToolbox layout tag used
 * for the AAC output channel layout (see ffat_init_encoder).
 * NOTE(review): the return value for layouts not listed here is not
 * visible in this view.
 */
195 static int get_aac_tag(uint64_t in_layout)
198 case AV_CH_LAYOUT_MONO:
199 return kAudioChannelLayoutTag_Mono;
200 case AV_CH_LAYOUT_STEREO:
201 return kAudioChannelLayoutTag_Stereo;
202 case AV_CH_LAYOUT_QUAD:
203 return kAudioChannelLayoutTag_AAC_Quadraphonic;
204 case AV_CH_LAYOUT_OCTAGONAL:
205 return kAudioChannelLayoutTag_AAC_Octagonal;
206 case AV_CH_LAYOUT_SURROUND:
207 return kAudioChannelLayoutTag_AAC_3_0;
208 case AV_CH_LAYOUT_4POINT0:
209 return kAudioChannelLayoutTag_AAC_4_0;
210 case AV_CH_LAYOUT_5POINT0:
211 return kAudioChannelLayoutTag_AAC_5_0;
212 case AV_CH_LAYOUT_5POINT1:
213 return kAudioChannelLayoutTag_AAC_5_1;
214 case AV_CH_LAYOUT_6POINT0:
215 return kAudioChannelLayoutTag_AAC_6_0;
216 case AV_CH_LAYOUT_6POINT1:
217 return kAudioChannelLayoutTag_AAC_6_1;
218 case AV_CH_LAYOUT_7POINT0:
219 return kAudioChannelLayoutTag_AAC_7_0;
/* FFmpeg's "wide back" 7.1 is the one paired with the AAC_7_1 tag ... */
220 case AV_CH_LAYOUT_7POINT1_WIDE_BACK:
221 return kAudioChannelLayoutTag_AAC_7_1;
/* ... while plain 7.1 uses an MPEG tag rather than an AAC_ one. */
222 case AV_CH_LAYOUT_7POINT1:
223 return kAudioChannelLayoutTag_MPEG_7_1_C;
/*
 * Encoder init: create the AudioConverter (linear PCM in, target codec
 * out), configure channel layouts, rate control, bit rate and quality,
 * fetch the magic cookie into avctx->extradata, then refresh the context
 * and initialise the audio frame queue.
 *
 * Visible error returns: AVERROR(ENOMEM) on allocation failure,
 * AVERROR_UNKNOWN on converter creation or cookie errors, and
 * AVERROR(EINVAL) for unsupported channel layouts.
 *
 * NOTE(review): several statements of this function are not present in
 * this view (the embedded numbering skips); the comments below describe
 * only what is visible.
 * NOTE(review): every error return after AudioConverterNew() succeeds
 * leaves at->converter undisposed in this function. FFAT_ENC sets
 * caps_internal to FF_CODEC_CAP_INIT_THREADSAFE only, so confirm whether
 * ffat_close_encoder runs after a failed init.
 */
229 static av_cold int ffat_init_encoder(AVCodecContext *avctx)
231 ATDecodeContext *at = avctx->priv_data;
/* Input side: packed linear PCM, one frame per packet, flagged as float,
 * unsigned (U8) or signed integer according to avctx->sample_fmt. */
234 AudioStreamBasicDescription in_format = {
235 .mSampleRate = avctx->sample_rate,
236 .mFormatID = kAudioFormatLinearPCM,
237 .mFormatFlags = ((avctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
238 avctx->sample_fmt == AV_SAMPLE_FMT_DBL) ? kAudioFormatFlagIsFloat
239 : avctx->sample_fmt == AV_SAMPLE_FMT_U8 ? 0
240 : kAudioFormatFlagIsSignedInteger)
241 | kAudioFormatFlagIsPacked,
242 .mBytesPerPacket = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
243 .mFramesPerPacket = 1,
244 .mBytesPerFrame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
245 .mChannelsPerFrame = avctx->channels,
246 .mBitsPerChannel = av_get_bytes_per_sample(avctx->sample_fmt) * 8,
/* Output side: same rate and channel count, codec-specific format ID. */
248 AudioStreamBasicDescription out_format = {
249 .mSampleRate = avctx->sample_rate,
250 .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
251 .mChannelsPerFrame = in_format.mChannelsPerFrame,
/* Scratch layout sized for one description per channel; it is freed on
 * every visible exit path up to and including the output layout setup. */
253 UInt32 layout_size = sizeof(AudioChannelLayout) +
254 sizeof(AudioChannelDescription) * avctx->channels;
255 AudioChannelLayout *channel_layout = av_malloc(layout_size);
258 return AVERROR(ENOMEM);
/* iLBC: derive packet geometry from the mode at an 8000 Hz rate; mode 20
 * uses 38-byte packets, any other mode 50-byte packets. */
260 if (avctx->codec_id == AV_CODEC_ID_ILBC) {
261 int mode = get_ilbc_mode(avctx);
262 out_format.mFramesPerPacket = 8000 * mode / 1000;
263 out_format.mBytesPerPacket = (mode == 20 ? 38 : 50);
266 status = AudioConverterNew(&in_format, &out_format, &at->converter);
269 av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
270 av_free(channel_layout);
271 return AVERROR_UNKNOWN;
/* No layout supplied by the caller: use the default for the channel count. */
274 if (!avctx->channel_layout)
275 avctx->channel_layout = av_get_default_channel_layout(avctx->channels);
277 if ((status = remap_layout(channel_layout, avctx->channel_layout, avctx->channels)) < 0) {
278 av_log(avctx, AV_LOG_ERROR, "Invalid channel layout\n");
279 av_free(channel_layout);
283 if (AudioConverterSetProperty(at->converter, kAudioConverterInputChannelLayout,
284 layout_size, channel_layout)) {
285 av_log(avctx, AV_LOG_ERROR, "Unsupported input channel layout\n");
286 av_free(channel_layout);
287 return AVERROR(EINVAL);
/* For AAC the output layout is expressed as a tag with no per-channel
 * descriptions; the same buffer is reused for it. */
289 if (avctx->codec_id == AV_CODEC_ID_AAC) {
290 int tag = get_aac_tag(avctx->channel_layout);
292 channel_layout->mChannelLayoutTag = tag;
293 channel_layout->mNumberChannelDescriptions = 0;
296 if (AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
297 layout_size, channel_layout)) {
298 av_log(avctx, AV_LOG_ERROR, "Unsupported output channel layout\n");
299 av_free(channel_layout);
300 return AVERROR(EINVAL);
302 av_free(channel_layout);
/* Best effort: the status of this and the following SetProperty calls
 * is not checked. */
304 if (avctx->bits_per_raw_sample)
305 AudioConverterSetProperty(at->converter,
306 kAudioConverterPropertyBitDepthHint,
307 sizeof(avctx->bits_per_raw_sample),
308 &avctx->bits_per_raw_sample);
310 #if !TARGET_OS_IPHONE
/* Variable rate control when the QSCALE flag is set, constant otherwise.
 * NOTE(review): any condition guarding this assignment is not visible here. */
312 at->mode = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
313 kAudioCodecBitRateControlMode_Variable :
314 kAudioCodecBitRateControlMode_Constant;
316 AudioConverterSetProperty(at->converter, kAudioCodecPropertyBitRateControlMode,
317 sizeof(at->mode), &at->mode);
/* VBR: global_quality scaled down by FF_QP2LAMBDA, clipped to 0..14 with
 * a warning when it was out of range. */
319 if (at->mode == kAudioCodecBitRateControlMode_Variable) {
320 int q = avctx->global_quality / FF_QP2LAMBDA;
321 if (q < 0 || q > 14) {
322 av_log(avctx, AV_LOG_WARNING,
323 "VBR quality %d out of range, should be 0-14\n", q);
324 q = av_clip(q, 0, 14);
327 AudioConverterSetProperty(at->converter, kAudioCodecPropertySoundQualityForVBR,
/* Bit rate given: compare it with the ranges the converter reports as
 * applicable and warn when a different rate has to be used. */
331 if (avctx->bit_rate > 0) {
332 UInt32 rate = avctx->bit_rate;
334 status = AudioConverterGetPropertyInfo(at->converter,
335 kAudioConverterApplicableEncodeBitRates,
337 if (!status && size) {
338 UInt32 new_rate = rate;
341 AudioValueRange *ranges = av_malloc(size);
343 return AVERROR(ENOMEM);
344 AudioConverterGetProperty(at->converter,
345 kAudioConverterApplicableEncodeBitRates,
347 count = size / sizeof(AudioValueRange);
348 for (i = 0; i < count; i++) {
349 AudioValueRange *range = &ranges[i];
350 if (rate >= range->mMinimum && rate <= range->mMaximum) {
353 } else if (rate > range->mMaximum) {
354 new_rate = range->mMaximum;
356 new_rate = range->mMinimum;
360 if (new_rate != rate) {
361 av_log(avctx, AV_LOG_WARNING,
362 "Bitrate %u not allowed; changing to %u\n", rate, new_rate);
367 AudioConverterSetProperty(at->converter, kAudioConverterEncodeBitRate,
368 sizeof(rate), &rate);
/* Map the 0..2 aac_at_quality option onto 96, 64 or 32. */
371 at->quality = 96 - at->quality * 32;
372 AudioConverterSetProperty(at->converter, kAudioConverterCodecQuality,
373 sizeof(at->quality), &at->quality);
/* Magic cookie -> extradata. The cookie size is queried straight into
 * avctx->extradata_size.
 * NOTE(review): that passes a pointer to avctx->extradata_size where the
 * API expects a UInt32 out-parameter; confirm the types match. */
375 if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterCompressionMagicCookie,
376 &avctx->extradata_size, NULL) &&
377 avctx->extradata_size) {
378 int extradata_size = avctx->extradata_size;
380 if (!(avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE)))
381 return AVERROR(ENOMEM);
/* ALAC: write a 0x24 size word and an 'alac' tag at the start, and have
 * the cookie written at offset 12.
 * NOTE(review): extradata_size is assigned 0x24 twice in this branch;
 * the second assignment is redundant. */
382 if (avctx->codec_id == AV_CODEC_ID_ALAC) {
383 avctx->extradata_size = 0x24;
384 AV_WB32(avctx->extradata, 0x24);
385 AV_WB32(avctx->extradata + 4, MKBETAG('a','l','a','c'));
386 extradata = avctx->extradata + 12;
387 avctx->extradata_size = 0x24;
389 extradata = avctx->extradata;
391 status = AudioConverterGetProperty(at->converter,
392 kAudioConverterCompressionMagicCookie,
393 &extradata_size, extradata);
395 av_log(avctx, AV_LOG_ERROR, "AudioToolbox cookie error: %i\n", (int)status);
396 return AVERROR_UNKNOWN;
/* AAC: walk the cookie's descriptors and keep only the payload of the
 * decoder-specific descriptor, moved to the start of extradata. */
397 } else if (avctx->codec_id == AV_CODEC_ID_AAC) {
400 bytestream2_init(&gb, extradata, extradata_size);
402 len = read_descr(&gb, &tag);
403 if (tag == MP4DecConfigDescrTag) {
404 bytestream2_skip(&gb, 13);
405 len = read_descr(&gb, &tag);
406 if (tag == MP4DecSpecificDescrTag) {
/* Clamp the declared length to the bytes actually left in the buffer. */
407 len = FFMIN(gb.buffer_end - gb.buffer, len);
408 memmove(extradata, gb.buffer, len);
409 avctx->extradata_size = len;
/* ES descriptor: skip two bytes, read the flags byte, then skip the
 * optional fields each flag announces. */
412 } else if (tag == MP4ESDescrTag) {
414 bytestream2_skip(&gb, 2);
415 flags = bytestream2_get_byte(&gb);
416 if (flags & 0x80) //streamDependenceFlag
417 bytestream2_skip(&gb, 2);
418 if (flags & 0x40) //URL_Flag
419 bytestream2_skip(&gb, bytestream2_get_byte(&gb));
420 if (flags & 0x20) //OCRstreamFlag
421 bytestream2_skip(&gb, 2);
423 } while (bytestream2_get_bytes_left(&gb));
/* Any codec other than AAC and ALAC keeps the cookie as is. */
424 } else if (avctx->codec_id != AV_CODEC_ID_ALAC) {
425 avctx->extradata_size = extradata_size;
/* Pull packet size, padding and frame size from the configured converter. */
429 ffat_update_ctx(avctx);
431 #if !TARGET_OS_IPHONE && defined(__MAC_10_9)
/* VBR with a maximum rate: derive a per-packet size limit from
 * rc_max_rate, frame_size and sample_rate. */
432 if (at->mode == kAudioCodecBitRateControlMode_Variable && avctx->rc_max_rate) {
433 UInt32 max_size = avctx->rc_max_rate * avctx->frame_size / avctx->sample_rate;
435 AudioConverterSetProperty(at->converter, kAudioCodecPropertyPacketSizeLimitForVBR,
436 sizeof(max_size), &max_size);
440 ff_af_queue_init(avctx, &at->afq);
/*
 * Input callback handed to AudioConverterFillComplexBuffer by ffat_encode:
 * it offers the pending input frame to the converter as a single buffer.
 * NOTE(review): the user-data parameter, the handling of the "no input"
 * case and the return statements are not visible in this view.
 */
445 static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_packets,
446 AudioBufferList *data,
447 AudioStreamPacketDescription **packets,
450 AVCodecContext *avctx = inctx;
451 ATDecodeContext *at = avctx->priv_data;
/* Drop the previously supplied frame and take over the newly queued one. */
458 av_frame_unref(&at->in_frame);
459 av_frame_move_ref(&at->in_frame, &at->new_in_frame);
461 if (!at->in_frame.data[0]) {
/* Describe the frame's first data plane as one buffer carrying all channels. */
466 data->mNumberBuffers = 1;
467 data->mBuffers[0].mNumberChannels = avctx->channels;
468 data->mBuffers[0].mDataByteSize = at->in_frame.nb_samples *
469 av_get_bytes_per_sample(avctx->sample_fmt) *
471 data->mBuffers[0].mData = at->in_frame.data[0];
/* The input format uses one frame per packet (see ffat_init_encoder), so
 * the packet count is capped at the number of samples in the frame. */
472 if (*nb_packets > at->in_frame.nb_samples)
473 *nb_packets = at->in_frame.nb_samples;
/*
 * encode2 callback: queue the input frame (if any), run the converter
 * into the packet buffer and report through *got_packet_ptr whether
 * output was produced.
 * NOTE(review): parts of this function (error returns, the condition on
 * frame, the final return) are not visible in this view.
 */
478 static int ffat_encode(AVCodecContext *avctx, AVPacket *avpkt,
479 const AVFrame *frame, int *got_packet_ptr)
481 ATDecodeContext *at = avctx->priv_data;
/* Single output buffer of at->pkt_size bytes; its data pointer is filled
 * in once the packet is allocated. */
484 AudioBufferList out_buffers = {
488 .mNumberChannels = avctx->channels,
489 .mDataByteSize = at->pkt_size,
493 AudioStreamPacketDescription out_pkt_desc = {0};
495 if ((ret = ff_alloc_packet2(avctx, avpkt, at->pkt_size, 0)) < 0)
498 av_frame_unref(&at->new_in_frame);
/* Track the frame in the audio frame queue and keep a reference for the
 * input callback to consume. */
501 if ((ret = ff_af_queue_add(&at->afq, frame)) < 0)
503 if ((ret = av_frame_ref(&at->new_in_frame, frame)) < 0)
509 out_buffers.mBuffers[0].mData = avpkt->data;
/* *got_packet_ptr doubles as the in/out packet count of the converter
 * call: request frame_size / at->frame_size packets (more than one only
 * when ffat_update_ctx scaled avctx->frame_size). */
511 *got_packet_ptr = avctx->frame_size / at->frame_size;
/* A packet description is requested only in the single-packet case. */
513 ret = AudioConverterFillComplexBuffer(at->converter, ffat_encode_callback, avctx,
514 got_packet_ptr, &out_buffers,
515 (avctx->frame_size > at->frame_size) ? NULL : &out_pkt_desc);
/* Status 0 or 1 with a non-zero packet count means output was produced.
 * NOTE(review): status 1 is presumably what ffat_encode_callback returns
 * when it has no input left -- its return statements are not visible. */
516 if ((!ret || ret == 1) && *got_packet_ptr) {
517 avpkt->size = out_buffers.mBuffers[0].mDataByteSize;
/* Prefer the converter-reported frame count when it is non-zero. */
518 ff_af_queue_remove(&at->afq, out_pkt_desc.mVariableFramesInPacket ?
519 out_pkt_desc.mVariableFramesInPacket :
/* Any other non-zero status is logged at warning level. */
523 } else if (ret && ret != 1) {
524 av_log(avctx, AV_LOG_WARNING, "Encode error: %i\n", ret);
/*
 * flush callback: reset the converter and drop both the queued and the
 * in-use input frame references.
 */
530 static av_cold void ffat_encode_flush(AVCodecContext *avctx)
532 ATDecodeContext *at = avctx->priv_data;
533 AudioConverterReset(at->converter);
534 av_frame_unref(&at->new_in_frame);
535 av_frame_unref(&at->in_frame);
/*
 * close callback: dispose of the converter, release both input frame
 * references and close the audio frame queue.
 * NOTE(review): the return statement is not visible in this view.
 */
538 static av_cold int ffat_close_encoder(AVCodecContext *avctx)
540 ATDecodeContext *at = avctx->priv_data;
541 AudioConverterDispose(at->converter);
542 av_frame_unref(&at->new_in_frame);
543 av_frame_unref(&at->in_frame);
544 ff_af_queue_close(&at->afq);
/*
 * Profiles listed for the AAC encoder; these are the same profile values
 * that ffat_get_format_id distinguishes. The FF_PROFILE_UNKNOWN entry
 * ends the list.
 */
548 static const AVProfile aac_profiles[] = {
549 { FF_PROFILE_AAC_LOW, "LC" },
550 { FF_PROFILE_AAC_HE, "HE-AAC" },
551 { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
552 { FF_PROFILE_AAC_LD, "LD" },
553 { FF_PROFILE_AAC_ELD, "ELD" },
554 { FF_PROFILE_UNKNOWN },
/* Flag set shared by every option below: audio + encoding parameter. */
557 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options of the AudioToolbox encoders, stored in ATDecodeContext.
 * NOTE(review): the closing of the #if section and of the array is not
 * visible in this view.
 */
558 static const AVOption options[] = {
559 #if !TARGET_OS_IPHONE
/* Rate control mode; defaults to -1 ("auto"), followed by its named constants. */
560 {"aac_at_mode", "ratecontrol mode", offsetof(ATDecodeContext, mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, kAudioCodecBitRateControlMode_Variable, AE, "mode"},
561 {"auto", "VBR if global quality is given; CBR otherwise", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, AE, "mode"},
562 {"cbr", "constant bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Constant}, INT_MIN, INT_MAX, AE, "mode"},
563 {"abr", "long-term average bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_LongTermAverage}, INT_MIN, INT_MAX, AE, "mode"},
564 {"cvbr", "constrained variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_VariableConstrained}, INT_MIN, INT_MAX, AE, "mode"},
565 {"vbr" , "variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Variable}, INT_MIN, INT_MAX, AE, "mode"},
/* Quality/speed trade-off in the range 0..2 (default 0); ffat_init_encoder
 * converts it with 96 - quality * 32 before handing it to the converter. */
567 {"aac_at_quality", "quality vs speed control", offsetof(ATDecodeContext, quality), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, AE},
/*
 * Define the AVClass ffat_<NAME>_enc_class for one encoder, with the class
 * name "at_<NAME>_enc".
 * NOTE(review): some initializer lines of the macro are not visible in
 * this view.
 */
571 #define FFAT_ENC_CLASS(NAME) \
572 static const AVClass ffat_##NAME##_enc_class = { \
573 .class_name = "at_" #NAME "_enc", \
574 .item_name = av_default_item_name, \
576 .version = LIBAVUTIL_VERSION_INT, \
/*
 * Define the AVClass and the AVCodec ff_<NAME>_at_encoder for one
 * AudioToolbox encoder.
 *
 * NAME      builds the identifiers and the "<NAME>_at" codec name
 * ID        codec ID (NOTE(review): the initializer using it is not
 *           visible in this view; presumably .id)
 * PROFILES  stored in .profiles
 * ...       spliced in directly after the capability flags, so an
 *           invocation may pass "| EXTRA_CAP" and/or further designated
 *           initializers after a comma
 */
579 #define FFAT_ENC(NAME, ID, PROFILES, ...) \
580 FFAT_ENC_CLASS(NAME) \
581 AVCodec ff_##NAME##_at_encoder = { \
582 .name = #NAME "_at", \
583 .long_name = NULL_IF_CONFIG_SMALL(#NAME " (AudioToolbox)"), \
584 .type = AVMEDIA_TYPE_AUDIO, \
586 .priv_data_size = sizeof(ATDecodeContext), \
587 .init = ffat_init_encoder, \
588 .close = ffat_close_encoder, \
589 .encode2 = ffat_encode, \
590 .flush = ffat_encode_flush, \
591 .priv_class = &ffat_##NAME##_enc_class, \
592 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY __VA_ARGS__, \
593 .sample_fmts = (const enum AVSampleFormat[]) { \
595 AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_NONE \
597 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, \
598 .profiles = PROFILES, \
/*
 * Channel layouts advertised by the AAC encoder (passed as
 * .channel_layouts in its FFAT_ENC instantiation). Each visible entry has
 * a matching case in get_aac_tag.
 * NOTE(review): some entries and the list terminator are not visible in
 * this view.
 */
601 static const uint64_t aac_at_channel_layouts[] = {
604 AV_CH_LAYOUT_SURROUND,
605 AV_CH_LAYOUT_4POINT0,
606 AV_CH_LAYOUT_5POINT0,
607 AV_CH_LAYOUT_5POINT1,
608 AV_CH_LAYOUT_6POINT0,
609 AV_CH_LAYOUT_6POINT1,
610 AV_CH_LAYOUT_7POINT0,
611 AV_CH_LAYOUT_7POINT1_WIDE_BACK,
613 AV_CH_LAYOUT_OCTAGONAL,
/*
 * Encoder instantiations. The arguments after PROFILES are pasted right
 * after the capability flags inside FFAT_ENC.
 */
/* AAC: no extra capability (empty argument), then the channel layout list. */
617 FFAT_ENC(aac, AV_CODEC_ID_AAC, aac_profiles, , .channel_layouts = aac_at_channel_layouts)
/* NOTE(review): disabled instantiation kept as commented-out code; either
 * document why it is disabled or remove it. */
618 //FFAT_ENC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL)
/* ALAC additionally advertises variable frame size and lossless coding. */
619 FFAT_ENC(alac, AV_CODEC_ID_ALAC, NULL, | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | AV_CODEC_CAP_LOSSLESS)
620 FFAT_ENC(ilbc, AV_CODEC_ID_ILBC, NULL)
621 FFAT_ENC(pcm_alaw, AV_CODEC_ID_PCM_ALAW, NULL)
622 FFAT_ENC(pcm_mulaw, AV_CODEC_ID_PCM_MULAW, NULL)