2 * Audio Toolbox system codecs
4 * copyright (c) 2016 Rodger Combs
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <AudioToolbox/AudioToolbox.h>
25 #define FF_BUFQUEUE_SIZE 256
26 #include "libavfilter/bufferqueue.h"
29 #include "audio_frame_queue.h"
31 #include "bytestream.h"
33 #include "libavformat/isom.h"
34 #include "libavutil/avassert.h"
35 #include "libavutil/opt.h"
36 #include "libavutil/log.h"
/*
 * Private codec context used by every encoder callback in this file
 * (obtained through avctx->priv_data).
 * NOTE(review): only some of its fields are visible in this excerpt.
 */
38 typedef struct ATDecodeContext {
/* AudioToolbox converter handle; created with AudioConverterNew() in
 * ffat_init_encoder() and released in ffat_close_encoder(). */
43 AudioConverterRef converter;
/* Cloned input frames queued by ffat_encode() and consumed by
 * ffat_encode_callback(). */
44 struct FFBufQueue frame_queue;
/* Frames already handed to the converter by the callback; emptied by
 * ffat_encode() after each AudioConverterFillComplexBuffer() call. */
45 struct FFBufQueue used_frame_queue;
/*
 * Translate an FFmpeg codec ID (and, for the FF_PROFILE_AAC_* cases, the
 * profile) into the matching AudioToolbox kAudioFormat* identifier.
 * NOTE(review): the switch headers are not visible in this excerpt; the
 * profile cases are presumably nested under an AAC codec case -- confirm.
 */
53 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
58 case FF_PROFILE_AAC_LOW:
60 return kAudioFormatMPEG4AAC;
61 case FF_PROFILE_AAC_HE:
62 return kAudioFormatMPEG4AAC_HE;
63 case FF_PROFILE_AAC_HE_V2:
64 return kAudioFormatMPEG4AAC_HE_V2;
65 case FF_PROFILE_AAC_LD:
66 return kAudioFormatMPEG4AAC_LD;
67 case FF_PROFILE_AAC_ELD:
68 return kAudioFormatMPEG4AAC_ELD;
70 case AV_CODEC_ID_ADPCM_IMA_QT:
71 return kAudioFormatAppleIMA4;
72 case AV_CODEC_ID_ALAC:
73 return kAudioFormatAppleLossless;
74 case AV_CODEC_ID_ILBC:
75 return kAudioFormatiLBC;
76 case AV_CODEC_ID_PCM_ALAW:
77 return kAudioFormatALaw;
78 case AV_CODEC_ID_PCM_MULAW:
79 return kAudioFormatULaw;
/* Reached only when none of the cases above returned: the assertion always
 * fails (the operand is the negation of a non-null string literal). */
81 av_assert0(!"Invalid codec ID!");
/*
 * Refresh codec-context fields from the converter's current properties:
 * maximum output packet size, priming (leading) frames and the output
 * stream description.
 */
86 static void ffat_update_ctx(AVCodecContext *avctx)
88 ATDecodeContext *at = avctx->priv_data;
89 UInt32 size = sizeof(unsigned);
90 AudioConverterPrimeInfo prime_info;
91 AudioStreamBasicDescription out_format;
/* The return status of this query is ignored; the fallback below covers the
 * case where pkt_size ends up non-positive. */
93 AudioConverterGetProperty(at->converter,
94 kAudioConverterPropertyMaximumOutputPacketSize,
95 &size, &at->pkt_size);
/* Fallback packet size of 1024 * 50 bytes. */
97 if (at->pkt_size <= 0)
98 at->pkt_size = 1024 * 50;
100 size = sizeof(prime_info);
/* A zero status means success: export the leading frames as the encoder
 * delay (initial_padding). */
102 if (!AudioConverterGetProperty(at->converter,
103 kAudioConverterPrimeInfo,
104 &size, &prime_info)) {
105 avctx->initial_padding = prime_info.leadingFrames;
108 size = sizeof(out_format);
/* Take frame_size from the output description when it is non-zero;
 * block_align is only updated for iLBC. */
109 if (!AudioConverterGetProperty(at->converter,
110 kAudioConverterCurrentOutputStreamDescription,
111 &size, &out_format)) {
112 if (out_format.mFramesPerPacket)
113 avctx->frame_size = out_format.mFramesPerPacket;
114 if (out_format.mBytesPerPacket && avctx->codec_id == AV_CODEC_ID_ILBC)
115 avctx->block_align = out_format.mBytesPerPacket;
/* Remember the converter's own frame size before the PCM scaling below;
 * ffat_encode() divides avctx->frame_size by this value. */
118 at->frame_size = avctx->frame_size;
/* For A-law / mu-law both the packet size and the advertised frame size are
 * multiplied by 1024. */
119 if (avctx->codec_id == AV_CODEC_ID_PCM_MULAW ||
120 avctx->codec_id == AV_CODEC_ID_PCM_ALAW) {
121 at->pkt_size *= 1024;
122 avctx->frame_size *= 1024;
/*
 * Read one descriptor header from gb: the first byte is stored in *tag, then
 * a length is accumulated 7 bits at a time from the following byte(s).
 * NOTE(review): the loop header, its termination condition and the return
 * statement are not visible in this excerpt; callers use the return value as
 * the descriptor length.
 */
126 static int read_descr(GetByteContext *gb, int *tag)
130 *tag = bytestream2_get_byte(gb);
132 int c = bytestream2_get_byte(gb);
/* Only the low 7 bits of each byte contribute to the length. */
133 len = (len << 7) | (c & 0x7f);
/*
 * Select the iLBC mode from the codec context: block_align values 38 and 50
 * are checked first, then the bit rate (30 for <= 14000 bit/s, otherwise 20).
 * ffat_init_encoder() uses the result as `8000 * mode / 1000` frames per
 * packet and picks 38 bytes per packet for mode 20, 50 otherwise.
 * NOTE(review): the values returned for the block_align branches and the
 * final fallback are not visible in this excerpt.
 */
140 static int get_ilbc_mode(AVCodecContext *avctx)
142 if (avctx->block_align == 38)
144 else if (avctx->block_align == 50)
146 else if (avctx->bit_rate > 0)
147 return avctx->bit_rate <= 14000 ? 30 : 20;
/*
 * Map a channel bit index of an FFmpeg channel-layout mask to the channel
 * label that remap_layout() stores in AudioChannelDescription.mChannelLabel.
 * The index is turned into a single-bit mask and classified by comparing it
 * against the AV_CH_* boundaries below; remap_layout() treats a negative
 * result as an unsupported channel.
 * NOTE(review): most return statements are not visible in this excerpt.
 */
152 static av_cold int get_channel_label(int channel)
/* The shift must be done in 64 bits: an int "1 << channel" is undefined for
 * channel >= 31 and could never produce the masks above bit 31 that the
 * later comparisons test for. */
154 uint64_t map = 1ULL << channel;
155 if (map <= AV_CH_LOW_FREQUENCY)
157 else if (map <= AV_CH_BACK_RIGHT)
159 else if (map <= AV_CH_BACK_CENTER)
161 else if (map <= AV_CH_SIDE_RIGHT)
163 else if (map <= AV_CH_TOP_BACK_RIGHT)
165 else if (map <= AV_CH_STEREO_RIGHT)
167 else if (map <= AV_CH_WIDE_RIGHT)
169 else if (map <= AV_CH_SURROUND_DIRECT_RIGHT)
171 else if (map == AV_CH_LOW_FREQUENCY_2)
172 return kAudioChannelLabel_LFE2;
/*
 * Fill `layout` with `count` channel descriptions derived from the set bits
 * of the FFmpeg channel mask `in_layout`, using get_channel_label() for each
 * bit index found.
 * Returns AVERROR(EINVAL) on the two visible error paths (no further set bit
 * available, or a channel without a usable label).
 * NOTE(review): the local declarations, the index increments and the success
 * return are not visible in this excerpt.
 */
177 static int remap_layout(AudioChannelLayout *layout, uint64_t in_layout, int count)
181 layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
182 layout->mNumberChannelDescriptions = count;
183 for (i = 0; i < count; i++) {
/* Skip ahead to the next set bit. The bound is tested before shifting and
 * the shift is done in 64 bits, so bits 31..63 are examined correctly and
 * no out-of-range shift is ever evaluated. */
185 while (c < 64 && !(in_layout & (1ULL << c)))
188 return AVERROR(EINVAL); // This should never happen
189 label = get_channel_label(c);
190 layout->mChannelDescriptions[i].mChannelLabel = label;
192 return AVERROR(EINVAL);
/*
 * Map an FFmpeg channel-layout mask to the AudioToolbox channel layout tag
 * that ffat_init_encoder() applies to the AAC output layout.
 * NOTE(review): the switch header and the result for layouts not listed
 * here are not visible in this excerpt.
 */
198 static int get_aac_tag(uint64_t in_layout)
201 case AV_CH_LAYOUT_MONO:
202 return kAudioChannelLayoutTag_Mono;
203 case AV_CH_LAYOUT_STEREO:
204 return kAudioChannelLayoutTag_Stereo;
205 case AV_CH_LAYOUT_QUAD:
206 return kAudioChannelLayoutTag_AAC_Quadraphonic;
207 case AV_CH_LAYOUT_OCTAGONAL:
208 return kAudioChannelLayoutTag_AAC_Octagonal;
209 case AV_CH_LAYOUT_SURROUND:
210 return kAudioChannelLayoutTag_AAC_3_0;
211 case AV_CH_LAYOUT_4POINT0:
212 return kAudioChannelLayoutTag_AAC_4_0;
213 case AV_CH_LAYOUT_5POINT0:
214 return kAudioChannelLayoutTag_AAC_5_0;
215 case AV_CH_LAYOUT_5POINT1:
216 return kAudioChannelLayoutTag_AAC_5_1;
217 case AV_CH_LAYOUT_6POINT0:
218 return kAudioChannelLayoutTag_AAC_6_0;
219 case AV_CH_LAYOUT_6POINT1:
220 return kAudioChannelLayoutTag_AAC_6_1;
221 case AV_CH_LAYOUT_7POINT0:
222 return kAudioChannelLayoutTag_AAC_7_0;
223 case AV_CH_LAYOUT_7POINT1_WIDE_BACK:
224 return kAudioChannelLayoutTag_AAC_7_1;
/* Note the different tag family for plain 7.1 (MPEG_7_1_C rather than an
 * AAC_* tag). */
225 case AV_CH_LAYOUT_7POINT1:
226 return kAudioChannelLayoutTag_MPEG_7_1_C;
/*
 * Encoder init callback (.init): creates the AudioConverter from linear PCM
 * to the target codec, configures channel layouts, rate control, bitrate and
 * quality, exports the converter's magic cookie as extradata, and
 * initializes the audio frame queue.
 * The visible failure paths return AVERROR(ENOMEM), AVERROR(EINVAL) or
 * AVERROR_UNKNOWN.
 * NOTE(review): a number of lines of this function are not visible in this
 * excerpt; the comments below describe only the statements shown.
 */
232 static av_cold int ffat_init_encoder(AVCodecContext *avctx)
234 ATDecodeContext *at = avctx->priv_data;
/* Input side: packed linear PCM with one frame per packet. The format flag
 * is "float" for FLT/DBL, no flag for U8, and "signed integer" otherwise. */
237 AudioStreamBasicDescription in_format = {
238 .mSampleRate = avctx->sample_rate,
239 .mFormatID = kAudioFormatLinearPCM,
240 .mFormatFlags = ((avctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
241 avctx->sample_fmt == AV_SAMPLE_FMT_DBL) ? kAudioFormatFlagIsFloat
242 : avctx->sample_fmt == AV_SAMPLE_FMT_U8 ? 0
243 : kAudioFormatFlagIsSignedInteger)
244 | kAudioFormatFlagIsPacked,
245 .mBytesPerPacket = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
246 .mFramesPerPacket = 1,
247 .mBytesPerFrame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
248 .mChannelsPerFrame = avctx->channels,
249 .mBitsPerChannel = av_get_bytes_per_sample(avctx->sample_fmt) * 8,
/* Output side: same sample rate and channel count, format ID chosen from
 * the codec ID and profile. */
251 AudioStreamBasicDescription out_format = {
252 .mSampleRate = avctx->sample_rate,
253 .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
254 .mChannelsPerFrame = in_format.mChannelsPerFrame,
/* Layout buffer sized as the base struct plus one AudioChannelDescription
 * per channel. */
256 UInt32 layout_size = sizeof(AudioChannelLayout) +
257 sizeof(AudioChannelDescription) * avctx->channels;
258 AudioChannelLayout *channel_layout = av_malloc(layout_size);
261 return AVERROR(ENOMEM);
/* iLBC: packet geometry is derived from the mode returned by
 * get_ilbc_mode(). */
263 if (avctx->codec_id == AV_CODEC_ID_ILBC) {
264 int mode = get_ilbc_mode(avctx);
265 out_format.mFramesPerPacket = 8000 * mode / 1000;
266 out_format.mBytesPerPacket = (mode == 20 ? 38 : 50);
269 status = AudioConverterNew(&in_format, &out_format, &at->converter);
272 av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
273 av_free(channel_layout);
274 return AVERROR_UNKNOWN;
/* No layout given by the caller: use the default one for the channel
 * count. */
277 if (!avctx->channel_layout)
278 avctx->channel_layout = av_get_default_channel_layout(avctx->channels);
/* Describe the input layout channel by channel and hand it to the
 * converter. */
280 if ((status = remap_layout(channel_layout, avctx->channel_layout, avctx->channels)) < 0) {
281 av_log(avctx, AV_LOG_ERROR, "Invalid channel layout\n");
282 av_free(channel_layout);
286 if (AudioConverterSetProperty(at->converter, kAudioConverterInputChannelLayout,
287 layout_size, channel_layout)) {
288 av_log(avctx, AV_LOG_ERROR, "Unsupported input channel layout\n");
289 av_free(channel_layout);
290 return AVERROR(EINVAL);
/* AAC: the output layout is expressed as a layout tag from get_aac_tag()
 * with no channel descriptions.
 * NOTE(review): the condition guarding the two assignments is not visible. */
292 if (avctx->codec_id == AV_CODEC_ID_AAC) {
293 int tag = get_aac_tag(avctx->channel_layout);
295 channel_layout->mChannelLayoutTag = tag;
296 channel_layout->mNumberChannelDescriptions = 0;
299 if (AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
300 layout_size, channel_layout)) {
301 av_log(avctx, AV_LOG_ERROR, "Unsupported output channel layout\n");
302 av_free(channel_layout);
303 return AVERROR(EINVAL);
305 av_free(channel_layout);
/* Optional bit-depth hint; the status of this call is not checked. */
307 if (avctx->bits_per_raw_sample)
308 AudioConverterSetProperty(at->converter,
309 kAudioConverterPropertyBitDepthHint,
310 sizeof(avctx->bits_per_raw_sample),
311 &avctx->bits_per_raw_sample);
313 #if !TARGET_OS_IPHONE
/* Pick VBR when the QSCALE flag is set, CBR otherwise.
 * NOTE(review): the condition guarding this assignment is not visible; given
 * the "auto" option value of -1 it presumably applies only when no mode was
 * selected explicitly -- confirm. */
315 at->mode = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
316 kAudioCodecBitRateControlMode_Variable :
317 kAudioCodecBitRateControlMode_Constant;
319 AudioConverterSetProperty(at->converter, kAudioCodecPropertyBitRateControlMode,
320 sizeof(at->mode), &at->mode);
/* VBR: convert global_quality from lambda units and clamp it to 0-14,
 * warning when it was out of range. */
322 if (at->mode == kAudioCodecBitRateControlMode_Variable) {
323 int q = avctx->global_quality / FF_QP2LAMBDA;
324 if (q < 0 || q > 14) {
325 av_log(avctx, AV_LOG_WARNING,
326 "VBR quality %d out of range, should be 0-14\n", q);
327 q = av_clip(q, 0, 14);
330 AudioConverterSetProperty(at->converter, kAudioCodecPropertySoundQualityForVBR,
/* Explicit bitrate: fetch the ranges the converter accepts and move the
 * requested rate to an allowed value if necessary. */
334 if (avctx->bit_rate > 0) {
335 UInt32 rate = avctx->bit_rate;
337 status = AudioConverterGetPropertyInfo(at->converter,
338 kAudioConverterApplicableEncodeBitRates,
340 if (!status && size) {
341 UInt32 new_rate = rate;
344 AudioValueRange *ranges = av_malloc(size);
346 return AVERROR(ENOMEM);
347 AudioConverterGetProperty(at->converter,
348 kAudioConverterApplicableEncodeBitRates,
350 count = size / sizeof(AudioValueRange);
/* A range containing the rate accepts it as is; otherwise new_rate tracks a
 * range boundary (maximum of a range below the rate, minimum of one above).
 * NOTE(review): the loop-exit statements are not visible in this excerpt. */
351 for (i = 0; i < count; i++) {
352 AudioValueRange *range = &ranges[i];
353 if (rate >= range->mMinimum && rate <= range->mMaximum) {
356 } else if (rate > range->mMaximum) {
357 new_rate = range->mMaximum;
359 new_rate = range->mMinimum;
363 if (new_rate != rate) {
364 av_log(avctx, AV_LOG_WARNING,
365 "Bitrate %u not allowed; changing to %u\n", rate, new_rate);
370 AudioConverterSetProperty(at->converter, kAudioConverterEncodeBitRate,
371 sizeof(rate), &rate);
/* Map the aac_at_quality option (0..2 per the options table) to the
 * converter quality value: 0 -> 96, 1 -> 64, 2 -> 32. */
374 at->quality = 96 - at->quality * 32;
375 AudioConverterSetProperty(at->converter, kAudioConverterCodecQuality,
376 sizeof(at->quality), &at->quality);
/* Magic cookie: its size is queried straight into avctx->extradata_size;
 * a non-zero size triggers allocation and retrieval. */
378 if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterCompressionMagicCookie,
379 &avctx->extradata_size, NULL) &&
380 avctx->extradata_size) {
381 int extradata_size = avctx->extradata_size;
383 if (!(avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE)))
384 return AVERROR(ENOMEM);
/* ALAC: a header (big-endian size 0x24 followed by the 'alac' tag) is
 * written at the start of extradata, the cookie itself is fetched at offset
 * 12, and the exported size is fixed at 0x24. */
385 if (avctx->codec_id == AV_CODEC_ID_ALAC) {
386 avctx->extradata_size = 0x24;
387 AV_WB32(avctx->extradata, 0x24);
388 AV_WB32(avctx->extradata + 4, MKBETAG('a','l','a','c'));
389 extradata = avctx->extradata + 12;
390 avctx->extradata_size = 0x24;
392 extradata = avctx->extradata;
394 status = AudioConverterGetProperty(at->converter,
395 kAudioConverterCompressionMagicCookie,
396 &extradata_size, extradata);
398 av_log(avctx, AV_LOG_ERROR, "AudioToolbox cookie error: %i\n", (int)status);
399 return AVERROR_UNKNOWN;
/* AAC: walk the descriptors in the cookie; when the decoder-specific
 * descriptor is found inside the decoder-config descriptor, its payload is
 * moved to the start of extradata and becomes the exported extradata. */
400 } else if (avctx->codec_id == AV_CODEC_ID_AAC) {
403 bytestream2_init(&gb, extradata, extradata_size);
405 len = read_descr(&gb, &tag);
406 if (tag == MP4DecConfigDescrTag) {
407 bytestream2_skip(&gb, 13);
408 len = read_descr(&gb, &tag);
409 if (tag == MP4DecSpecificDescrTag) {
/* Never copy more than what is left in the buffer. */
410 len = FFMIN(gb.buffer_end - gb.buffer, len);
411 memmove(extradata, gb.buffer, len);
412 avctx->extradata_size = len;
/* ES descriptor: skip two bytes, read the flags byte and skip the optional
 * fields it announces. */
415 } else if (tag == MP4ESDescrTag) {
417 bytestream2_skip(&gb, 2);
418 flags = bytestream2_get_byte(&gb);
419 if (flags & 0x80) //streamDependenceFlag
420 bytestream2_skip(&gb, 2);
421 if (flags & 0x40) //URL_Flag
422 bytestream2_skip(&gb, bytestream2_get_byte(&gb));
423 if (flags & 0x20) //OCRstreamFlag
424 bytestream2_skip(&gb, 2);
426 } while (bytestream2_get_bytes_left(&gb));
/* Any codec other than AAC and ALAC exports the cookie with the size the
 * converter reported. */
427 } else if (avctx->codec_id != AV_CODEC_ID_ALAC) {
428 avctx->extradata_size = extradata_size;
/* Pull packet size, padding and frame size from the configured converter. */
432 ffat_update_ctx(avctx);
434 #if !TARGET_OS_IPHONE && defined(__MAC_10_9)
/* VBR with a maximum rate: limit the packet size to
 * rc_max_rate * frame_size / sample_rate. */
435 if (at->mode == kAudioCodecBitRateControlMode_Variable && avctx->rc_max_rate) {
436 UInt32 max_size = avctx->rc_max_rate * avctx->frame_size / avctx->sample_rate;
438 AudioConverterSetProperty(at->converter, kAudioCodecPropertyPacketSizeLimitForVBR,
439 sizeof(max_size), &max_size);
443 ff_af_queue_init(avctx, &at->afq);
/*
 * Input-data callback handed to AudioConverterFillComplexBuffer() by
 * ffat_encode(): supplies the converter with the next queued input frame.
 * NOTE(review): the last parameter (assigned to avctx below) and the return
 * statements are not visible in this excerpt.
 */
448 static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_packets,
449 AudioBufferList *data,
450 AudioStreamPacketDescription **packets,
453 AVCodecContext *avctx = inctx;
454 ATDecodeContext *at = avctx->priv_data;
/* Nothing queued; how this case is handled is not visible in this excerpt. */
457 if (!at->frame_queue.available) {
/* Take the next pending frame and expose its first data plane as a single
 * buffer. */
467 frame = ff_bufqueue_get(&at->frame_queue);
469 data->mNumberBuffers = 1;
470 data->mBuffers[0].mNumberChannels = avctx->channels;
471 data->mBuffers[0].mDataByteSize = frame->nb_samples *
472 av_get_bytes_per_sample(avctx->sample_fmt) *
474 data->mBuffers[0].mData = frame->data[0];
/* Never report more packets than the frame has samples. */
475 if (*nb_packets > frame->nb_samples)
476 *nb_packets = frame->nb_samples;
/* Park the frame in used_frame_queue; ffat_encode() discards that queue
 * once the converter call has returned. */
478 ff_bufqueue_add(avctx, &at->used_frame_queue, frame);
/*
 * Encode callback (.encode2): queues a clone of the input frame, then asks
 * the converter to fill the output packet, with input pulled through
 * ffat_encode_callback().
 * NOTE(review): several lines (including most return statements) are not
 * visible in this excerpt.
 */
483 static int ffat_encode(AVCodecContext *avctx, AVPacket *avpkt,
484 const AVFrame *frame, int *got_packet_ptr)
486 ATDecodeContext *at = avctx->priv_data;
/* Output buffer description; its data pointer is set to the packet data
 * further down. */
489 AudioBufferList out_buffers = {
493 .mNumberChannels = avctx->channels,
494 .mDataByteSize = at->pkt_size,
498 AudioStreamPacketDescription out_pkt_desc = {0};
503 if (ff_bufqueue_is_full(&at->frame_queue)) {
505 * The frame queue is significantly larger than needed in practice,
506 * but no clear way to determine the minimum number of samples to
507 * get output from AudioConverterFillComplexBuffer().
509 av_log(avctx, AV_LOG_ERROR, "Bug: frame queue is too small.\n");
/* Register the frame with the audio frame queue, then queue a clone of it
 * for the input callback. */
513 if ((ret = ff_af_queue_add(&at->afq, frame)) < 0)
516 in_frame = av_frame_clone(frame);
518 return AVERROR(ENOMEM);
520 ff_bufqueue_add(avctx, &at->frame_queue, in_frame);
/* Allocate an output packet of at->pkt_size bytes; the converter writes
 * directly into it. */
525 if ((ret = ff_alloc_packet2(avctx, avpkt, at->pkt_size, 0)) < 0)
529 out_buffers.mBuffers[0].mData = avpkt->data;
/* Number of output packets requested; passed to the converter by pointer.
 * A packet description is only supplied when avctx->frame_size does not
 * exceed at->frame_size. */
531 *got_packet_ptr = avctx->frame_size / at->frame_size;
533 ret = AudioConverterFillComplexBuffer(at->converter, ffat_encode_callback, avctx,
534 got_packet_ptr, &out_buffers,
535 (avctx->frame_size > at->frame_size) ? NULL : &out_pkt_desc);
/* Frames handed out by the callback during this call are released here. */
537 ff_bufqueue_discard_all(&at->used_frame_queue);
/* Status 0 or 1 with at least one packet produced: finalize the packet
 * size and remove the consumed samples from the audio frame queue. */
539 if ((!ret || ret == 1) && *got_packet_ptr) {
540 avpkt->size = out_buffers.mBuffers[0].mDataByteSize;
541 ff_af_queue_remove(&at->afq, out_pkt_desc.mVariableFramesInPacket ?
542 out_pkt_desc.mVariableFramesInPacket :
/* Any other non-zero status is logged as a warning.
 * NOTE(review): the value returned on this path is not visible here. */
546 } else if (ret && ret != 1) {
547 av_log(avctx, AV_LOG_WARNING, "Encode error: %i\n", ret);
/*
 * Flush callback (.flush): resets the converter and drops every frame held
 * in the pending and used frame queues.
 */
553 static av_cold void ffat_encode_flush(AVCodecContext *avctx)
555 ATDecodeContext *at = avctx->priv_data;
556 AudioConverterReset(at->converter);
557 ff_bufqueue_discard_all(&at->frame_queue);
558 ff_bufqueue_discard_all(&at->used_frame_queue);
/*
 * Close callback (.close): disposes of the converter, drops all queued
 * frames and closes the audio frame queue.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
561 static av_cold int ffat_close_encoder(AVCodecContext *avctx)
563 ATDecodeContext *at = avctx->priv_data;
564 AudioConverterDispose(at->converter);
565 ff_bufqueue_discard_all(&at->frame_queue);
566 ff_bufqueue_discard_all(&at->used_frame_queue);
567 ff_af_queue_close(&at->afq);
/*
 * AAC profiles passed as PROFILES to the FFAT_ENC() instantiation of the
 * AAC encoder; the same profile values are handled by ffat_get_format_id().
 * The list ends with an FF_PROFILE_UNKNOWN entry.
 */
571 static const AVProfile aac_profiles[] = {
572 { FF_PROFILE_AAC_LOW, "LC" },
573 { FF_PROFILE_AAC_HE, "HE-AAC" },
574 { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
575 { FF_PROFILE_AAC_LD, "LD" },
576 { FF_PROFILE_AAC_ELD, "ELD" },
577 { FF_PROFILE_UNKNOWN },
/* Flags shared by every option below: audio parameter, encoding parameter. */
580 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private encoder options stored in ATDecodeContext.
 * The rate-control option and its named constants are only compiled when
 * TARGET_OS_IPHONE is false.
 */
581 static const AVOption options[] = {
582 #if !TARGET_OS_IPHONE
/* Rate-control mode; defaults to -1 ("auto"), named values follow. */
583 {"aac_at_mode", "ratecontrol mode", offsetof(ATDecodeContext, mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, kAudioCodecBitRateControlMode_Variable, AE, "mode"},
584 {"auto", "VBR if global quality is given; CBR otherwise", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, AE, "mode"},
585 {"cbr", "constant bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Constant}, INT_MIN, INT_MAX, AE, "mode"},
586 {"abr", "long-term average bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_LongTermAverage}, INT_MIN, INT_MAX, AE, "mode"},
587 {"cvbr", "constrained variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_VariableConstrained}, INT_MIN, INT_MAX, AE, "mode"},
588 {"vbr" , "variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Variable}, INT_MIN, INT_MAX, AE, "mode"},
/* Range 0..2, default 0; ffat_init_encoder() converts it to the converter
 * quality value as 96 - 32 * value. */
590 {"aac_at_quality", "quality vs speed control", offsetof(ATDecodeContext, quality), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, AE},
/*
 * Define the AVClass `ffat_<NAME>_enc_class` (class name "at_<NAME>_enc")
 * that FFAT_ENC() attaches to the corresponding encoder as .priv_class.
 */
594 #define FFAT_ENC_CLASS(NAME) \
595 static const AVClass ffat_##NAME##_enc_class = { \
596 .class_name = "at_" #NAME "_enc", \
597 .item_name = av_default_item_name, \
599 .version = LIBAVUTIL_VERSION_INT, \
/*
 * Define the AVClass and the AVCodec `ff_<NAME>_at_encoder` (codec name
 * "<NAME>_at") wired to the init/close/encode2/flush callbacks of this file.
 * PROFILES becomes .profiles. The variadic arguments are pasted directly
 * after the .capabilities expression, so the first one may extend the
 * capability flags (e.g. `| AV_CODEC_CAP_...`) and later ones may add
 * further designated initializers (e.g. `.channel_layouts = ...`).
 */
602 #define FFAT_ENC(NAME, ID, PROFILES, ...) \
603 FFAT_ENC_CLASS(NAME) \
604 AVCodec ff_##NAME##_at_encoder = { \
605 .name = #NAME "_at", \
606 .long_name = NULL_IF_CONFIG_SMALL(#NAME " (AudioToolbox)"), \
607 .type = AVMEDIA_TYPE_AUDIO, \
609 .priv_data_size = sizeof(ATDecodeContext), \
610 .init = ffat_init_encoder, \
611 .close = ffat_close_encoder, \
612 .encode2 = ffat_encode, \
613 .flush = ffat_encode_flush, \
614 .priv_class = &ffat_##NAME##_enc_class, \
615 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY __VA_ARGS__, \
616 .sample_fmts = (const enum AVSampleFormat[]) { \
618 AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_NONE \
620 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, \
621 .profiles = PROFILES, \
/*
 * Channel layouts advertised by the AAC encoder (passed as .channel_layouts
 * in its FFAT_ENC() instantiation); the visible entries all have a
 * corresponding case in get_aac_tag().
 * NOTE(review): some entries and the terminator are not visible in this
 * excerpt.
 */
624 static const uint64_t aac_at_channel_layouts[] = {
627 AV_CH_LAYOUT_SURROUND,
628 AV_CH_LAYOUT_4POINT0,
629 AV_CH_LAYOUT_5POINT0,
630 AV_CH_LAYOUT_5POINT1,
631 AV_CH_LAYOUT_6POINT0,
632 AV_CH_LAYOUT_6POINT1,
633 AV_CH_LAYOUT_7POINT0,
634 AV_CH_LAYOUT_7POINT1_WIDE_BACK,
636 AV_CH_LAYOUT_OCTAGONAL,
/*
 * Encoder definitions. AAC passes an empty capability extension followed by
 * its channel-layout list; ALAC adds the variable-frame-size and lossless
 * capability flags. The adpcm_ima_qt encoder is commented out.
 */
640 FFAT_ENC(aac, AV_CODEC_ID_AAC, aac_profiles, , .channel_layouts = aac_at_channel_layouts)
641 //FFAT_ENC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL)
642 FFAT_ENC(alac, AV_CODEC_ID_ALAC, NULL, | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | AV_CODEC_CAP_LOSSLESS)
643 FFAT_ENC(ilbc, AV_CODEC_ID_ILBC, NULL)
644 FFAT_ENC(pcm_alaw, AV_CODEC_ID_PCM_ALAW, NULL)
645 FFAT_ENC(pcm_mulaw, AV_CODEC_ID_PCM_MULAW, NULL)