/*
 * Audio Toolbox system codecs
 *
 * copyright (c) 2016 rcombs
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include <AudioToolbox/AudioToolbox.h>
25 #define FF_BUFQUEUE_SIZE 256
26 #include "libavfilter/bufferqueue.h"
29 #include "audio_frame_queue.h"
31 #include "bytestream.h"
33 #include "libavformat/isom.h"
34 #include "libavutil/avassert.h"
35 #include "libavutil/opt.h"
36 #include "libavutil/log.h"
/*
 * Private codec context used by every AudioToolbox encoder declared in this
 * file (it is the type passed to priv_data_size in FFAT_ENC, despite the
 * "Decode" in its name).
 */
38 typedef struct ATDecodeContext {
/* Converter handle created by AudioConverterNew() in ffat_init_encoder(). */
43 AudioConverterRef converter;
/* Clones of input frames queued by ffat_encode() and popped by
 * ffat_encode_callback(). */
44 struct FFBufQueue frame_queue;
/* Frames the callback has already handed to the converter; emptied after
 * each AudioConverterFillComplexBuffer() call in ffat_encode(). */
45 struct FFBufQueue used_frame_queue;
/* Allocated in ffat_init_encoder(); ffat_encode_callback() re-references it
 * to the frame it is currently feeding, and ffat_close_encoder() frees it. */
52 AVFrame* encoding_frame;
/*
 * Map an FFmpeg codec ID (and, for AAC, a profile) to the AudioToolbox
 * format ID used as the converter's output mFormatID.
 *
 * The FF_PROFILE_AAC_* labels select between the kAudioFormatMPEG4AAC*
 * variants; the AV_CODEC_ID_* labels select the remaining formats.
 * Reaching the av_assert0() means the codec ID matched none of them.
 */
55 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
/* AAC profile -> AudioToolbox AAC flavour */
60 case FF_PROFILE_AAC_LOW:
62 return kAudioFormatMPEG4AAC;
63 case FF_PROFILE_AAC_HE:
64 return kAudioFormatMPEG4AAC_HE;
65 case FF_PROFILE_AAC_HE_V2:
66 return kAudioFormatMPEG4AAC_HE_V2;
67 case FF_PROFILE_AAC_LD:
68 return kAudioFormatMPEG4AAC_LD;
69 case FF_PROFILE_AAC_ELD:
70 return kAudioFormatMPEG4AAC_ELD;
/* non-AAC codecs map one-to-one */
72 case AV_CODEC_ID_ADPCM_IMA_QT:
73 return kAudioFormatAppleIMA4;
74 case AV_CODEC_ID_ALAC:
75 return kAudioFormatAppleLossless;
76 case AV_CODEC_ID_ILBC:
77 return kAudioFormatiLBC;
78 case AV_CODEC_ID_PCM_ALAW:
79 return kAudioFormatALaw;
80 case AV_CODEC_ID_PCM_MULAW:
81 return kAudioFormatULaw;
/* unsupported codec ID: abort via assertion (string literal is always true,
 * so its negation always fails) */
83 av_assert0(!"Invalid codec ID!");
/*
 * Pull the parameters the converter settled on back into the codec context:
 * maximum output packet size, encoder priming (leading frames), frames per
 * packet and, for iLBC only, bytes per packet.
 */
88 static void ffat_update_ctx(AVCodecContext *avctx)
90 ATDecodeContext *at = avctx->priv_data;
91 UInt32 size = sizeof(unsigned);
92 AudioConverterPrimeInfo prime_info;
93 AudioStreamBasicDescription out_format;
/* NOTE(review): the status of this query is not checked; at->pkt_size is
 * only validated through the comparison that follows. */
95 AudioConverterGetProperty(at->converter,
96 kAudioConverterPropertyMaximumOutputPacketSize,
97 &size, &at->pkt_size);
/* Fall back to a fixed 50 KiB packet buffer when no usable size was
 * reported. NOTE(review): `size` above is sizeof(unsigned), which suggests
 * pkt_size is unsigned; if so `<= 0` can only mean `== 0` - TODO confirm. */
99 if (at->pkt_size <= 0)
100 at->pkt_size = 1024 * 50;
102 size = sizeof(prime_info);
/* A zero status means success: export the leading frames as encoder delay. */
104 if (!AudioConverterGetProperty(at->converter,
105 kAudioConverterPrimeInfo,
106 &size, &prime_info)) {
107 avctx->initial_padding = prime_info.leadingFrames;
110 size = sizeof(out_format);
111 if (!AudioConverterGetProperty(at->converter,
112 kAudioConverterCurrentOutputStreamDescription,
113 &size, &out_format)) {
/* Zero fields are left alone, so existing avctx values survive. */
114 if (out_format.mFramesPerPacket)
115 avctx->frame_size = out_format.mFramesPerPacket;
116 if (out_format.mBytesPerPacket && avctx->codec_id == AV_CODEC_ID_ILBC)
117 avctx->block_align = out_format.mBytesPerPacket;
/* Remember the converter's own packet length before any scaling below;
 * ffat_encode() divides avctx->frame_size by this value. */
120 at->frame_size = avctx->frame_size;
/* For A-law / mu-law, both the packet buffer and the frame size exposed to
 * the caller are scaled up by 1024. */
121 if (avctx->codec_id == AV_CODEC_ID_PCM_MULAW ||
122 avctx->codec_id == AV_CODEC_ID_PCM_ALAW) {
123 at->pkt_size *= 1024;
124 avctx->frame_size *= 1024;
/*
 * Read one descriptor header from the byte stream: a one-byte tag stored
 * through `tag`, followed by a length assembled from the low 7 bits of each
 * subsequent byte (most significant group first).
 * NOTE(review): the loop bound and the continuation-bit test are not shown
 * here - TODO confirm how many length bytes may be consumed.
 */
128 static int read_descr(GetByteContext *gb, int *tag)
132 *tag = bytestream2_get_byte(gb);
/* accumulate 7 payload bits per length byte */
134 int c = bytestream2_get_byte(gb);
135 len = (len << 7) | (c & 0x7f);
/*
 * Choose the iLBC mode for the encoder. block_align values of 38 and 50 are
 * checked first, then the bit rate: at most 14000 selects 30, anything
 * higher selects 20.
 * NOTE(review): ffat_init_encoder() uses the result as a duration in ms
 * (8000 * mode / 1000 frames per packet) and pairs mode 20 with 38-byte
 * packets, so the two block_align branches presumably return 20 and 30
 * respectively - TODO confirm, along with the final fallback.
 */
142 static int get_ilbc_mode(AVCodecContext *avctx)
144 if (avctx->block_align == 38)
146 else if (avctx->block_align == 50)
148 else if (avctx->bit_rate > 0)
149 return avctx->bit_rate <= 14000 ? 30 : 20;
/*
 * Translate an FFmpeg channel bit position into an AudioToolbox channel
 * label. `channel` is turned into a one-bit mask which is compared against
 * successive AV_CH_* boundaries to pick a range; AV_CH_LOW_FREQUENCY_2 maps
 * to kAudioChannelLabel_LFE2. remap_layout() treats a negative result as an
 * unsupported channel.
 *
 * NOTE(review): `1 << channel` is computed in `int` and only then widened to
 * uint64_t, so a bit position of 31 or more overflows the shift (undefined
 * behaviour in C). The later comparisons use masks such as AV_CH_WIDE_RIGHT,
 * AV_CH_SURROUND_DIRECT_RIGHT and AV_CH_LOW_FREQUENCY_2, which presumably
 * lie above bit 31 - TODO confirm and switch to `UINT64_C(1) << channel`.
 */
154 static av_cold int get_channel_label(int channel)
156 uint64_t map = 1 << channel;
/* ranges are tested in ascending mask order, so each branch only sees
 * channels above the previous boundary */
157 if (map <= AV_CH_LOW_FREQUENCY)
159 else if (map <= AV_CH_BACK_RIGHT)
161 else if (map <= AV_CH_BACK_CENTER)
163 else if (map <= AV_CH_SIDE_RIGHT)
165 else if (map <= AV_CH_TOP_BACK_RIGHT)
167 else if (map <= AV_CH_STEREO_RIGHT)
169 else if (map <= AV_CH_WIDE_RIGHT)
171 else if (map <= AV_CH_SURROUND_DIRECT_RIGHT)
173 else if (map == AV_CH_LOW_FREQUENCY_2)
174 return kAudioChannelLabel_LFE2;
179 static int remap_layout(AudioChannelLayout *layout, uint64_t in_layout, int count)
183 layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
184 layout->mNumberChannelDescriptions = count;
185 for (i = 0; i < count; i++) {
187 while (!(in_layout & (1 << c)) && c < 64)
190 return AVERROR(EINVAL); // This should never happen
191 label = get_channel_label(c);
192 layout->mChannelDescriptions[i].mChannelLabel = label;
194 return AVERROR(EINVAL);
/*
 * Look up the predefined AudioToolbox channel layout tag that corresponds to
 * an FFmpeg channel layout, for use as the AAC output layout in
 * ffat_init_encoder().
 * NOTE(review): the result for layouts not listed below is not shown here;
 * the caller stores the value in `tag` - TODO confirm the "no match" value.
 */
200 static int get_aac_tag(uint64_t in_layout)
203 case AV_CH_LAYOUT_MONO:
204 return kAudioChannelLayoutTag_Mono;
205 case AV_CH_LAYOUT_STEREO:
206 return kAudioChannelLayoutTag_Stereo;
207 case AV_CH_LAYOUT_QUAD:
208 return kAudioChannelLayoutTag_AAC_Quadraphonic;
209 case AV_CH_LAYOUT_OCTAGONAL:
210 return kAudioChannelLayoutTag_AAC_Octagonal;
211 case AV_CH_LAYOUT_SURROUND:
212 return kAudioChannelLayoutTag_AAC_3_0;
213 case AV_CH_LAYOUT_4POINT0:
214 return kAudioChannelLayoutTag_AAC_4_0;
215 case AV_CH_LAYOUT_5POINT0:
216 return kAudioChannelLayoutTag_AAC_5_0;
217 case AV_CH_LAYOUT_5POINT1:
218 return kAudioChannelLayoutTag_AAC_5_1;
219 case AV_CH_LAYOUT_6POINT0:
220 return kAudioChannelLayoutTag_AAC_6_0;
221 case AV_CH_LAYOUT_6POINT1:
222 return kAudioChannelLayoutTag_AAC_6_1;
223 case AV_CH_LAYOUT_7POINT0:
224 return kAudioChannelLayoutTag_AAC_7_0;
/* note the pairing: FFmpeg's 7.1 "wide back" uses the AAC_7_1 tag, while
 * plain 7.1 uses MPEG_7_1_C */
225 case AV_CH_LAYOUT_7POINT1_WIDE_BACK:
226 return kAudioChannelLayoutTag_AAC_7_1;
227 case AV_CH_LAYOUT_7POINT1:
228 return kAudioChannelLayoutTag_MPEG_7_1_C;
/*
 * Encoder init: create the PCM -> codec AudioConverter, configure channel
 * layouts, rate control, bitrate and quality, export the magic cookie as
 * extradata, then set up the audio frame queue and the scratch frame.
 *
 * Returns a negative AVERROR code on failure.
 *
 * NOTE(review): several error returns below happen after AudioConverterNew()
 * succeeded without disposing the converter. FFAT_ENC sets caps_internal to
 * FF_CODEC_CAP_INIT_THREADSAFE only, so presumably close() is not called on
 * init failure and the converter leaks - TODO confirm.
 */
234 static av_cold int ffat_init_encoder(AVCodecContext *avctx)
236 ATDecodeContext *at = avctx->priv_data;
/* Input side: packed linear PCM, one frame per packet. Float/double samples
 * get the float flag, U8 gets no sign flag, everything else is signed. */
239 AudioStreamBasicDescription in_format = {
240 .mSampleRate = avctx->sample_rate,
241 .mFormatID = kAudioFormatLinearPCM,
242 .mFormatFlags = ((avctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
243 avctx->sample_fmt == AV_SAMPLE_FMT_DBL) ? kAudioFormatFlagIsFloat
244 : avctx->sample_fmt == AV_SAMPLE_FMT_U8 ? 0
245 : kAudioFormatFlagIsSignedInteger)
246 | kAudioFormatFlagIsPacked,
247 .mBytesPerPacket = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
248 .mFramesPerPacket = 1,
249 .mBytesPerFrame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
250 .mChannelsPerFrame = avctx->channels,
251 .mBitsPerChannel = av_get_bytes_per_sample(avctx->sample_fmt) * 8,
/* Output side: only rate, format ID and channel count are given here. */
253 AudioStreamBasicDescription out_format = {
254 .mSampleRate = avctx->sample_rate,
255 .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
256 .mChannelsPerFrame = in_format.mChannelsPerFrame,
/* Scratch layout sized for one description per channel on top of the base
 * struct; reused for both the input and output layout properties. */
258 UInt32 layout_size = sizeof(AudioChannelLayout) +
259 sizeof(AudioChannelDescription) * avctx->channels;
260 AudioChannelLayout *channel_layout = av_malloc(layout_size);
263 return AVERROR(ENOMEM);
/* iLBC needs explicit packet geometry: `mode` ms at 8 kHz, 38 bytes for the
 * 20 ms mode and 50 bytes otherwise. */
265 if (avctx->codec_id == AV_CODEC_ID_ILBC) {
266 int mode = get_ilbc_mode(avctx);
267 out_format.mFramesPerPacket = 8000 * mode / 1000;
268 out_format.mBytesPerPacket = (mode == 20 ? 38 : 50);
271 status = AudioConverterNew(&in_format, &out_format, &at->converter);
274 av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
275 av_free(channel_layout);
276 return AVERROR_UNKNOWN;
/* No layout supplied by the caller: derive the default for this channel
 * count. */
279 if (!avctx->channel_layout)
280 avctx->channel_layout = av_get_default_channel_layout(avctx->channels);
/* Describe the input channel order explicitly, label by label. */
282 if ((status = remap_layout(channel_layout, avctx->channel_layout, avctx->channels)) < 0) {
283 av_log(avctx, AV_LOG_ERROR, "Invalid channel layout\n");
284 av_free(channel_layout);
288 if (AudioConverterSetProperty(at->converter, kAudioConverterInputChannelLayout,
289 layout_size, channel_layout)) {
290 av_log(avctx, AV_LOG_ERROR, "Unsupported input channel layout\n");
291 av_free(channel_layout);
292 return AVERROR(EINVAL);
/* For AAC the output layout is switched to a predefined tag (no explicit
 * descriptions) looked up from the FFmpeg layout. */
294 if (avctx->codec_id == AV_CODEC_ID_AAC) {
295 int tag = get_aac_tag(avctx->channel_layout);
297 channel_layout->mChannelLayoutTag = tag;
298 channel_layout->mNumberChannelDescriptions = 0;
301 if (AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
302 layout_size, channel_layout)) {
303 av_log(avctx, AV_LOG_ERROR, "Unsupported output channel layout\n");
304 av_free(channel_layout);
305 return AVERROR(EINVAL);
307 av_free(channel_layout);
/* Optional hint; the result of setting it is not checked. */
309 if (avctx->bits_per_raw_sample)
310 AudioConverterSetProperty(at->converter,
311 kAudioConverterPropertyBitDepthHint,
312 sizeof(avctx->bits_per_raw_sample),
313 &avctx->bits_per_raw_sample);
/* Rate-control mode selection is compiled out on iPhone targets. */
315 #if !TARGET_OS_IPHONE
/* QSCALE flag selects VBR, otherwise CBR. NOTE(review): presumably applied
 * only when the aac_at_mode option is left at its "auto" value (-1) - the
 * guard is not shown here, TODO confirm. */
317 at->mode = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
318 kAudioCodecBitRateControlMode_Variable :
319 kAudioCodecBitRateControlMode_Constant;
321 AudioConverterSetProperty(at->converter, kAudioCodecPropertyBitRateControlMode,
322 sizeof(at->mode), &at->mode);
/* VBR: derive a quality step from global_quality, warn and clamp to 0-14. */
324 if (at->mode == kAudioCodecBitRateControlMode_Variable) {
325 int q = avctx->global_quality / FF_QP2LAMBDA;
326 if (q < 0 || q > 14) {
327 av_log(avctx, AV_LOG_WARNING,
328 "VBR quality %d out of range, should be 0-14\n", q);
329 q = av_clip(q, 0, 14);
332 AudioConverterSetProperty(at->converter, kAudioCodecPropertySoundQualityForVBR,
/* Explicit bitrate: snap it to one of the ranges the converter reports as
 * applicable before setting it. */
336 if (avctx->bit_rate > 0) {
337 UInt32 rate = avctx->bit_rate;
339 status = AudioConverterGetPropertyInfo(at->converter,
340 kAudioConverterApplicableEncodeBitRates,
342 if (!status && size) {
343 UInt32 new_rate = rate;
346 AudioValueRange *ranges = av_malloc(size);
348 return AVERROR(ENOMEM);
/* NOTE(review): the status of this query is ignored; if it fails, `ranges`
 * is read below without having been filled in. */
349 AudioConverterGetProperty(at->converter,
350 kAudioConverterApplicableEncodeBitRates,
352 count = size / sizeof(AudioValueRange);
353 for (i = 0; i < count; i++) {
354 AudioValueRange *range = &ranges[i];
/* inside a range / above it (take its maximum) / below it (take its
 * minimum) */
355 if (rate >= range->mMinimum && rate <= range->mMaximum) {
358 } else if (rate > range->mMaximum) {
359 new_rate = range->mMaximum;
361 new_rate = range->mMinimum;
365 if (new_rate != rate) {
366 av_log(avctx, AV_LOG_WARNING,
367 "Bitrate %u not allowed; changing to %u\n", rate, new_rate);
372 AudioConverterSetProperty(at->converter, kAudioConverterEncodeBitRate,
373 sizeof(rate), &rate);
/* Map the aac_at_quality option (0..2 per the options table) onto the
 * converter's quality scale: 0 -> 96, 1 -> 64, 2 -> 32. */
376 at->quality = 96 - at->quality * 32;
377 AudioConverterSetProperty(at->converter, kAudioConverterCodecQuality,
378 sizeof(at->quality), &at->quality);
/* Magic cookie -> extradata. The cookie size is written straight into
 * avctx->extradata_size; NOTE(review): that passes the address of an int
 * field where the other size queries in this file use UInt32 - TODO confirm
 * the types are compatible. */
380 if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterCompressionMagicCookie,
381 &avctx->extradata_size, NULL) &&
382 avctx->extradata_size) {
383 int extradata_size = avctx->extradata_size;
385 if (!(avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE)))
386 return AVERROR(ENOMEM);
/* ALAC: prepend a 12-byte header (big-endian size 0x24, 'alac' tag, four
 * bytes left zero by av_mallocz) and write the cookie after it.
 * NOTE(review): extradata_size is assigned 0x24 twice in this branch; the
 * second assignment is redundant. */
387 if (avctx->codec_id == AV_CODEC_ID_ALAC) {
388 avctx->extradata_size = 0x24;
389 AV_WB32(avctx->extradata, 0x24);
390 AV_WB32(avctx->extradata + 4, MKBETAG('a','l','a','c'));
391 extradata = avctx->extradata + 12;
392 avctx->extradata_size = 0x24;
394 extradata = avctx->extradata;
396 status = AudioConverterGetProperty(at->converter,
397 kAudioConverterCompressionMagicCookie,
398 &extradata_size, extradata);
400 av_log(avctx, AV_LOG_ERROR, "AudioToolbox cookie error: %i\n", (int)status);
401 return AVERROR_UNKNOWN;
/* AAC: the cookie is a chain of MP4 descriptors. Walk it and move the
 * decoder-specific payload to the start of extradata. */
402 } else if (avctx->codec_id == AV_CODEC_ID_AAC) {
405 bytestream2_init(&gb, extradata, extradata_size);
407 len = read_descr(&gb, &tag);
408 if (tag == MP4DecConfigDescrTag) {
/* skip the 13 bytes that precede the nested descriptor */
409 bytestream2_skip(&gb, 13);
410 len = read_descr(&gb, &tag);
411 if (tag == MP4DecSpecificDescrTag) {
/* clamp to what is actually left in the buffer; memmove because source and
 * destination overlap */
412 len = FFMIN(gb.buffer_end - gb.buffer, len);
413 memmove(extradata, gb.buffer, len);
414 avctx->extradata_size = len;
/* ES descriptor: skip 2 bytes, then the optional fields announced by the
 * flags byte */
417 } else if (tag == MP4ESDescrTag) {
419 bytestream2_skip(&gb, 2);
420 flags = bytestream2_get_byte(&gb);
421 if (flags & 0x80) //streamDependenceFlag
422 bytestream2_skip(&gb, 2);
423 if (flags & 0x40) //URL_Flag
424 bytestream2_skip(&gb, bytestream2_get_byte(&gb));
425 if (flags & 0x20) //OCRstreamFlag
426 bytestream2_skip(&gb, 2);
428 } while (bytestream2_get_bytes_left(&gb));
/* Other non-ALAC codecs: use the size the converter actually returned. */
429 } else if (avctx->codec_id != AV_CODEC_ID_ALAC) {
430 avctx->extradata_size = extradata_size;
/* Read back packet size, padding and frame size from the converter. */
434 ffat_update_ctx(avctx);
436 #if !TARGET_OS_IPHONE && defined(__MAC_10_9)
/* VBR with a max rate: cap the packet size at rc_max_rate scaled by the
 * duration of one frame (frame_size / sample_rate). */
437 if (at->mode == kAudioCodecBitRateControlMode_Variable && avctx->rc_max_rate) {
438 UInt32 max_size = avctx->rc_max_rate * avctx->frame_size / avctx->sample_rate;
440 AudioConverterSetProperty(at->converter, kAudioCodecPropertyPacketSizeLimitForVBR,
441 sizeof(max_size), &max_size);
445 ff_af_queue_init(avctx, &at->afq);
/* Scratch frame used by ffat_encode_callback() to hold a reference to the
 * frame being fed to the converter. */
447 at->encoding_frame = av_frame_alloc();
448 if (!at->encoding_frame)
449 return AVERROR(ENOMEM);
/*
 * Input callback handed to AudioConverterFillComplexBuffer(): supplies the
 * converter with the next queued input frame as a single buffer.
 *
 * `inctx` is the AVCodecContext passed as user data by ffat_encode().
 * On return *nb_packets is clamped to the number of samples in the frame.
 */
454 static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_packets,
455 AudioBufferList *data,
456 AudioStreamPacketDescription **packets,
459 AVCodecContext *avctx = inctx;
460 ATDecodeContext *at = avctx->priv_data;
/* nothing queued: handled separately (branch body not shown here) */
464 if (!at->frame_queue.available) {
474 frame = ff_bufqueue_get(&at->frame_queue);
/* Expose the frame's first data plane as one buffer carrying all channels;
 * the byte size is samples * bytes per sample * (a further factor not shown
 * here, presumably the channel count - TODO confirm). */
476 data->mNumberBuffers = 1;
477 data->mBuffers[0].mNumberChannels = avctx->channels;
478 data->mBuffers[0].mDataByteSize = frame->nb_samples *
479 av_get_bytes_per_sample(avctx->sample_fmt) *
481 data->mBuffers[0].mData = frame->data[0];
/* never report more packets than the frame has samples (input is one PCM
 * frame per packet, see in_format in ffat_init_encoder()) */
482 if (*nb_packets > frame->nb_samples)
483 *nb_packets = frame->nb_samples;
/* keep a reference to the frame currently in use by the converter */
485 av_frame_unref(at->encoding_frame);
486 ret = av_frame_ref(at->encoding_frame, frame);
/* park the frame on the "used" queue; ffat_encode() discards that queue once
 * AudioConverterFillComplexBuffer() has returned */
492 ff_bufqueue_add(avctx, &at->used_frame_queue, frame);
/*
 * encode2 callback: queue the input frame, then ask the converter for output
 * into a freshly allocated packet of at->pkt_size bytes. The converter pulls
 * its input through ffat_encode_callback().
 */
497 static int ffat_encode(AVCodecContext *avctx, AVPacket *avpkt,
498 const AVFrame *frame, int *got_packet_ptr)
500 ATDecodeContext *at = avctx->priv_data;
/* single output buffer; its data pointer is set once the packet exists */
503 AudioBufferList out_buffers = {
507 .mNumberChannels = avctx->channels,
508 .mDataByteSize = at->pkt_size,
512 AudioStreamPacketDescription out_pkt_desc = {0};
/* the queue has a fixed capacity (FF_BUFQUEUE_SIZE); overflowing it is
 * reported as an internal bug */
517 if (ff_bufqueue_is_full(&at->frame_queue)) {
519 * The frame queue is significantly larger than needed in practice,
520 * but no clear way to determine the minimum number of samples to
521 * get output from AudioConverterFillComplexBuffer().
523 av_log(avctx, AV_LOG_ERROR, "Bug: frame queue is too small.\n");
/* track the frame's timing/sample count for later packet timestamps */
527 if ((ret = ff_af_queue_add(&at->afq, frame)) < 0)
/* the queue owns its own reference to the input frame */
530 in_frame = av_frame_clone(frame);
532 return AVERROR(ENOMEM);
534 ff_bufqueue_add(avctx, &at->frame_queue, in_frame);
539 if ((ret = ff_alloc_packet2(avctx, avpkt, at->pkt_size, 0)) < 0)
543 out_buffers.mBuffers[0].mData = avpkt->data;
/* Number of converter packets requested: the ratio between the frame size
 * exposed to the user and the converter's own (1024 for the PCM codecs
 * scaled in ffat_update_ctx()). The variable doubles as the in/out packet
 * count of the call below.
 * NOTE(review): divides by at->frame_size, which is zero if neither the
 * converter nor the caller provided a frame size - TODO confirm this cannot
 * happen. */
545 *got_packet_ptr = avctx->frame_size / at->frame_size;
/* a packet description is only requested when a single converter packet is
 * being produced */
547 ret = AudioConverterFillComplexBuffer(at->converter, ffat_encode_callback, avctx,
548 got_packet_ptr, &out_buffers,
549 (avctx->frame_size > at->frame_size) ? NULL : &out_pkt_desc);
/* input frames consumed by the callback are no longer needed */
551 ff_bufqueue_discard_all(&at->used_frame_queue);
/* status 0 or 1 with output counts as success. NOTE(review): 1 is presumably
 * the callback's own "no more input" status - TODO confirm. */
553 if ((!ret || ret == 1) && *got_packet_ptr) {
554 avpkt->size = out_buffers.mBuffers[0].mDataByteSize;
/* prefer the per-packet frame count when the converter reports one */
555 ff_af_queue_remove(&at->afq, out_pkt_desc.mVariableFramesInPacket ?
556 out_pkt_desc.mVariableFramesInPacket :
/* any other non-zero status is only logged as a warning */
560 } else if (ret && ret != 1) {
561 av_log(avctx, AV_LOG_WARNING, "Encode error: %i\n", ret);
/*
 * flush callback: reset the converter's internal state and drop every frame
 * still held in the pending and used queues.
 */
567 static av_cold void ffat_encode_flush(AVCodecContext *avctx)
569 ATDecodeContext *at = avctx->priv_data;
570 AudioConverterReset(at->converter);
571 ff_bufqueue_discard_all(&at->frame_queue);
572 ff_bufqueue_discard_all(&at->used_frame_queue);
/*
 * close callback: dispose of the converter, release all queued frames, close
 * the audio frame queue and free the scratch frame allocated at init.
 */
575 static av_cold int ffat_close_encoder(AVCodecContext *avctx)
577 ATDecodeContext *at = avctx->priv_data;
578 AudioConverterDispose(at->converter);
579 ff_bufqueue_discard_all(&at->frame_queue);
580 ff_bufqueue_discard_all(&at->used_frame_queue);
581 ff_af_queue_close(&at->afq);
582 av_frame_free(&at->encoding_frame);
/*
 * AAC profiles advertised by the aac_at encoder; the same FF_PROFILE_AAC_*
 * values are handled in ffat_get_format_id(). The list ends with the
 * FF_PROFILE_UNKNOWN sentinel.
 */
586 static const AVProfile aac_profiles[] = {
587 { FF_PROFILE_AAC_LOW, "LC" },
588 { FF_PROFILE_AAC_HE, "HE-AAC" },
589 { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
590 { FF_PROFILE_AAC_LD, "LD" },
591 { FF_PROFILE_AAC_ELD, "ELD" },
592 { FF_PROFILE_UNKNOWN },
/* Flags shared by every option below: audio + encoding parameter. */
595 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options, stored in ATDecodeContext.mode and .quality.
 * aac_at_mode (not available on iPhone targets) defaults to -1 ("auto") and
 * accepts the named rate-control constants that follow it.
 */
596 static const AVOption options[] = {
597 #if !TARGET_OS_IPHONE
598 {"aac_at_mode", "ratecontrol mode", offsetof(ATDecodeContext, mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, kAudioCodecBitRateControlMode_Variable, AE, "mode"},
599 {"auto", "VBR if global quality is given; CBR otherwise", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, AE, "mode"},
600 {"cbr", "constant bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Constant}, INT_MIN, INT_MAX, AE, "mode"},
601 {"abr", "long-term average bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_LongTermAverage}, INT_MIN, INT_MAX, AE, "mode"},
602 {"cvbr", "constrained variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_VariableConstrained}, INT_MIN, INT_MAX, AE, "mode"},
603 {"vbr" , "variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Variable}, INT_MIN, INT_MAX, AE, "mode"},
/* aac_at_quality: integer 0..2, default 0; ffat_init_encoder() converts it
 * to the converter's quality value as 96 - 32 * quality. */
605 {"aac_at_quality", "quality vs speed control", offsetof(ATDecodeContext, quality), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, AE},
/*
 * Define the AVClass for one encoder: ffat_<NAME>_enc_class, with class name
 * "at_<NAME>_enc". Used by FFAT_ENC as the encoder's priv_class.
 */
609 #define FFAT_ENC_CLASS(NAME) \
610 static const AVClass ffat_##NAME##_enc_class = { \
611 .class_name = "at_" #NAME "_enc", \
612 .item_name = av_default_item_name, \
614 .version = LIBAVUTIL_VERSION_INT, \
/*
 * Define one AudioToolbox encoder, ff_<NAME>_at_encoder, named "<NAME>_at".
 * All encoders share the same init/close/encode2/flush callbacks and private
 * context; they differ only in NAME, ID and PROFILES.
 *
 * The variadic tail is pasted directly after the capability flags, so its
 * first argument can extend the flag expression (e.g. "| AV_CODEC_CAP_...")
 * and later arguments can add further designated initializers (e.g.
 * ".channel_layouts = ..."), as the instantiations at the end of the file do.
 */
617 #define FFAT_ENC(NAME, ID, PROFILES, ...) \
618 FFAT_ENC_CLASS(NAME) \
619 AVCodec ff_##NAME##_at_encoder = { \
620 .name = #NAME "_at", \
621 .long_name = NULL_IF_CONFIG_SMALL(#NAME " (AudioToolbox)"), \
622 .type = AVMEDIA_TYPE_AUDIO, \
624 .priv_data_size = sizeof(ATDecodeContext), \
625 .init = ffat_init_encoder, \
626 .close = ffat_close_encoder, \
627 .encode2 = ffat_encode, \
628 .flush = ffat_encode_flush, \
629 .priv_class = &ffat_##NAME##_enc_class, \
630 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | \
631 AV_CODEC_CAP_ENCODER_FLUSH __VA_ARGS__, \
632 .sample_fmts = (const enum AVSampleFormat[]) { \
634 AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_NONE \
636 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, \
637 .profiles = PROFILES, \
638 .wrapper_name = "at", \
/*
 * Channel layouts advertised by the aac_at encoder (passed as
 * .channel_layouts in its FFAT_ENC instantiation). The entries correspond to
 * layouts that get_aac_tag() maps to an AudioToolbox layout tag.
 */
641 static const uint64_t aac_at_channel_layouts[] = {
644 AV_CH_LAYOUT_SURROUND,
645 AV_CH_LAYOUT_4POINT0,
646 AV_CH_LAYOUT_5POINT0,
647 AV_CH_LAYOUT_5POINT1,
648 AV_CH_LAYOUT_6POINT0,
649 AV_CH_LAYOUT_6POINT1,
650 AV_CH_LAYOUT_7POINT0,
651 AV_CH_LAYOUT_7POINT1_WIDE_BACK,
653 AV_CH_LAYOUT_OCTAGONAL,
/*
 * Encoder definitions. AAC passes no extra capability flags (empty first
 * variadic argument) but restricts the accepted channel layouts; ALAC adds
 * AV_CODEC_CAP_VARIABLE_FRAME_SIZE; the ADPCM IMA QT encoder is disabled.
 */
657 FFAT_ENC(aac, AV_CODEC_ID_AAC, aac_profiles, , .channel_layouts = aac_at_channel_layouts)
658 //FFAT_ENC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL)
659 FFAT_ENC(alac, AV_CODEC_ID_ALAC, NULL, | AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
660 FFAT_ENC(ilbc, AV_CODEC_ID_ILBC, NULL)
661 FFAT_ENC(pcm_alaw, AV_CODEC_ID_PCM_ALAW, NULL)
662 FFAT_ENC(pcm_mulaw, AV_CODEC_ID_PCM_MULAW, NULL)