2 * Audio Toolbox system codecs
4 * copyright (c) 2016 Rodger Combs
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <AudioToolbox/AudioToolbox.h>
26 #include "audio_frame_queue.h"
28 #include "bytestream.h"
30 #include "libavformat/isom.h"
31 #include "libavutil/avassert.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/log.h"
/*
 * Private per-codec state for the AudioToolbox encoders declared in this
 * file (sized through priv_data_size and reached via avctx->priv_data).
 *
 * NOTE(review): the name says "Decode", yet the visible code uses the struct
 * as the encoder's private context. Members referenced elsewhere in this
 * file (mode, quality, pkt_size, frame_size, in_frame, new_in_frame, afq)
 * are declared on lines that are not visible here.
 */
35 typedef struct ATDecodeContext {
/* Converter handle: created in ffat_init_encoder(), disposed in ffat_close_encoder(). */
40 AudioConverterRef converter;
/* Packet description that ffat_encode_callback() fills in and hands to the converter. */
41 AudioStreamPacketDescription pkt_desc;
/*
 * Translate an FFmpeg codec ID (plus a profile for the FF_PROFILE_AAC_*
 * cases) into the corresponding AudioToolbox kAudioFormat* constant.
 *
 * The result is used as mFormatID of the output stream description in
 * ffat_init_encoder(). A codec ID without a mapping trips av_assert0().
 *
 * NOTE(review): the switch heads that select between codec and profile are
 * not visible here; presumably the profile cases apply to AV_CODEC_ID_AAC
 * only -- confirm.
 */
51 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
/* AAC flavours, keyed by profile. */
56 case FF_PROFILE_AAC_LOW:
58 return kAudioFormatMPEG4AAC;
59 case FF_PROFILE_AAC_HE:
60 return kAudioFormatMPEG4AAC_HE;
61 case FF_PROFILE_AAC_HE_V2:
62 return kAudioFormatMPEG4AAC_HE_V2;
63 case FF_PROFILE_AAC_LD:
64 return kAudioFormatMPEG4AAC_LD;
65 case FF_PROFILE_AAC_ELD:
66 return kAudioFormatMPEG4AAC_ELD;
/* Remaining codecs map one-to-one, independent of profile. */
68 case AV_CODEC_ID_ADPCM_IMA_QT:
69 return kAudioFormatAppleIMA4;
70 case AV_CODEC_ID_ALAC:
71 return kAudioFormatAppleLossless;
72 case AV_CODEC_ID_ILBC:
73 return kAudioFormatiLBC;
74 case AV_CODEC_ID_PCM_ALAW:
75 return kAudioFormatALaw;
76 case AV_CODEC_ID_PCM_MULAW:
77 return kAudioFormatULaw;
/* No mapping: this is a programming error, so abort via assertion. */
79 av_assert0(!"Invalid codec ID!");
/*
 * Pull the converter's current parameters back into the codec context:
 * maximum output packet size, leading (priming) frames, frames per packet
 * and, for iLBC, bytes per packet.
 *
 * Called from ffat_init_encoder() once the converter has been configured.
 */
84 static void ffat_update_ctx(AVCodecContext *avctx)
86 ATDecodeContext *at = avctx->priv_data;
/* size is an in/out byte count for each property query; it is reset before every call. */
87 UInt32 size = sizeof(unsigned);
88 AudioConverterPrimeInfo prime_info;
89 AudioStreamBasicDescription out_format;
/*
 * NOTE(review): the status of this query is ignored and at->pkt_size is
 * read right after; if the call fails the value tested below is whatever
 * was in the context before -- confirm this is intended.
 */
91 AudioConverterGetProperty(at->converter,
92 kAudioConverterPropertyMaximumOutputPacketSize,
93 &size, &at->pkt_size);
/*
 * Fall back to a fixed 50 KiB packet buffer when no usable size was reported.
 * NOTE(review): pkt_size is queried with sizeof(unsigned); if it is declared
 * unsigned, "<= 0" can only match 0 -- confirm against the struct.
 */
95 if (at->pkt_size <= 0)
96 at->pkt_size = 1024 * 50;
98 size = sizeof(prime_info);
/* A zero status is treated as success for the two queries below. */
100 if (!AudioConverterGetProperty(at->converter,
101 kAudioConverterPrimeInfo,
102 &size, &prime_info)) {
/* Leading frames reported by the converter become the encoder's initial padding. */
103 avctx->initial_padding = prime_info.leadingFrames;
106 size = sizeof(out_format);
107 if (!AudioConverterGetProperty(at->converter,
108 kAudioConverterCurrentOutputStreamDescription,
109 &size, &out_format)) {
/* Only override the context when the converter reports a non-zero value. */
110 if (out_format.mFramesPerPacket)
111 avctx->frame_size = out_format.mFramesPerPacket;
112 if (out_format.mBytesPerPacket && avctx->codec_id == AV_CODEC_ID_ILBC)
113 avctx->block_align = out_format.mBytesPerPacket;
/* Remember the converter's native frame size before any scaling below. */
116 at->frame_size = avctx->frame_size;
/*
 * For A-law/mu-law, scale both the packet buffer and the advertised
 * frame size by 1024; at->frame_size keeps the unscaled value.
 */
117 if (avctx->codec_id == AV_CODEC_ID_PCM_MULAW ||
118 avctx->codec_id == AV_CODEC_ID_PCM_ALAW) {
119 at->pkt_size *= 1024;
120 avctx->frame_size *= 1024;
/*
 * Read a descriptor header from gb: one tag byte, stored in *tag, followed
 * by a length that is accumulated seven bits at a time from subsequent bytes.
 *
 * NOTE(review): the loop bounds, its termination condition and the return
 * statement are not visible here; callers in this file use the return value
 * as the descriptor length.
 */
124 static int read_descr(GetByteContext *gb, int *tag)
128 *tag = bytestream2_get_byte(gb);
130 int c = bytestream2_get_byte(gb);
/* Only the low 7 bits of each byte contribute to the length. */
131 len = (len << 7) | (c & 0x7f);
/*
 * Choose the iLBC mode for the encoder, first from block_align (38 or 50)
 * and otherwise from the bit rate.
 *
 * ffat_init_encoder() uses the result as a duration in milliseconds at
 * 8000 Hz (frames per packet = 8000 * mode / 1000) and pairs mode 20 with
 * 38-byte packets and any other mode with 50-byte packets.
 *
 * NOTE(review): the values returned for the two block_align branches and for
 * the final fallback are on lines not visible here.
 */
138 static int get_ilbc_mode(AVCodecContext *avctx)
140 if (avctx->block_align == 38)
142 else if (avctx->block_align == 50)
144 else if (avctx->bit_rate > 0)
/* Bit rates up to 14000 select mode 30, higher ones mode 20. */
145 return avctx->bit_rate <= 14000 ? 30 : 20;
/*
 * Encoder init callback: build an AudioConverter from linear PCM to the
 * target format, apply channel layout / bit depth / rate control / quality
 * settings, export the converter's magic cookie as extradata and initialise
 * the audio frame queue.
 *
 * Visible error returns are AVERROR_UNKNOWN (converter creation or cookie
 * retrieval failed) and AVERROR(ENOMEM) (extradata allocation failed).
 */
150 static av_cold int ffat_init_encoder(AVCodecContext *avctx)
152 ATDecodeContext *at = avctx->priv_data;
/*
 * Input side: packed linear PCM with one frame per packet and all channels
 * in a single frame; float / unsigned / signed-integer flags follow the
 * sample format.
 */
155 AudioStreamBasicDescription in_format = {
156 .mSampleRate = avctx->sample_rate,
157 .mFormatID = kAudioFormatLinearPCM,
158 .mFormatFlags = ((avctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
159 avctx->sample_fmt == AV_SAMPLE_FMT_DBL) ? kAudioFormatFlagIsFloat
160 : avctx->sample_fmt == AV_SAMPLE_FMT_U8 ? 0
161 : kAudioFormatFlagIsSignedInteger)
162 | kAudioFormatFlagIsPacked,
163 .mBytesPerPacket = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
164 .mFramesPerPacket = 1,
165 .mBytesPerFrame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
166 .mChannelsPerFrame = avctx->channels,
167 .mBitsPerChannel = av_get_bytes_per_sample(avctx->sample_fmt) * 8,
/* Output side: only rate, format and channel count are specified up front. */
169 AudioStreamBasicDescription out_format = {
170 .mSampleRate = avctx->sample_rate,
171 .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
172 .mChannelsPerFrame = in_format.mChannelsPerFrame,
/*
 * The FFmpeg channel mask is passed through unchanged as the channel bitmap.
 * NOTE(review): this assumes both bitmaps assign the same bits to the same
 * speakers -- confirm.
 */
174 AudioChannelLayout channel_layout = {
175 .mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelBitmap,
176 .mChannelBitmap = avctx->channel_layout,
178 UInt32 size = sizeof(channel_layout);
/* iLBC needs explicit packet geometry derived from the selected mode. */
180 if (avctx->codec_id == AV_CODEC_ID_ILBC) {
181 int mode = get_ilbc_mode(avctx);
182 out_format.mFramesPerPacket = 8000 * mode / 1000;
183 out_format.mBytesPerPacket = (mode == 20 ? 38 : 50);
186 status = AudioConverterNew(&in_format, &out_format, &at->converter);
189 av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
190 return AVERROR_UNKNOWN;
/*
 * NOTE(review): size is reset to sizeof(UInt32) here, yet the two calls
 * below pass &channel_layout (an AudioChannelLayout). The value assigned at
 * the declaration, sizeof(channel_layout), looks like the intended size.
 * The status of both calls is also ignored.
 */
193 size = sizeof(UInt32);
195 AudioConverterSetProperty(at->converter, kAudioConverterInputChannelLayout,
196 size, &channel_layout);
197 AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
198 size, &channel_layout);
/* Forward the source bit depth as a hint when the caller supplied one. */
200 if (avctx->bits_per_raw_sample) {
201 size = sizeof(avctx->bits_per_raw_sample);
202 AudioConverterSetProperty(at->converter,
203 kAudioConverterPropertyBitDepthHint,
204 size, &avctx->bits_per_raw_sample);
/*
 * Rate control: variable when the QSCALE flag is set, constant otherwise.
 * NOTE(review): any enclosing condition for this assignment is not visible here.
 */
208 at->mode = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
209 kAudioCodecBitRateControlMode_Variable :
210 kAudioCodecBitRateControlMode_Constant;
212 AudioConverterSetProperty(at->converter, kAudioCodecPropertyBitRateControlMode,
215 if (at->mode == kAudioCodecBitRateControlMode_Variable) {
/* VBR quality comes from global_quality in lambda units, clamped to 0..14 with a warning. */
216 int q = avctx->global_quality / FF_QP2LAMBDA;
217 if (q < 0 || q > 14) {
218 av_log(avctx, AV_LOG_WARNING,
219 "VBR quality %d out of range, should be 0-14\n", q);
220 q = av_clip(q, 0, 14);
223 AudioConverterSetProperty(at->converter, kAudioCodecPropertySoundQualityForVBR,
225 } else if (avctx->bit_rate > 0) {
/* Non-VBR modes: pass the requested bit rate to the converter. */
226 UInt32 rate = avctx->bit_rate;
227 AudioConverterSetProperty(at->converter, kAudioConverterEncodeBitRate,
/*
 * Map the quality value stored in the context to 96 - 32 * quality
 * (96, 64 or 32 for the 0..2 range of the aac_at_quality option).
 */
231 at->quality = 96 - at->quality * 32;
232 AudioConverterSetProperty(at->converter, kAudioConverterCodecQuality,
/*
 * Export the converter's magic cookie as extradata. The property-info query
 * writes the cookie size straight into avctx->extradata_size.
 */
235 if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterCompressionMagicCookie,
236 &avctx->extradata_size, NULL) &&
237 avctx->extradata_size) {
238 int extradata_size = avctx->extradata_size;
/* Buffer is sized for the raw cookie plus the standard input padding. */
240 if (!(avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE)))
241 return AVERROR(ENOMEM);
/*
 * ALAC: write a 0x24-byte header ('alac' tag after the size) and place
 * the cookie at offset 12.
 * NOTE(review): extradata_size is assigned 0x24 twice in this branch; the
 * second assignment is redundant.
 */
242 if (avctx->codec_id == AV_CODEC_ID_ALAC) {
243 avctx->extradata_size = 0x24;
244 AV_WB32(avctx->extradata, 0x24);
245 AV_WB32(avctx->extradata + 4, MKBETAG('a','l','a','c'));
246 extradata = avctx->extradata + 12;
247 avctx->extradata_size = 0x24;
249 extradata = avctx->extradata;
251 status = AudioConverterGetProperty(at->converter,
252 kAudioConverterCompressionMagicCookie,
253 &extradata_size, extradata);
/*
 * NOTE(review): this error path returns without any visible release of
 * at->converter or avctx->extradata; whether the close callback runs after
 * a failed init is not visible here -- confirm.
 */
255 av_log(avctx, AV_LOG_ERROR, "AudioToolbox cookie error: %i\n", (int)status);
256 return AVERROR_UNKNOWN;
257 } else if (avctx->codec_id == AV_CODEC_ID_AAC) {
/*
 * AAC: walk the descriptors in the cookie and keep only the payload of
 * the decoder-specific descriptor, moved to the start of extradata.
 */
260 bytestream2_init(&gb, extradata, extradata_size);
262 len = read_descr(&gb, &tag);
263 if (tag == MP4DecConfigDescrTag) {
/* Skip the 13 bytes that precede the nested descriptor. */
264 bytestream2_skip(&gb, 13);
265 len = read_descr(&gb, &tag);
266 if (tag == MP4DecSpecificDescrTag) {
/* Clamp to the bytes actually left; memmove because source and destination share the buffer. */
267 len = FFMIN(gb.buffer_end - gb.buffer, len);
268 memmove(extradata, gb.buffer, len);
269 avctx->extradata_size = len;
272 } else if (tag == MP4ESDescrTag) {
/* ES descriptor: skip 2 bytes, read the flags byte, then skip the optional fields it announces. */
274 bytestream2_skip(&gb, 2);
275 flags = bytestream2_get_byte(&gb);
276 if (flags & 0x80) //streamDependenceFlag
277 bytestream2_skip(&gb, 2);
278 if (flags & 0x40) //URL_Flag
279 bytestream2_skip(&gb, bytestream2_get_byte(&gb));
280 if (flags & 0x20) //OCRstreamFlag
281 bytestream2_skip(&gb, 2);
283 } while (bytestream2_get_bytes_left(&gb));
284 } else if (avctx->codec_id != AV_CODEC_ID_ALAC) {
/* Other codecs: use the size the converter actually returned. */
285 avctx->extradata_size = extradata_size;
/* Pick up packet size, padding and frame size from the configured converter. */
289 ffat_update_ctx(avctx);
291 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090
/*
 * Optional VBR packet size cap derived from rc_max_rate.
 * NOTE(review): the product is computed into an int; large rc_max_rate
 * values may not fit -- confirm the field's type and the property's unit.
 */
292 if (at->mode == kAudioCodecBitRateControlMode_Variable && avctx->rc_max_rate) {
293 int max_size = avctx->rc_max_rate * avctx->frame_size / avctx->sample_rate;
295 AudioConverterSetProperty(at->converter, kAudioCodecPropertyPacketSizeLimitForVBR,
300 ff_af_queue_init(avctx, &at->afq);
/*
 * Input callback handed to AudioConverterFillComplexBuffer() by
 * ffat_encode(): supplies the converter with the pending input frame.
 *
 * The user-data argument is the AVCodecContext. On each call the previously
 * supplied frame is released and the frame staged in new_in_frame becomes
 * the current one; its first data plane is exposed through data->mBuffers[0].
 *
 * NOTE(review): the conditions guarding the early "no data" paths and the
 * return statements are on lines not visible here.
 */
305 static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_packets,
306 AudioBufferList *data,
307 AudioStreamPacketDescription **packets,
310 AVCodecContext *avctx = inctx;
311 ATDecodeContext *at = avctx->priv_data;
/* Report an empty packet description. */
316 *packets = &at->pkt_desc;
317 at->pkt_desc.mDataByteSize = 0;
/* Drop the frame handed out last time and take over the newly staged one. */
322 av_frame_unref(&at->in_frame);
323 av_frame_move_ref(&at->in_frame, &at->new_in_frame);
/* Nothing was staged for this call. */
325 if (!at->in_frame.data[0]) {
/* Describe the frame's sample data as a single buffer. */
330 data->mNumberBuffers = 1;
331 data->mBuffers[0].mNumberChannels = 0;
332 data->mBuffers[0].mDataByteSize = at->in_frame.nb_samples *
333 av_get_bytes_per_sample(avctx->sample_fmt) *
335 data->mBuffers[0].mData = at->in_frame.data[0];
/*
 * Packet count is nb_samples / frame_size rounded up.
 * NOTE(review): divides by at->frame_size; confirm it cannot be 0 here.
 */
336 *nb_packets = (at->in_frame.nb_samples + (at->frame_size - 1)) / at->frame_size;
/* Packet description covers the whole buffer and carries the sample count. */
339 *packets = &at->pkt_desc;
340 at->pkt_desc.mDataByteSize = data->mBuffers[0].mDataByteSize;
341 at->pkt_desc.mVariableFramesInPacket = at->in_frame.nb_samples;
/*
 * encode2 callback: stage the input frame for ffat_encode_callback(), run
 * the converter into a freshly allocated packet and report whether a packet
 * was produced through *got_packet_ptr.
 *
 * NOTE(review): the guard around the frame-queueing calls (presumably a
 * check for a non-NULL frame) and the return statements are on lines not
 * visible here.
 */
347 static int ffat_encode(AVCodecContext *avctx, AVPacket *avpkt,
348 const AVFrame *frame, int *got_packet_ptr)
350 ATDecodeContext *at = avctx->priv_data;
/* One output buffer, sized to the maximum packet size cached in the context. */
353 AudioBufferList out_buffers = {
357 .mNumberChannels = avctx->channels,
358 .mDataByteSize = at->pkt_size,
362 AudioStreamPacketDescription out_pkt_desc = {0};
364 if ((ret = ff_alloc_packet2(avctx, avpkt, at->pkt_size, 0)) < 0)
/* Clear any frame still staged from a previous call. */
367 av_frame_unref(&at->new_in_frame);
/* Track the frame in the audio frame queue and stage a reference for the callback. */
370 if ((ret = ff_af_queue_add(&at->afq, frame)) < 0)
372 if ((ret = av_frame_ref(&at->new_in_frame, frame)) < 0)
/* The converter writes directly into the packet's data buffer. */
378 out_buffers.mBuffers[0].mData = avpkt->data;
/*
 * *got_packet_ptr doubles as the converter's in/out packet count: it is
 * seeded with the number of converter packets per advertised frame.
 */
380 *got_packet_ptr = avctx->frame_size / at->frame_size;
/* A packet description is only requested when the advertised frame size does not exceed the converter's. */
382 ret = AudioConverterFillComplexBuffer(at->converter, ffat_encode_callback, avctx,
383 got_packet_ptr, &out_buffers,
384 (avctx->frame_size > at->frame_size) ? NULL : &out_pkt_desc);
/*
 * Status 0 or 1 with a non-zero packet count counts as success.
 * NOTE(review): 1 is presumably what ffat_encode_callback() returns when it
 * has no input; its return statements are not visible -- confirm.
 */
385 if ((!ret || ret == 1) && *got_packet_ptr) {
386 avpkt->size = out_buffers.mBuffers[0].mDataByteSize;
/* Consume queued samples: the reported variable frame count when non-zero. */
387 ff_af_queue_remove(&at->afq, out_pkt_desc.mVariableFramesInPacket ?
388 out_pkt_desc.mVariableFramesInPacket :
/* Any other non-zero status is only logged as a warning. */
392 } else if (ret && ret != 1) {
393 av_log(avctx, AV_LOG_WARNING, "Encode error: %i\n", ret);
/*
 * Flush callback: reset the converter and release both the staged and the
 * currently held input frame references.
 */
399 static av_cold void ffat_encode_flush(AVCodecContext *avctx)
401 ATDecodeContext *at = avctx->priv_data;
402 AudioConverterReset(at->converter);
403 av_frame_unref(&at->new_in_frame);
404 av_frame_unref(&at->in_frame);
/*
 * Close callback: dispose of the converter, release both input frame
 * references and tear down the audio frame queue.
 */
407 static av_cold int ffat_close_encoder(AVCodecContext *avctx)
409 ATDecodeContext *at = avctx->priv_data;
410 AudioConverterDispose(at->converter);
411 av_frame_unref(&at->new_in_frame);
412 av_frame_unref(&at->in_frame);
413 ff_af_queue_close(&at->afq);
/*
 * Profiles advertised by the AAC encoder instance (passed as PROFILES to
 * FFAT_ENC). Each entry matches a case handled by ffat_get_format_id();
 * the list ends with an FF_PROFILE_UNKNOWN entry.
 */
417 static const AVProfile aac_profiles[] = {
418 { FF_PROFILE_AAC_LOW, "LC" },
419 { FF_PROFILE_AAC_HE, "HE-AAC" },
420 { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
421 { FF_PROFILE_AAC_LD, "LD" },
422 { FF_PROFILE_AAC_ELD, "ELD" },
423 { FF_PROFILE_UNKNOWN },
/* Flag set shared by every option below: audio + encoding parameter. */
426 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options stored in ATDecodeContext.
 *
 * aac_at_mode    -> mode: rate-control mode, default -1 ("auto"); the named
 *                   constants below belong to the "mode" unit.
 * aac_at_quality -> quality: integer 0..2, default 0; ffat_init_encoder()
 *                   converts it to 96 - 32 * quality before passing it on.
 */
427 static const AVOption options[] = {
428 {"aac_at_mode", "ratecontrol mode", offsetof(ATDecodeContext, mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, kAudioCodecBitRateControlMode_Variable, AE, "mode"},
429 {"auto", "VBR if global quality is given; CBR otherwise", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, AE, "mode"},
430 {"cbr", "constant bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Constant}, INT_MIN, INT_MAX, AE, "mode"},
431 {"abr", "long-term average bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_LongTermAverage}, INT_MIN, INT_MAX, AE, "mode"},
432 {"cvbr", "constrained variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_VariableConstrained}, INT_MIN, INT_MAX, AE, "mode"},
433 {"vbr" , "variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Variable}, INT_MIN, INT_MAX, AE, "mode"},
434 {"aac_at_quality", "quality vs speed control", offsetof(ATDecodeContext, quality), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, AE},
/*
 * Define the AVClass "ffat_<NAME>_enc_class" (class name "at_<NAME>_enc")
 * that FFAT_ENC attaches to an encoder as its priv_class.
 */
438 #define FFAT_ENC_CLASS(NAME) \
439 static const AVClass ffat_##NAME##_enc_class = { \
440 .class_name = "at_" #NAME "_enc", \
441 .item_name = av_default_item_name, \
443 .version = LIBAVUTIL_VERSION_INT, \
/*
 * Declare one AudioToolbox encoder, "ff_<NAME>_at_encoder" (codec name
 * "<NAME>_at"), together with its AVClass.
 *
 * NAME      codec name fragment used in identifiers and strings
 * ID        AVCodecID of the encoder
 * PROFILES  AVProfile list, or NULL
 * ...       optional extra capability flags; because they are pasted right
 *           after "AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY", they must start
 *           with a leading '|'
 *
 * All instances share the same init/close/encode2/flush callbacks and the
 * ATDecodeContext private data.
 */
446 #define FFAT_ENC(NAME, ID, PROFILES, ...) \
447 FFAT_ENC_CLASS(NAME) \
448 AVCodec ff_##NAME##_at_encoder = { \
449 .name = #NAME "_at", \
450 .long_name = NULL_IF_CONFIG_SMALL(#NAME " (AudioToolbox)"), \
451 .type = AVMEDIA_TYPE_AUDIO, \
453 .priv_data_size = sizeof(ATDecodeContext), \
454 .init = ffat_init_encoder, \
455 .close = ffat_close_encoder, \
456 .encode2 = ffat_encode, \
457 .flush = ffat_encode_flush, \
458 .priv_class = &ffat_##NAME##_enc_class, \
459 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY __VA_ARGS__, \
460 .sample_fmts = (const enum AVSampleFormat[]) { \
462 AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_NONE \
464 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE, \
465 .profiles = PROFILES, \
/*
 * Encoder instances. Only AAC supplies a profile list; ALAC additionally
 * advertises variable frame size and lossless capabilities. The ADPCM IMA
 * QT instance is commented out and therefore not built.
 */
468 FFAT_ENC(aac, AV_CODEC_ID_AAC, aac_profiles)
469 //FFAT_ENC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL)
470 FFAT_ENC(alac, AV_CODEC_ID_ALAC, NULL, | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | AV_CODEC_CAP_LOSSLESS)
471 FFAT_ENC(ilbc, AV_CODEC_ID_ILBC, NULL)
472 FFAT_ENC(pcm_alaw, AV_CODEC_ID_PCM_ALAW, NULL)
473 FFAT_ENC(pcm_mulaw, AV_CODEC_ID_PCM_MULAW, NULL)