git.sesse.net Git - ffmpeg/blob - libavcodec/audiotoolboxenc.c

   1 /*
   2  * Audio Toolbox system codecs
   3  *
   4  * copyright (c) 2016 Rodger Combs
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include <AudioToolbox/AudioToolbox.h>
  24
  25 #include "config.h"
  26 #include "audio_frame_queue.h"
  27 #include "avcodec.h"
  28 #include "bytestream.h"
  29 #include "internal.h"
  30 #include "libavformat/isom.h"
  31 #include "libavutil/avassert.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/log.h"
  34
/*
 * Private codec context shared by all AudioToolbox encoders in this file.
 * (The name says "Decode", but every use in this file is on the encode path.)
 */
typedef struct ATDecodeContext {
    AVClass *av_class;          /* class pointer; the codecs' priv_class is set by FFAT_ENC */
    int mode;                   /* "aac_at_mode" option; -1 means auto and is resolved in ffat_init_encoder */
    int quality;                /* "aac_at_quality" option (0..2); remapped to 96 - quality * 32 during init */

    AudioConverterRef converter;            /* created in ffat_init_encoder, disposed in ffat_close_encoder */
    AudioStreamPacketDescription pkt_desc;  /* packet description handed out by ffat_encode_callback */
    AVFrame in_frame;           /* frame whose data[0] is currently lent to the converter */
    AVFrame new_in_frame;       /* reference taken by ffat_encode, moved into in_frame by the callback */

    unsigned pkt_size;          /* output packet allocation size; queried from the converter, 1024 * 50 if it reports 0 */
    AudioFrameQueue afq;        /* frame queue used to derive pts/duration of output packets */
    int eof;                    /* set once ffat_encode has been called with a NULL frame */
    int frame_size;             /* avctx->frame_size as captured in ffat_update_ctx, before the A-law/mu-law x1024 scaling */
} ATDecodeContext;
  50
  51 static UInt32 ffat_get_format_id(enum AVCodecID codec, int profile)
  52 {
  53     switch (codec) {
  54     case AV_CODEC_ID_AAC:
  55         switch (profile) {
  56         case FF_PROFILE_AAC_LOW:
  57         default:
  58             return kAudioFormatMPEG4AAC;
  59         case FF_PROFILE_AAC_HE:
  60             return kAudioFormatMPEG4AAC_HE;
  61         case FF_PROFILE_AAC_HE_V2:
  62             return kAudioFormatMPEG4AAC_HE_V2;
  63         case FF_PROFILE_AAC_LD:
  64             return kAudioFormatMPEG4AAC_LD;
  65         case FF_PROFILE_AAC_ELD:
  66             return kAudioFormatMPEG4AAC_ELD;
  67         }
  68     case AV_CODEC_ID_ADPCM_IMA_QT:
  69         return kAudioFormatAppleIMA4;
  70     case AV_CODEC_ID_ALAC:
  71         return kAudioFormatAppleLossless;
  72     case AV_CODEC_ID_ILBC:
  73         return kAudioFormatiLBC;
  74     case AV_CODEC_ID_PCM_ALAW:
  75         return kAudioFormatALaw;
  76     case AV_CODEC_ID_PCM_MULAW:
  77         return kAudioFormatULaw;
  78     default:
  79         av_assert0(!"Invalid codec ID!");
  80         return 0;
  81     }
  82 }
  83
  84 static void ffat_update_ctx(AVCodecContext *avctx)
  85 {
  86     ATDecodeContext *at = avctx->priv_data;
  87     UInt32 size = sizeof(unsigned);
  88     AudioConverterPrimeInfo prime_info;
  89     AudioStreamBasicDescription out_format;
  90
  91     AudioConverterGetProperty(at->converter,
  92                               kAudioConverterPropertyMaximumOutputPacketSize,
  93                               &size, &at->pkt_size);
  94
  95     if (at->pkt_size <= 0)
  96         at->pkt_size = 1024 * 50;
  97
  98     size = sizeof(prime_info);
  99
 100     if (!AudioConverterGetProperty(at->converter,
 101                                    kAudioConverterPrimeInfo,
 102                                    &size, &prime_info)) {
 103         avctx->initial_padding = prime_info.leadingFrames;
 104     }
 105
 106     size = sizeof(out_format);
 107     if (!AudioConverterGetProperty(at->converter,
 108                                    kAudioConverterCurrentOutputStreamDescription,
 109                                    &size, &out_format)) {
 110         if (out_format.mFramesPerPacket)
 111             avctx->frame_size = out_format.mFramesPerPacket;
 112         if (out_format.mBytesPerPacket && avctx->codec_id == AV_CODEC_ID_ILBC)
 113             avctx->block_align = out_format.mBytesPerPacket;
 114     }
 115
 116     at->frame_size = avctx->frame_size;
 117     if (avctx->codec_id == AV_CODEC_ID_PCM_MULAW ||
 118         avctx->codec_id == AV_CODEC_ID_PCM_ALAW) {
 119         at->pkt_size *= 1024;
 120         avctx->frame_size *= 1024;
 121     }
 122 }
 123
 124 static int read_descr(GetByteContext *gb, int *tag)
 125 {
 126     int len = 0;
 127     int count = 4;
 128     *tag = bytestream2_get_byte(gb);
 129     while (count--) {
 130         int c = bytestream2_get_byte(gb);
 131         len = (len << 7) | (c & 0x7f);
 132         if (!(c & 0x80))
 133             break;
 134     }
 135     return len;
 136 }
 137
 138 static int get_ilbc_mode(AVCodecContext *avctx)
 139 {
 140     if (avctx->block_align == 38)
 141         return 20;
 142     else if (avctx->block_align == 50)
 143         return 30;
 144     else if (avctx->bit_rate > 0)
 145         return avctx->bit_rate <= 14000 ? 30 : 20;
 146     else
 147         return 30;
 148 }
 149
 150 static av_cold int ffat_init_encoder(AVCodecContext *avctx)
 151 {
 152     ATDecodeContext *at = avctx->priv_data;
 153     OSStatus status;
 154
 155     AudioStreamBasicDescription in_format = {
 156         .mSampleRate = avctx->sample_rate,
 157         .mFormatID = kAudioFormatLinearPCM,
 158         .mFormatFlags = ((avctx->sample_fmt == AV_SAMPLE_FMT_FLT ||
 159                           avctx->sample_fmt == AV_SAMPLE_FMT_DBL) ? kAudioFormatFlagIsFloat
 160                         : avctx->sample_fmt == AV_SAMPLE_FMT_U8 ? 0
 161                         : kAudioFormatFlagIsSignedInteger)
 162                         | kAudioFormatFlagIsPacked,
 163         .mBytesPerPacket = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
 164         .mFramesPerPacket = 1,
 165         .mBytesPerFrame = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels,
 166         .mChannelsPerFrame = avctx->channels,
 167         .mBitsPerChannel = av_get_bytes_per_sample(avctx->sample_fmt) * 8,
 168     };
 169     AudioStreamBasicDescription out_format = {
 170         .mSampleRate = avctx->sample_rate,
 171         .mFormatID = ffat_get_format_id(avctx->codec_id, avctx->profile),
 172         .mChannelsPerFrame = in_format.mChannelsPerFrame,
 173     };
 174     AudioChannelLayout channel_layout = {
 175         .mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelBitmap,
 176         .mChannelBitmap = avctx->channel_layout,
 177     };
 178     UInt32 size = sizeof(channel_layout);
 179
 180     if (avctx->codec_id == AV_CODEC_ID_ILBC) {
 181         int mode = get_ilbc_mode(avctx);
 182         out_format.mFramesPerPacket  = 8000 * mode / 1000;
 183         out_format.mBytesPerPacket   = (mode == 20 ? 38 : 50);
 184     }
 185
 186     status = AudioConverterNew(&in_format, &out_format, &at->converter);
 187
 188     if (status != 0) {
 189         av_log(avctx, AV_LOG_ERROR, "AudioToolbox init error: %i\n", (int)status);
 190         return AVERROR_UNKNOWN;
 191     }
 192
 193     size = sizeof(UInt32);
 194
 195     AudioConverterSetProperty(at->converter, kAudioConverterInputChannelLayout,
 196                               size, &channel_layout);
 197     AudioConverterSetProperty(at->converter, kAudioConverterOutputChannelLayout,
 198                               size, &channel_layout);
 199
 200     if (avctx->bits_per_raw_sample) {
 201         size = sizeof(avctx->bits_per_raw_sample);
 202         AudioConverterSetProperty(at->converter,
 203                                   kAudioConverterPropertyBitDepthHint,
 204                                   size, &avctx->bits_per_raw_sample);
 205     }
 206
 207     if (at->mode == -1)
 208         at->mode = (avctx->flags & AV_CODEC_FLAG_QSCALE) ?
 209                    kAudioCodecBitRateControlMode_Variable :
 210                    kAudioCodecBitRateControlMode_Constant;
 211
 212     AudioConverterSetProperty(at->converter, kAudioCodecPropertyBitRateControlMode,
 213                               size, &at->mode);
 214
 215     if (at->mode == kAudioCodecBitRateControlMode_Variable) {
 216         int q = avctx->global_quality / FF_QP2LAMBDA;
 217         if (q < 0 || q > 14) {
 218             av_log(avctx, AV_LOG_WARNING,
 219                    "VBR quality %d out of range, should be 0-14\n", q);
 220             q = av_clip(q, 0, 14);
 221         }
 222         q = 127 - q * 9;
 223         AudioConverterSetProperty(at->converter, kAudioCodecPropertySoundQualityForVBR,
 224                                   size, &q);
 225     } else if (avctx->bit_rate > 0) {
 226         UInt32 rate = avctx->bit_rate;
 227         AudioConverterSetProperty(at->converter, kAudioConverterEncodeBitRate,
 228                                   size, &rate);
 229     }
 230
 231     at->quality = 96 - at->quality * 32;
 232     AudioConverterSetProperty(at->converter, kAudioConverterCodecQuality,
 233                               size, &at->quality);
 234
 235     if (!AudioConverterGetPropertyInfo(at->converter, kAudioConverterCompressionMagicCookie,
 236                                        &avctx->extradata_size, NULL) &&
 237         avctx->extradata_size) {
 238         int extradata_size = avctx->extradata_size;
 239         uint8_t *extradata;
 240         if (!(avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE)))
 241             return AVERROR(ENOMEM);
 242         if (avctx->codec_id == AV_CODEC_ID_ALAC) {
 243             avctx->extradata_size = 0x24;
 244             AV_WB32(avctx->extradata,     0x24);
 245             AV_WB32(avctx->extradata + 4, MKBETAG('a','l','a','c'));
 246             extradata = avctx->extradata + 12;
 247             avctx->extradata_size = 0x24;
 248         } else {
 249             extradata = avctx->extradata;
 250         }
 251         status = AudioConverterGetProperty(at->converter,
 252                                            kAudioConverterCompressionMagicCookie,
 253                                            &extradata_size, extradata);
 254         if (status != 0) {
 255             av_log(avctx, AV_LOG_ERROR, "AudioToolbox cookie error: %i\n", (int)status);
 256             return AVERROR_UNKNOWN;
 257         } else if (avctx->codec_id == AV_CODEC_ID_AAC) {
 258             GetByteContext gb;
 259             int tag, len;
 260             bytestream2_init(&gb, extradata, extradata_size);
 261             do {
 262                 len = read_descr(&gb, &tag);
 263                 if (tag == MP4DecConfigDescrTag) {
 264                     bytestream2_skip(&gb, 13);
 265                     len = read_descr(&gb, &tag);
 266                     if (tag == MP4DecSpecificDescrTag) {
 267                         len = FFMIN(gb.buffer_end - gb.buffer, len);
 268                         memmove(extradata, gb.buffer, len);
 269                         avctx->extradata_size = len;
 270                         break;
 271                     }
 272                 } else if (tag == MP4ESDescrTag) {
 273                     int flags;
 274                     bytestream2_skip(&gb, 2);
 275                     flags = bytestream2_get_byte(&gb);
 276                     if (flags & 0x80) //streamDependenceFlag
 277                         bytestream2_skip(&gb, 2);
 278                     if (flags & 0x40) //URL_Flag
 279                         bytestream2_skip(&gb, bytestream2_get_byte(&gb));
 280                     if (flags & 0x20) //OCRstreamFlag
 281                         bytestream2_skip(&gb, 2);
 282                 }
 283             } while (bytestream2_get_bytes_left(&gb));
 284         } else if (avctx->codec_id != AV_CODEC_ID_ALAC) {
 285             avctx->extradata_size = extradata_size;
 286         }
 287     }
 288
 289     ffat_update_ctx(avctx);
 290
 291 #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1090
 292     if (at->mode == kAudioCodecBitRateControlMode_Variable && avctx->rc_max_rate) {
 293         int max_size = avctx->rc_max_rate * avctx->frame_size / avctx->sample_rate;
 294         if (max_size)
 295         AudioConverterSetProperty(at->converter, kAudioCodecPropertyPacketSizeLimitForVBR,
 296                                   size, &max_size);
 297     }
 298 #endif
 299
 300     ff_af_queue_init(avctx, &at->afq);
 301
 302     return 0;
 303 }
 304
 305 static OSStatus ffat_encode_callback(AudioConverterRef converter, UInt32 *nb_packets,
 306                                      AudioBufferList *data,
 307                                      AudioStreamPacketDescription **packets,
 308                                      void *inctx)
 309 {
 310     AVCodecContext *avctx = inctx;
 311     ATDecodeContext *at = avctx->priv_data;
 312
 313     if (at->eof) {
 314         *nb_packets = 0;
 315         if (packets) {
 316             *packets = &at->pkt_desc;
 317             at->pkt_desc.mDataByteSize = 0;
 318         }
 319         return 0;
 320     }
 321
 322     av_frame_unref(&at->in_frame);
 323     av_frame_move_ref(&at->in_frame, &at->new_in_frame);
 324
 325     if (!at->in_frame.data[0]) {
 326         *nb_packets = 0;
 327         return 1;
 328     }
 329
 330     data->mNumberBuffers              = 1;
 331     data->mBuffers[0].mNumberChannels = 0;
 332     data->mBuffers[0].mDataByteSize   = at->in_frame.nb_samples *
 333                                         av_get_bytes_per_sample(avctx->sample_fmt) *
 334                                         avctx->channels;
 335     data->mBuffers[0].mData           = at->in_frame.data[0];
 336     *nb_packets = (at->in_frame.nb_samples + (at->frame_size - 1)) / at->frame_size;
 337
 338     if (packets) {
 339         *packets = &at->pkt_desc;
 340         at->pkt_desc.mDataByteSize = data->mBuffers[0].mDataByteSize;
 341         at->pkt_desc.mVariableFramesInPacket = at->in_frame.nb_samples;
 342     }
 343
 344     return 0;
 345 }
 346
 347 static int ffat_encode(AVCodecContext *avctx, AVPacket *avpkt,
 348                        const AVFrame *frame, int *got_packet_ptr)
 349 {
 350     ATDecodeContext *at = avctx->priv_data;
 351     OSStatus ret;
 352
 353     AudioBufferList out_buffers = {
 354         .mNumberBuffers = 1,
 355         .mBuffers = {
 356             {
 357                 .mNumberChannels = avctx->channels,
 358                 .mDataByteSize = at->pkt_size,
 359             }
 360         }
 361     };
 362     AudioStreamPacketDescription out_pkt_desc = {0};
 363
 364     if ((ret = ff_alloc_packet2(avctx, avpkt, at->pkt_size, 0)) < 0)
 365         return ret;
 366
 367     av_frame_unref(&at->new_in_frame);
 368
 369     if (frame) {
 370         if ((ret = ff_af_queue_add(&at->afq, frame)) < 0)
 371             return ret;
 372         if ((ret = av_frame_ref(&at->new_in_frame, frame)) < 0)
 373             return ret;
 374     } else {
 375         at->eof = 1;
 376     }
 377
 378     out_buffers.mBuffers[0].mData = avpkt->data;
 379
 380     *got_packet_ptr = avctx->frame_size / at->frame_size;
 381
 382     ret = AudioConverterFillComplexBuffer(at->converter, ffat_encode_callback, avctx,
 383                                           got_packet_ptr, &out_buffers,
 384                                           (avctx->frame_size > at->frame_size) ? NULL : &out_pkt_desc);
 385     if ((!ret || ret == 1) && *got_packet_ptr) {
 386         avpkt->size = out_buffers.mBuffers[0].mDataByteSize;
 387         ff_af_queue_remove(&at->afq, out_pkt_desc.mVariableFramesInPacket ?
 388                                      out_pkt_desc.mVariableFramesInPacket :
 389                                      avctx->frame_size,
 390                            &avpkt->pts,
 391                            &avpkt->duration);
 392     } else if (ret && ret != 1) {
 393         av_log(avctx, AV_LOG_WARNING, "Encode error: %i\n", ret);
 394     }
 395
 396     return 0;
 397 }
 398
 399 static av_cold void ffat_encode_flush(AVCodecContext *avctx)
 400 {
 401     ATDecodeContext *at = avctx->priv_data;
 402     AudioConverterReset(at->converter);
 403     av_frame_unref(&at->new_in_frame);
 404     av_frame_unref(&at->in_frame);
 405 }
 406
 407 static av_cold int ffat_close_encoder(AVCodecContext *avctx)
 408 {
 409     ATDecodeContext *at = avctx->priv_data;
 410     AudioConverterDispose(at->converter);
 411     av_frame_unref(&at->new_in_frame);
 412     av_frame_unref(&at->in_frame);
 413     ff_af_queue_close(&at->afq);
 414     return 0;
 415 }
 416
/*
 * Profiles advertised by the AAC encoder; these are the same profiles that
 * ffat_get_format_id() maps to AudioToolbox format IDs. The list is
 * terminated by FF_PROFILE_UNKNOWN.
 */
static const AVProfile aac_profiles[] = {
    { FF_PROFILE_AAC_LOW,   "LC"       },
    { FF_PROFILE_AAC_HE,    "HE-AAC"   },
    { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
    { FF_PROFILE_AAC_LD,    "LD"       },
    { FF_PROFILE_AAC_ELD,   "ELD"      },
    { FF_PROFILE_UNKNOWN },
};
 425
/* Option flags shared by every entry below: audio + encoding parameter. */
#define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/*
 * Private options, shared by every encoder class created by FFAT_ENC_CLASS.
 *  - aac_at_mode:    rate-control mode stored in ATDecodeContext.mode; the
 *                    default -1 ("auto") is resolved in ffat_init_encoder.
 *                    The indented entries are the named constants of the
 *                    "mode" unit.
 *  - aac_at_quality: stored in ATDecodeContext.quality, range 0..2.
 */
static const AVOption options[] = {
    {"aac_at_mode", "ratecontrol mode", offsetof(ATDecodeContext, mode), AV_OPT_TYPE_INT, {.i64 = -1}, -1, kAudioCodecBitRateControlMode_Variable, AE, "mode"},
        {"auto", "VBR if global quality is given; CBR otherwise", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, INT_MIN, INT_MAX, AE, "mode"},
        {"cbr",  "constant bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Constant}, INT_MIN, INT_MAX, AE, "mode"},
        {"abr",  "long-term average bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_LongTermAverage}, INT_MIN, INT_MAX, AE, "mode"},
        {"cvbr", "constrained variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_VariableConstrained}, INT_MIN, INT_MAX, AE, "mode"},
        {"vbr" , "variable bitrate", 0, AV_OPT_TYPE_CONST, {.i64 = kAudioCodecBitRateControlMode_Variable}, INT_MIN, INT_MAX, AE, "mode"},
    {"aac_at_quality", "quality vs speed control", offsetof(ATDecodeContext, quality), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, AE},
    { NULL },
};
 437
/*
 * Define the AVClass ffat_<NAME>_enc_class (class name "at_<NAME>_enc")
 * for one encoder. Every class points at the shared options[] table.
 */
#define FFAT_ENC_CLASS(NAME) \
    static const AVClass ffat_##NAME##_enc_class = { \
        .class_name = "at_" #NAME "_enc", \
        .item_name  = av_default_item_name, \
        .option     = options, \
        .version    = LIBAVUTIL_VERSION_INT, \
    };
 445
/*
 * Define the AVClass and the AVCodec ff_<NAME>_at_encoder for one codec.
 *
 *   NAME     - codec short name; the encoder is registered as "<NAME>_at"
 *   ID       - AV_CODEC_ID_* value
 *   PROFILES - AVProfile table, or NULL
 *   ...      - optional extra capability flags. The text is pasted directly
 *              after "AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY", so it must
 *              begin with '|' (see the alac instantiation below).
 *
 * All encoders share the same init/close/encode2/flush callbacks and accept
 * S16 and U8 sample formats.
 */
#define FFAT_ENC(NAME, ID, PROFILES, ...) \
    FFAT_ENC_CLASS(NAME) \
    AVCodec ff_##NAME##_at_encoder = { \
        .name           = #NAME "_at", \
        .long_name      = NULL_IF_CONFIG_SMALL(#NAME " (AudioToolbox)"), \
        .type           = AVMEDIA_TYPE_AUDIO, \
        .id             = ID, \
        .priv_data_size = sizeof(ATDecodeContext), \
        .init           = ffat_init_encoder, \
        .close          = ffat_close_encoder, \
        .encode2        = ffat_encode, \
        .flush          = ffat_encode_flush, \
        .priv_class     = &ffat_##NAME##_enc_class, \
        .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY __VA_ARGS__, \
        .sample_fmts    = (const enum AVSampleFormat[]) { \
            AV_SAMPLE_FMT_S16, \
            AV_SAMPLE_FMT_U8,  AV_SAMPLE_FMT_NONE \
        }, \
        .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE, \
        .profiles       = PROFILES, \
    };

/* Encoder instantiations; the adpcm_ima_qt encoder is currently disabled. */
FFAT_ENC(aac,          AV_CODEC_ID_AAC,          aac_profiles)
//FFAT_ENC(adpcm_ima_qt, AV_CODEC_ID_ADPCM_IMA_QT, NULL)
FFAT_ENC(alac,         AV_CODEC_ID_ALAC,         NULL, | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | AV_CODEC_CAP_LOSSLESS)
FFAT_ENC(ilbc,         AV_CODEC_ID_ILBC,         NULL)
FFAT_ENC(pcm_alaw,     AV_CODEC_ID_PCM_ALAW,     NULL)
FFAT_ENC(pcm_mulaw,    AV_CODEC_ID_PCM_MULAW,    NULL)