git.sesse.net Git - ffmpeg/blob - libavcodec/videotoolboxenc.c

   1 /*
   2  * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include <VideoToolbox/VideoToolbox.h>
  22 #include <CoreVideo/CoreVideo.h>
  23 #include <CoreMedia/CoreMedia.h>
  24 #include <TargetConditionals.h>
  25 #include <Availability.h>
  26 #include "avcodec.h"
  27 #include "libavutil/opt.h"
  28 #include "libavutil/avassert.h"
  29 #include "libavutil/atomic.h"
  30 #include "libavutil/avstring.h"
  31 #include "libavcodec/avcodec.h"
  32 #include "libavutil/pixdesc.h"
  33 #include "internal.h"
  34 #include <pthread.h>
  35 #include "h264.h"
  36 #include "h264_sei.h"
  37 #include <dlfcn.h>
  38
  39 //These symbols may not be present
  40 static struct{
  41     CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
  42     CFStringRef kCVImageBufferTransferFunction_ITU_R_2020;
  43     CFStringRef kCVImageBufferYCbCrMatrix_ITU_R_2020;
  44
  45     CFStringRef kVTCompressionPropertyKey_H264EntropyMode;
  46     CFStringRef kVTH264EntropyMode_CAVLC;
  47     CFStringRef kVTH264EntropyMode_CABAC;
  48
  49     CFStringRef kVTProfileLevel_H264_Baseline_4_0;
  50     CFStringRef kVTProfileLevel_H264_Baseline_4_2;
  51     CFStringRef kVTProfileLevel_H264_Baseline_5_0;
  52     CFStringRef kVTProfileLevel_H264_Baseline_5_1;
  53     CFStringRef kVTProfileLevel_H264_Baseline_5_2;
  54     CFStringRef kVTProfileLevel_H264_Baseline_AutoLevel;
  55     CFStringRef kVTProfileLevel_H264_Main_4_2;
  56     CFStringRef kVTProfileLevel_H264_Main_5_1;
  57     CFStringRef kVTProfileLevel_H264_Main_5_2;
  58     CFStringRef kVTProfileLevel_H264_Main_AutoLevel;
  59     CFStringRef kVTProfileLevel_H264_High_3_0;
  60     CFStringRef kVTProfileLevel_H264_High_3_1;
  61     CFStringRef kVTProfileLevel_H264_High_3_2;
  62     CFStringRef kVTProfileLevel_H264_High_4_0;
  63     CFStringRef kVTProfileLevel_H264_High_4_1;
  64     CFStringRef kVTProfileLevel_H264_High_4_2;
  65     CFStringRef kVTProfileLevel_H264_High_5_1;
  66     CFStringRef kVTProfileLevel_H264_High_5_2;
  67     CFStringRef kVTProfileLevel_H264_High_AutoLevel;
  68
  69     CFStringRef kVTCompressionPropertyKey_RealTime;
  70
  71     CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
  72     CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;
  73 } compat_keys;
  74
  75 #define GET_SYM(symbol, defaultVal)                                     \
  76 do{                                                                     \
  77     CFStringRef cfstr = *(CFStringRef*)dlsym(RTLD_DEFAULT, #symbol);    \
  78     if(!cfstr)                                                          \
  79         compat_keys.symbol = CFSTR(defaultVal);                         \
  80     else                                                                \
  81         compat_keys.symbol = cfstr;                                     \
  82 }while(0)
  83
  84 static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;
  85
  86 static void loadVTEncSymbols(){
  87     GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020,   "ITU_R_2020");
  88     GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
  89     GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020,      "ITU_R_2020");
  90
  91     GET_SYM(kVTCompressionPropertyKey_H264EntropyMode, "H264EntropyMode");
  92     GET_SYM(kVTH264EntropyMode_CAVLC, "CAVLC");
  93     GET_SYM(kVTH264EntropyMode_CABAC, "CABAC");
  94
  95     GET_SYM(kVTProfileLevel_H264_Baseline_4_0,       "H264_Baseline_4_0");
  96     GET_SYM(kVTProfileLevel_H264_Baseline_4_2,       "H264_Baseline_4_2");
  97     GET_SYM(kVTProfileLevel_H264_Baseline_5_0,       "H264_Baseline_5_0");
  98     GET_SYM(kVTProfileLevel_H264_Baseline_5_1,       "H264_Baseline_5_1");
  99     GET_SYM(kVTProfileLevel_H264_Baseline_5_2,       "H264_Baseline_5_2");
 100     GET_SYM(kVTProfileLevel_H264_Baseline_AutoLevel, "H264_Baseline_AutoLevel");
 101     GET_SYM(kVTProfileLevel_H264_Main_4_2,           "H264_Main_4_2");
 102     GET_SYM(kVTProfileLevel_H264_Main_5_1,           "H264_Main_5_1");
 103     GET_SYM(kVTProfileLevel_H264_Main_5_2,           "H264_Main_5_2");
 104     GET_SYM(kVTProfileLevel_H264_Main_AutoLevel,     "H264_Main_AutoLevel");
 105     GET_SYM(kVTProfileLevel_H264_High_3_0,           "H264_High_3_0");
 106     GET_SYM(kVTProfileLevel_H264_High_3_1,           "H264_High_3_1");
 107     GET_SYM(kVTProfileLevel_H264_High_3_2,           "H264_High_3_2");
 108     GET_SYM(kVTProfileLevel_H264_High_4_0,           "H264_High_4_0");
 109     GET_SYM(kVTProfileLevel_H264_High_4_1,           "H264_High_4_1");
 110     GET_SYM(kVTProfileLevel_H264_High_4_2,           "H264_High_4_2");
 111     GET_SYM(kVTProfileLevel_H264_High_5_1,           "H264_High_5_1");
 112     GET_SYM(kVTProfileLevel_H264_High_5_2,           "H264_High_5_2");
 113     GET_SYM(kVTProfileLevel_H264_High_AutoLevel,     "H264_High_AutoLevel");
 114
 115     GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
 116
 117     GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
 118             "EnableHardwareAcceleratedVideoEncoder");
 119     GET_SYM(kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
 120             "RequireHardwareAcceleratedVideoEncoder");
 121 }
 122
 123 typedef enum VT_H264Profile {
 124     H264_PROF_AUTO,
 125     H264_PROF_BASELINE,
 126     H264_PROF_MAIN,
 127     H264_PROF_HIGH,
 128     H264_PROF_COUNT
 129 } VT_H264Profile;
 130
 131 typedef enum VTH264Entropy{
 132     VT_ENTROPY_NOT_SET,
 133     VT_CAVLC,
 134     VT_CABAC
 135 } VTH264Entropy;
 136
 137 static const uint8_t start_code[] = { 0, 0, 0, 1 };
 138
 139 typedef struct ExtraSEI {
 140   void *data;
 141   size_t size;
 142 } ExtraSEI;
 143
 144 typedef struct BufNode {
 145     CMSampleBufferRef cm_buffer;
 146     ExtraSEI *sei;
 147     struct BufNode* next;
 148     int error;
 149 } BufNode;
 150
 151 typedef struct VTEncContext {
 152     AVClass *class;
 153     VTCompressionSessionRef session;
 154     CFStringRef ycbcr_matrix;
 155     CFStringRef color_primaries;
 156     CFStringRef transfer_function;
 157
 158     pthread_mutex_t lock;
 159     pthread_cond_t  cv_sample_sent;
 160
 161     int async_error;
 162
 163     BufNode *q_head;
 164     BufNode *q_tail;
 165
 166     int64_t frame_ct_out;
 167     int64_t frame_ct_in;
 168
 169     int64_t first_pts;
 170     int64_t dts_delta;
 171
 172     int64_t profile;
 173     int64_t level;
 174     int64_t entropy;
 175     int64_t realtime;
 176     int64_t frames_before;
 177     int64_t frames_after;
 178
 179     int64_t allow_sw;
 180
 181     bool flushing;
 182     bool has_b_frames;
 183     bool warned_color_range;
 184     bool a53_cc;
 185 } VTEncContext;
 186
 187 static int vtenc_populate_extradata(AVCodecContext   *avctx,
 188                                     CMVideoCodecType codec_type,
 189                                     CFStringRef      profile_level,
 190                                     CFNumberRef      gamma_level,
 191                                     CFDictionaryRef  enc_info,
 192                                     CFDictionaryRef  pixel_buffer_info);
 193
 194 /**
 195  * NULL-safe release of *refPtr, and sets value to NULL.
 196  */
 197 static void vt_release_num(CFNumberRef* refPtr){
 198     if (!*refPtr) {
 199         return;
 200     }
 201
 202     CFRelease(*refPtr);
 203     *refPtr = NULL;
 204 }
 205
 206 static void set_async_error(VTEncContext *vtctx, int err)
 207 {
 208     BufNode *info;
 209
 210     pthread_mutex_lock(&vtctx->lock);
 211
 212     vtctx->async_error = err;
 213
 214     info = vtctx->q_head;
 215     vtctx->q_head = vtctx->q_tail = NULL;
 216
 217     while (info) {
 218         BufNode *next = info->next;
 219         CFRelease(info->cm_buffer);
 220         av_free(info);
 221         info = next;
 222     }
 223
 224     pthread_mutex_unlock(&vtctx->lock);
 225 }
 226
 227 static void clear_frame_queue(VTEncContext *vtctx)
 228 {
 229     set_async_error(vtctx, 0);
 230 }
 231
 232 static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI **sei)
 233 {
 234     BufNode *info;
 235
 236     pthread_mutex_lock(&vtctx->lock);
 237
 238     if (vtctx->async_error) {
 239         pthread_mutex_unlock(&vtctx->lock);
 240         return vtctx->async_error;
 241     }
 242
 243     if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
 244         *buf = NULL;
 245
 246         pthread_mutex_unlock(&vtctx->lock);
 247         return 0;
 248     }
 249
 250     while (!vtctx->q_head && !vtctx->async_error && wait) {
 251         pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
 252     }
 253
 254     if (!vtctx->q_head) {
 255         pthread_mutex_unlock(&vtctx->lock);
 256         *buf = NULL;
 257         return 0;
 258     }
 259
 260     info = vtctx->q_head;
 261     vtctx->q_head = vtctx->q_head->next;
 262     if (!vtctx->q_head) {
 263         vtctx->q_tail = NULL;
 264     }
 265
 266     pthread_mutex_unlock(&vtctx->lock);
 267
 268     *buf = info->cm_buffer;
 269     if (sei && *buf) {
 270         *sei = info->sei;
 271     } else if (info->sei) {
 272         if (info->sei->data) av_free(info->sei->data);
 273         av_free(info->sei);
 274     }
 275     av_free(info);
 276
 277     vtctx->frame_ct_out++;
 278
 279     return 0;
 280 }
 281
 282 static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer, ExtraSEI *sei)
 283 {
 284     BufNode *info = av_malloc(sizeof(BufNode));
 285     if (!info) {
 286         set_async_error(vtctx, AVERROR(ENOMEM));
 287         return;
 288     }
 289
 290     CFRetain(buffer);
 291     info->cm_buffer = buffer;
 292     info->sei = sei;
 293     info->next = NULL;
 294
 295     pthread_mutex_lock(&vtctx->lock);
 296     pthread_cond_signal(&vtctx->cv_sample_sent);
 297
 298     if (!vtctx->q_head) {
 299         vtctx->q_head = info;
 300     } else {
 301         vtctx->q_tail->next = info;
 302     }
 303
 304     vtctx->q_tail = info;
 305
 306     pthread_mutex_unlock(&vtctx->lock);
 307 }
 308
 309 static int count_nalus(size_t length_code_size,
 310                        CMSampleBufferRef sample_buffer,
 311                        int *count)
 312 {
 313     size_t offset = 0;
 314     int status;
 315     int nalu_ct = 0;
 316     uint8_t size_buf[4];
 317     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
 318     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
 319
 320     if (length_code_size > 4)
 321         return AVERROR_INVALIDDATA;
 322
 323     while (offset < src_size) {
 324         size_t curr_src_len;
 325         size_t box_len = 0;
 326         size_t i;
 327
 328         status = CMBlockBufferCopyDataBytes(block,
 329                                             offset,
 330                                             length_code_size,
 331                                             size_buf);
 332
 333         for (i = 0; i < length_code_size; i++) {
 334             box_len <<= 8;
 335             box_len |= size_buf[i];
 336         }
 337
 338         curr_src_len = box_len + length_code_size;
 339         offset += curr_src_len;
 340
 341         nalu_ct++;
 342     }
 343
 344     *count = nalu_ct;
 345     return 0;
 346 }
 347
 348 static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
 349 {
 350     switch (id) {
 351     case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
 352     default:               return 0;
 353     }
 354 }
 355
 356 /**
 357  * Get the parameter sets from a CMSampleBufferRef.
 358  * @param dst If *dst isn't NULL, the parameters are copied into existing
 359  *            memory. *dst_size must be set accordingly when *dst != NULL.
 360  *            If *dst is NULL, it will be allocated.
 361  *            In all cases, *dst_size is set to the number of bytes used starting
 362  *            at *dst.
 363  */
 364 static int get_params_size(
 365     AVCodecContext              *avctx,
 366     CMVideoFormatDescriptionRef vid_fmt,
 367     size_t                      *size)
 368 {
 369     size_t total_size = 0;
 370     size_t ps_count;
 371     int is_count_bad = 0;
 372     size_t i;
 373     int status;
 374     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
 375                                                                 0,
 376                                                                 NULL,
 377                                                                 NULL,
 378                                                                 &ps_count,
 379                                                                 NULL);
 380     if (status) {
 381         is_count_bad = 1;
 382         ps_count     = 0;
 383         status       = 0;
 384     }
 385
 386     for (i = 0; i < ps_count || is_count_bad; i++) {
 387         const uint8_t *ps;
 388         size_t ps_size;
 389         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
 390                                                                     i,
 391                                                                     &ps,
 392                                                                     &ps_size,
 393                                                                     NULL,
 394                                                                     NULL);
 395         if (status) {
 396             /*
 397              * When ps_count is invalid, status != 0 ends the loop normally
 398              * unless we didn't get any parameter sets.
 399              */
 400             if (i > 0 && is_count_bad) status = 0;
 401
 402             break;
 403         }
 404
 405         total_size += ps_size + sizeof(start_code);
 406     }
 407
 408     if (status) {
 409         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
 410         return AVERROR_EXTERNAL;
 411     }
 412
 413     *size = total_size;
 414     return 0;
 415 }
 416
 417 static int copy_param_sets(
 418     AVCodecContext              *avctx,
 419     CMVideoFormatDescriptionRef vid_fmt,
 420     uint8_t                     *dst,
 421     size_t                      dst_size)
 422 {
 423     size_t ps_count;
 424     int is_count_bad = 0;
 425     int status;
 426     size_t offset = 0;
 427     size_t i;
 428
 429     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
 430                                                                 0,
 431                                                                 NULL,
 432                                                                 NULL,
 433                                                                 &ps_count,
 434                                                                 NULL);
 435     if (status) {
 436         is_count_bad = 1;
 437         ps_count     = 0;
 438         status       = 0;
 439     }
 440
 441
 442     for (i = 0; i < ps_count || is_count_bad; i++) {
 443         const uint8_t *ps;
 444         size_t ps_size;
 445         size_t next_offset;
 446
 447         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
 448                                                                     i,
 449                                                                     &ps,
 450                                                                     &ps_size,
 451                                                                     NULL,
 452                                                                     NULL);
 453         if (status) {
 454             if (i > 0 && is_count_bad) status = 0;
 455
 456             break;
 457         }
 458
 459         next_offset = offset + sizeof(start_code) + ps_size;
 460         if (dst_size < next_offset) {
 461             av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
 462             return AVERROR_BUFFER_TOO_SMALL;
 463         }
 464
 465         memcpy(dst + offset, start_code, sizeof(start_code));
 466         offset += sizeof(start_code);
 467
 468         memcpy(dst + offset, ps, ps_size);
 469         offset = next_offset;
 470     }
 471
 472     if (status) {
 473         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
 474         return AVERROR_EXTERNAL;
 475     }
 476
 477     return 0;
 478 }
 479
 480 static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
 481 {
 482     CMVideoFormatDescriptionRef vid_fmt;
 483     size_t total_size;
 484     int status;
 485
 486     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
 487     if (!vid_fmt) {
 488         av_log(avctx, AV_LOG_ERROR, "No video format.\n");
 489         return AVERROR_EXTERNAL;
 490     }
 491
 492     status = get_params_size(avctx, vid_fmt, &total_size);
 493     if (status) {
 494         av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
 495         return status;
 496     }
 497
 498     avctx->extradata = av_mallocz(total_size + AV_INPUT_BUFFER_PADDING_SIZE);
 499     if (!avctx->extradata) {
 500         return AVERROR(ENOMEM);
 501     }
 502     avctx->extradata_size = total_size;
 503
 504     status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
 505
 506     if (status) {
 507         av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
 508         return status;
 509     }
 510
 511     return 0;
 512 }
 513
 514 static void vtenc_output_callback(
 515     void *ctx,
 516     void *sourceFrameCtx,
 517     OSStatus status,
 518     VTEncodeInfoFlags flags,
 519     CMSampleBufferRef sample_buffer)
 520 {
 521     AVCodecContext *avctx = ctx;
 522     VTEncContext   *vtctx = avctx->priv_data;
 523     ExtraSEI *sei = sourceFrameCtx;
 524
 525     if (vtctx->async_error) {
 526         if(sample_buffer) CFRelease(sample_buffer);
 527         return;
 528     }
 529
 530     if (status || !sample_buffer) {
 531         av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
 532         set_async_error(vtctx, AVERROR_EXTERNAL);
 533         return;
 534     }
 535
 536     if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
 537         int set_status = set_extradata(avctx, sample_buffer);
 538         if (set_status) {
 539             set_async_error(vtctx, set_status);
 540             return;
 541         }
 542     }
 543
 544     vtenc_q_push(vtctx, sample_buffer, sei);
 545 }
 546
 547 static int get_length_code_size(
 548     AVCodecContext    *avctx,
 549     CMSampleBufferRef sample_buffer,
 550     size_t            *size)
 551 {
 552     CMVideoFormatDescriptionRef vid_fmt;
 553     int isize;
 554     int status;
 555
 556     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
 557     if (!vid_fmt) {
 558         av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
 559         return AVERROR_EXTERNAL;
 560     }
 561
 562     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
 563                                                                 0,
 564                                                                 NULL,
 565                                                                 NULL,
 566                                                                 NULL,
 567                                                                 &isize);
 568     if (status) {
 569         av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
 570         return AVERROR_EXTERNAL;
 571     }
 572
 573     *size = isize;
 574     return 0;
 575 }
 576
 577 /*
 578  * Returns true on success.
 579  *
 580  * If profile_level_val is NULL and this method returns true, don't specify the
 581  * profile/level to the encoder.
 582  */
 583 static bool get_vt_profile_level(AVCodecContext *avctx,
 584                                  CFStringRef    *profile_level_val)
 585 {
 586     VTEncContext *vtctx = avctx->priv_data;
 587     int64_t profile = vtctx->profile;
 588
 589     if (profile == H264_PROF_AUTO && vtctx->level) {
 590         //Need to pick a profile if level is not auto-selected.
 591         profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
 592     }
 593
 594     *profile_level_val = NULL;
 595
 596     switch (profile) {
 597         case H264_PROF_AUTO:
 598             return true;
 599
 600         case H264_PROF_BASELINE:
 601             switch (vtctx->level) {
 602                 case  0: *profile_level_val =
 603                                   compat_keys.kVTProfileLevel_H264_Baseline_AutoLevel; break;
 604                 case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
 605                 case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
 606                 case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
 607                 case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
 608                 case 40: *profile_level_val =
 609                                   compat_keys.kVTProfileLevel_H264_Baseline_4_0;       break;
 610                 case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
 611                 case 42: *profile_level_val =
 612                                   compat_keys.kVTProfileLevel_H264_Baseline_4_2;       break;
 613                 case 50: *profile_level_val =
 614                                   compat_keys.kVTProfileLevel_H264_Baseline_5_0;       break;
 615                 case 51: *profile_level_val =
 616                                   compat_keys.kVTProfileLevel_H264_Baseline_5_1;       break;
 617                 case 52: *profile_level_val =
 618                                   compat_keys.kVTProfileLevel_H264_Baseline_5_2;       break;
 619             }
 620             break;
 621
 622         case H264_PROF_MAIN:
 623             switch (vtctx->level) {
 624                 case  0: *profile_level_val =
 625                                   compat_keys.kVTProfileLevel_H264_Main_AutoLevel; break;
 626                 case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
 627                 case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
 628                 case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
 629                 case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
 630                 case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
 631                 case 42: *profile_level_val =
 632                                   compat_keys.kVTProfileLevel_H264_Main_4_2;       break;
 633                 case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
 634                 case 51: *profile_level_val =
 635                                   compat_keys.kVTProfileLevel_H264_Main_5_1;       break;
 636                 case 52: *profile_level_val =
 637                                   compat_keys.kVTProfileLevel_H264_Main_5_2;       break;
 638             }
 639             break;
 640
 641         case H264_PROF_HIGH:
 642             switch (vtctx->level) {
 643                 case  0: *profile_level_val =
 644                                   compat_keys.kVTProfileLevel_H264_High_AutoLevel; break;
 645                 case 30: *profile_level_val =
 646                                   compat_keys.kVTProfileLevel_H264_High_3_0;       break;
 647                 case 31: *profile_level_val =
 648                                   compat_keys.kVTProfileLevel_H264_High_3_1;       break;
 649                 case 32: *profile_level_val =
 650                                   compat_keys.kVTProfileLevel_H264_High_3_2;       break;
 651                 case 40: *profile_level_val =
 652                                   compat_keys.kVTProfileLevel_H264_High_4_0;       break;
 653                 case 41: *profile_level_val =
 654                                   compat_keys.kVTProfileLevel_H264_High_4_1;       break;
 655                 case 42: *profile_level_val =
 656                                   compat_keys.kVTProfileLevel_H264_High_4_2;       break;
 657                 case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
 658                 case 51: *profile_level_val =
 659                                   compat_keys.kVTProfileLevel_H264_High_5_1;       break;
 660                 case 52: *profile_level_val =
 661                                   compat_keys.kVTProfileLevel_H264_High_5_2;       break;
 662             }
 663             break;
 664     }
 665
 666     if (!*profile_level_val) {
 667         av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
 668         return false;
 669     }
 670
 671     return true;
 672 }
 673
 674 static int get_cv_pixel_format(AVCodecContext* avctx,
 675                                enum AVPixelFormat fmt,
 676                                enum AVColorRange range,
 677                                int* av_pixel_format,
 678                                int* range_guessed)
 679 {
 680     if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
 681                                         range != AVCOL_RANGE_JPEG;
 682
 683     //MPEG range is used when no range is set
 684     if (fmt == AV_PIX_FMT_NV12) {
 685         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
 686                                         kCVPixelFormatType_420YpCbCr8BiPlanarFullRange :
 687                                         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
 688     } else if (fmt == AV_PIX_FMT_YUV420P) {
 689         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
 690                                         kCVPixelFormatType_420YpCbCr8PlanarFullRange :
 691                                         kCVPixelFormatType_420YpCbCr8Planar;
 692     } else {
 693         return AVERROR(EINVAL);
 694     }
 695
 696     return 0;
 697 }
 698
 699 static void add_color_attr(AVCodecContext *avctx, CFMutableDictionaryRef dict) {
 700     VTEncContext *vtctx = avctx->priv_data;
 701
 702     if (vtctx->color_primaries) {
 703         CFDictionarySetValue(dict,
 704                              kCVImageBufferColorPrimariesKey,
 705                              vtctx->color_primaries);
 706     }
 707
 708     if (vtctx->transfer_function) {
 709         CFDictionarySetValue(dict,
 710                              kCVImageBufferTransferFunctionKey,
 711                              vtctx->transfer_function);
 712     }
 713
 714     if (vtctx->ycbcr_matrix) {
 715         CFDictionarySetValue(dict,
 716                              kCVImageBufferYCbCrMatrixKey,
 717                              vtctx->ycbcr_matrix);
 718     }
 719 }
 720
 721 static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
 722                                        CFMutableDictionaryRef* dict)
 723 {
 724     CFNumberRef cv_color_format_num = NULL;
 725     CFNumberRef width_num = NULL;
 726     CFNumberRef height_num = NULL;
 727     CFMutableDictionaryRef pixel_buffer_info = NULL;
 728     int cv_color_format;
 729     int status = get_cv_pixel_format(avctx,
 730                                      avctx->pix_fmt,
 731                                      avctx->color_range,
 732                                      &cv_color_format,
 733                                      NULL);
 734     if (status) return status;
 735
 736     pixel_buffer_info = CFDictionaryCreateMutable(
 737                             kCFAllocatorDefault,
 738                             20,
 739                             &kCFCopyStringDictionaryKeyCallBacks,
 740                             &kCFTypeDictionaryValueCallBacks);
 741
 742     if (!pixel_buffer_info) goto pbinfo_nomem;
 743
 744     cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
 745                                          kCFNumberSInt32Type,
 746                                          &cv_color_format);
 747     if (!cv_color_format_num) goto pbinfo_nomem;
 748
 749     CFDictionarySetValue(pixel_buffer_info,
 750                          kCVPixelBufferPixelFormatTypeKey,
 751                          cv_color_format_num);
 752     vt_release_num(&cv_color_format_num);
 753
 754     width_num = CFNumberCreate(kCFAllocatorDefault,
 755                                kCFNumberSInt32Type,
 756                                &avctx->width);
 757     if (!width_num) return AVERROR(ENOMEM);
 758
 759     CFDictionarySetValue(pixel_buffer_info,
 760                          kCVPixelBufferWidthKey,
 761                          width_num);
 762     vt_release_num(&width_num);
 763
 764     height_num = CFNumberCreate(kCFAllocatorDefault,
 765                                 kCFNumberSInt32Type,
 766                                 &avctx->height);
 767     if (!height_num) goto pbinfo_nomem;
 768
 769     CFDictionarySetValue(pixel_buffer_info,
 770                          kCVPixelBufferHeightKey,
 771                          height_num);
 772     vt_release_num(&height_num);
 773
 774     add_color_attr(avctx, pixel_buffer_info);
 775
 776     *dict = pixel_buffer_info;
 777     return 0;
 778
 779 pbinfo_nomem:
 780     vt_release_num(&cv_color_format_num);
 781     vt_release_num(&width_num);
 782     vt_release_num(&height_num);
 783     if (pixel_buffer_info) CFRelease(pixel_buffer_info);
 784
 785     return AVERROR(ENOMEM);
 786 }
 787
 788 static int get_cv_color_primaries(AVCodecContext *avctx,
 789                                   CFStringRef *primaries)
 790 {
 791     enum AVColorPrimaries pri = avctx->color_primaries;
 792     switch (pri) {
 793         case AVCOL_PRI_UNSPECIFIED:
 794             *primaries = NULL;
 795             break;
 796
 797         case AVCOL_PRI_BT709:
 798             *primaries = kCVImageBufferColorPrimaries_ITU_R_709_2;
 799             break;
 800
 801         case AVCOL_PRI_BT2020:
 802             *primaries = compat_keys.kCVImageBufferColorPrimaries_ITU_R_2020;
 803             break;
 804
 805         default:
 806             av_log(avctx, AV_LOG_ERROR, "Color primaries %s is not supported.\n", av_color_primaries_name(pri));
 807             *primaries = NULL;
 808             return -1;
 809     }
 810
 811     return 0;
 812 }
 813
 814 static int get_cv_transfer_function(AVCodecContext *avctx,
 815                                     CFStringRef *transfer_fnc,
 816                                     CFNumberRef *gamma_level)
 817 {
 818     enum AVColorTransferCharacteristic trc = avctx->color_trc;
 819     Float32 gamma;
 820     *gamma_level = NULL;
 821
 822     switch (trc) {
 823         case AVCOL_TRC_UNSPECIFIED:
 824             *transfer_fnc = NULL;
 825             break;
 826
 827         case AVCOL_TRC_BT709:
 828             *transfer_fnc = kCVImageBufferTransferFunction_ITU_R_709_2;
 829             break;
 830
 831         case AVCOL_TRC_SMPTE240M:
 832             *transfer_fnc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
 833             break;
 834
 835         case AVCOL_TRC_GAMMA22:
 836             gamma = 2.2;
 837             *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
 838             *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
 839             break;
 840
 841         case AVCOL_TRC_GAMMA28:
 842             gamma = 2.8;
 843             *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
 844             *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
 845             break;
 846
 847         case AVCOL_TRC_BT2020_10:
 848         case AVCOL_TRC_BT2020_12:
 849             *transfer_fnc = compat_keys.kCVImageBufferTransferFunction_ITU_R_2020;
 850             break;
 851
 852         default:
 853             av_log(avctx, AV_LOG_ERROR, "Transfer function %s is not supported.\n", av_color_transfer_name(trc));
 854             return -1;
 855     }
 856
 857     return 0;
 858 }
 859
 860 static int get_cv_ycbcr_matrix(AVCodecContext *avctx, CFStringRef *matrix) {
 861     switch(avctx->colorspace) {
 862         case AVCOL_SPC_BT709:
 863             *matrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
 864             break;
 865
 866         case AVCOL_SPC_UNSPECIFIED:
 867             *matrix = NULL;
 868             break;
 869
 870         case AVCOL_SPC_BT470BG:
 871         case AVCOL_SPC_SMPTE170M:
 872             *matrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
 873             break;
 874
 875         case AVCOL_SPC_SMPTE240M:
 876             *matrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
 877             break;
 878
 879         case AVCOL_SPC_BT2020_NCL:
 880             *matrix = compat_keys.kCVImageBufferYCbCrMatrix_ITU_R_2020;
 881             break;
 882
 883         default:
 884             av_log(avctx, AV_LOG_ERROR, "Color space %s is not supported.\n", av_color_space_name(avctx->colorspace));
 885             return -1;
 886     }
 887
 888     return 0;
 889 }
 890
 891 static int vtenc_create_encoder(AVCodecContext   *avctx,
 892                                 CMVideoCodecType codec_type,
 893                                 CFStringRef      profile_level,
 894                                 CFNumberRef      gamma_level,
 895                                 CFDictionaryRef  enc_info,
 896                                 CFDictionaryRef  pixel_buffer_info,
 897                                 VTCompressionSessionRef *session)
 898 {
 899     VTEncContext *vtctx = avctx->priv_data;
 900     SInt32       bit_rate = avctx->bit_rate;
 901     SInt32       max_rate = avctx->rc_max_rate;
 902     CFNumberRef  bit_rate_num;
 903     CFNumberRef  bytes_per_second;
 904     CFNumberRef  one_second;
 905     CFArrayRef   data_rate_limits;
 906     int64_t      bytes_per_second_value = 0;
 907     int64_t      one_second_value = 0;
 908     void         *nums[2];
 909
 910     int status = VTCompressionSessionCreate(kCFAllocatorDefault,
 911                                             avctx->width,
 912                                             avctx->height,
 913                                             codec_type,
 914                                             enc_info,
 915                                             pixel_buffer_info,
 916                                             kCFAllocatorDefault,
 917                                             vtenc_output_callback,
 918                                             avctx,
 919                                             session);
 920
 921     if (status || !vtctx->session) {
 922         av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
 923
 924 #if !TARGET_OS_IPHONE
 925         if (!vtctx->allow_sw) {
 926             av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
 927         }
 928 #endif
 929
 930         return AVERROR_EXTERNAL;
 931     }
 932
 933     bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
 934                                   kCFNumberSInt32Type,
 935                                   &bit_rate);
 936     if (!bit_rate_num) return AVERROR(ENOMEM);
 937
 938     status = VTSessionSetProperty(vtctx->session,
 939                                   kVTCompressionPropertyKey_AverageBitRate,
 940                                   bit_rate_num);
 941     CFRelease(bit_rate_num);
 942
 943     if (status) {
 944         av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
 945         return AVERROR_EXTERNAL;
 946     }
 947
 948     bytes_per_second_value = max_rate >> 3;
 949     bytes_per_second = CFNumberCreate(kCFAllocatorDefault,
 950                                       kCFNumberSInt64Type,
 951                                       &bytes_per_second_value);
 952     if (!bytes_per_second) {
 953         return AVERROR(ENOMEM);
 954     }
 955     one_second_value = 1;
 956     one_second = CFNumberCreate(kCFAllocatorDefault,
 957                                 kCFNumberSInt64Type,
 958                                 &one_second_value);
 959     if (!one_second) {
 960         CFRelease(bytes_per_second);
 961         return AVERROR(ENOMEM);
 962     }
 963     nums[0] = bytes_per_second;
 964     nums[1] = one_second;
 965     data_rate_limits = CFArrayCreate(kCFAllocatorDefault,
 966                                      nums,
 967                                      2,
 968                                      &kCFTypeArrayCallBacks);
 969
 970     if (!data_rate_limits) {
 971         CFRelease(bytes_per_second);
 972         CFRelease(one_second);
 973         return AVERROR(ENOMEM);
 974     }
 975     status = VTSessionSetProperty(vtctx->session,
 976                                   kVTCompressionPropertyKey_DataRateLimits,
 977                                   data_rate_limits);
 978
 979     CFRelease(bytes_per_second);
 980     CFRelease(one_second);
 981     CFRelease(data_rate_limits);
 982
 983     if (status) {
 984         av_log(avctx, AV_LOG_ERROR, "Error setting max bitrate property: %d\n", status);
 985         return AVERROR_EXTERNAL;
 986     }
 987
 988     if (profile_level) {
 989         status = VTSessionSetProperty(vtctx->session,
 990                                       kVTCompressionPropertyKey_ProfileLevel,
 991                                       profile_level);
 992         if (status) {
 993             av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
 994         }
 995     }
 996
 997     if (avctx->gop_size > 0) {
 998         CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
 999                                               kCFNumberIntType,
1000                                               &avctx->gop_size);
1001         if (!interval) {
1002             return AVERROR(ENOMEM);
1003         }
1004
1005         status = VTSessionSetProperty(vtctx->session,
1006                                       kVTCompressionPropertyKey_MaxKeyFrameInterval,
1007                                       interval);
1008         CFRelease(interval);
1009
1010         if (status) {
1011             av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
1012             return AVERROR_EXTERNAL;
1013         }
1014     }
1015
1016     if (vtctx->frames_before) {
1017         status = VTSessionSetProperty(vtctx->session,
1018                                       kVTCompressionPropertyKey_MoreFramesBeforeStart,
1019                                       kCFBooleanTrue);
1020
1021         if (status == kVTPropertyNotSupportedErr) {
1022             av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
1023         } else if (status) {
1024             av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
1025         }
1026     }
1027
1028     if (vtctx->frames_after) {
1029         status = VTSessionSetProperty(vtctx->session,
1030                                       kVTCompressionPropertyKey_MoreFramesAfterEnd,
1031                                       kCFBooleanTrue);
1032
1033         if (status == kVTPropertyNotSupportedErr) {
1034             av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
1035         } else if (status) {
1036             av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
1037         }
1038     }
1039
1040     if (avctx->sample_aspect_ratio.num != 0) {
1041         CFNumberRef num;
1042         CFNumberRef den;
1043         CFMutableDictionaryRef par;
1044         AVRational *avpar = &avctx->sample_aspect_ratio;
1045
1046         av_reduce(&avpar->num, &avpar->den,
1047                    avpar->num,  avpar->den,
1048                   0xFFFFFFFF);
1049
1050         num = CFNumberCreate(kCFAllocatorDefault,
1051                              kCFNumberIntType,
1052                              &avpar->num);
1053
1054         den = CFNumberCreate(kCFAllocatorDefault,
1055                              kCFNumberIntType,
1056                              &avpar->den);
1057
1058
1059
1060         par = CFDictionaryCreateMutable(kCFAllocatorDefault,
1061                                         2,
1062                                         &kCFCopyStringDictionaryKeyCallBacks,
1063                                         &kCFTypeDictionaryValueCallBacks);
1064
1065         if (!par || !num || !den) {
1066             if (par) CFRelease(par);
1067             if (num) CFRelease(num);
1068             if (den) CFRelease(den);
1069
1070             return AVERROR(ENOMEM);
1071         }
1072
1073         CFDictionarySetValue(
1074             par,
1075             kCMFormatDescriptionKey_PixelAspectRatioHorizontalSpacing,
1076             num);
1077
1078         CFDictionarySetValue(
1079             par,
1080             kCMFormatDescriptionKey_PixelAspectRatioVerticalSpacing,
1081             den);
1082
1083         status = VTSessionSetProperty(vtctx->session,
1084                                       kVTCompressionPropertyKey_PixelAspectRatio,
1085                                       par);
1086
1087         CFRelease(par);
1088         CFRelease(num);
1089         CFRelease(den);
1090
1091         if (status) {
1092             av_log(avctx,
1093                    AV_LOG_ERROR,
1094                    "Error setting pixel aspect ratio to %d:%d: %d.\n",
1095                    avctx->sample_aspect_ratio.num,
1096                    avctx->sample_aspect_ratio.den,
1097                    status);
1098
1099             return AVERROR_EXTERNAL;
1100         }
1101     }
1102
1103
1104     if (vtctx->transfer_function) {
1105         status = VTSessionSetProperty(vtctx->session,
1106                                       kVTCompressionPropertyKey_TransferFunction,
1107                                       vtctx->transfer_function);
1108
1109         if (status) {
1110             av_log(avctx, AV_LOG_WARNING, "Could not set transfer function: %d\n", status);
1111         }
1112     }
1113
1114
1115     if (vtctx->ycbcr_matrix) {
1116         status = VTSessionSetProperty(vtctx->session,
1117                                       kVTCompressionPropertyKey_YCbCrMatrix,
1118                                       vtctx->ycbcr_matrix);
1119
1120         if (status) {
1121             av_log(avctx, AV_LOG_WARNING, "Could not set ycbcr matrix: %d\n", status);
1122         }
1123     }
1124
1125
1126     if (vtctx->color_primaries) {
1127         status = VTSessionSetProperty(vtctx->session,
1128                                       kVTCompressionPropertyKey_ColorPrimaries,
1129                                       vtctx->color_primaries);
1130
1131         if (status) {
1132             av_log(avctx, AV_LOG_WARNING, "Could not set color primaries: %d\n", status);
1133         }
1134     }
1135
1136     if (gamma_level) {
1137         status = VTSessionSetProperty(vtctx->session,
1138                                       kCVImageBufferGammaLevelKey,
1139                                       gamma_level);
1140
1141         if (status) {
1142             av_log(avctx, AV_LOG_WARNING, "Could not set gamma level: %d\n", status);
1143         }
1144     }
1145
1146     if (!vtctx->has_b_frames) {
1147         status = VTSessionSetProperty(vtctx->session,
1148                                       kVTCompressionPropertyKey_AllowFrameReordering,
1149                                       kCFBooleanFalse);
1150
1151         if (status) {
1152             av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
1153             return AVERROR_EXTERNAL;
1154         }
1155     }
1156
1157     if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
1158         CFStringRef entropy = vtctx->entropy == VT_CABAC ?
1159                                 compat_keys.kVTH264EntropyMode_CABAC:
1160                                 compat_keys.kVTH264EntropyMode_CAVLC;
1161
1162         status = VTSessionSetProperty(vtctx->session,
1163                                       compat_keys.kVTCompressionPropertyKey_H264EntropyMode,
1164                                       entropy);
1165
1166         if (status) {
1167             av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
1168         }
1169     }
1170
1171     if (vtctx->realtime) {
1172         status = VTSessionSetProperty(vtctx->session,
1173                                       compat_keys.kVTCompressionPropertyKey_RealTime,
1174                                       kCFBooleanTrue);
1175
1176         if (status) {
1177             av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
1178         }
1179     }
1180
1181     status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
1182     if (status) {
1183         av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
1184         return AVERROR_EXTERNAL;
1185     }
1186
1187     return 0;
1188 }
1189
1190 static av_cold int vtenc_init(AVCodecContext *avctx)
1191 {
1192     CFMutableDictionaryRef enc_info;
1193     CFMutableDictionaryRef pixel_buffer_info;
1194     CMVideoCodecType       codec_type;
1195     VTEncContext           *vtctx = avctx->priv_data;
1196     CFStringRef            profile_level;
1197     CFBooleanRef           has_b_frames_cfbool;
1198     CFNumberRef            gamma_level = NULL;
1199     int                    status;
1200
1201     pthread_once(&once_ctrl, loadVTEncSymbols);
1202
1203     codec_type = get_cm_codec_type(avctx->codec_id);
1204     if (!codec_type) {
1205         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
1206         return AVERROR(EINVAL);
1207     }
1208
1209     vtctx->has_b_frames = avctx->max_b_frames > 0;
1210     if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
1211         av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
1212         vtctx->has_b_frames = false;
1213     }
1214
1215     if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
1216         av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
1217         vtctx->entropy = VT_ENTROPY_NOT_SET;
1218     }
1219
1220     if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1221
1222     vtctx->session = NULL;
1223
1224     enc_info = CFDictionaryCreateMutable(
1225         kCFAllocatorDefault,
1226         20,
1227         &kCFCopyStringDictionaryKeyCallBacks,
1228         &kCFTypeDictionaryValueCallBacks
1229     );
1230
1231     if (!enc_info) return AVERROR(ENOMEM);
1232
1233 #if !TARGET_OS_IPHONE
1234     if (!vtctx->allow_sw) {
1235         CFDictionarySetValue(enc_info,
1236                              compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
1237                              kCFBooleanTrue);
1238     } else {
1239         CFDictionarySetValue(enc_info,
1240                              compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1241                              kCFBooleanTrue);
1242     }
1243 #endif
1244
1245     if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
1246         status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
1247         if (status)
1248             goto init_cleanup;
1249     } else {
1250         pixel_buffer_info = NULL;
1251     }
1252
1253     pthread_mutex_init(&vtctx->lock, NULL);
1254     pthread_cond_init(&vtctx->cv_sample_sent, NULL);
1255     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
1256
1257     get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
1258     get_cv_ycbcr_matrix(avctx, &vtctx->ycbcr_matrix);
1259     get_cv_color_primaries(avctx, &vtctx->color_primaries);
1260
1261
1262     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1263         status = vtenc_populate_extradata(avctx,
1264                                           codec_type,
1265                                           profile_level,
1266                                           gamma_level,
1267                                           enc_info,
1268                                           pixel_buffer_info);
1269         if (status)
1270             goto init_cleanup;
1271     }
1272
1273     status = vtenc_create_encoder(avctx,
1274                                   codec_type,
1275                                   profile_level,
1276                                   gamma_level,
1277                                   enc_info,
1278                                   pixel_buffer_info,
1279                                   &vtctx->session);
1280
1281     if (status < 0)
1282         goto init_cleanup;
1283
1284     status = VTSessionCopyProperty(vtctx->session,
1285                                    kVTCompressionPropertyKey_AllowFrameReordering,
1286                                    kCFAllocatorDefault,
1287                                    &has_b_frames_cfbool);
1288
1289     if (!status) {
1290         //Some devices don't output B-frames for main profile, even if requested.
1291         vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
1292         CFRelease(has_b_frames_cfbool);
1293     }
1294     avctx->has_b_frames = vtctx->has_b_frames;
1295
1296 init_cleanup:
1297     if (gamma_level)
1298         CFRelease(gamma_level);
1299
1300     if (pixel_buffer_info)
1301         CFRelease(pixel_buffer_info);
1302
1303     CFRelease(enc_info);
1304
1305     return status;
1306 }
1307
1308 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
1309 {
1310     CFArrayRef      attachments;
1311     CFDictionaryRef attachment;
1312     CFBooleanRef    not_sync;
1313     CFIndex         len;
1314
1315     attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
1316     len = !attachments ? 0 : CFArrayGetCount(attachments);
1317
1318     if (!len) {
1319         *is_key_frame = true;
1320         return;
1321     }
1322
1323     attachment = CFArrayGetValueAtIndex(attachments, 0);
1324
1325     if (CFDictionaryGetValueIfPresent(attachment,
1326                                       kCMSampleAttachmentKey_NotSync,
1327                                       (const void **)&not_sync))
1328     {
1329         *is_key_frame = !CFBooleanGetValue(not_sync);
1330     } else {
1331         *is_key_frame = true;
1332     }
1333 }
1334
1335 static int is_post_sei_nal_type(int nal_type){
1336     return nal_type != H264_NAL_SEI &&
1337            nal_type != H264_NAL_SPS &&
1338            nal_type != H264_NAL_PPS &&
1339            nal_type != H264_NAL_AUD;
1340 }
1341
1342 /*
1343  * Finds the sei message start/size of type find_sei_type.
1344  * If more than one of that type exists, the last one is returned.
1345  */
1346 static int find_sei_end(AVCodecContext *avctx,
1347                         uint8_t        *nal_data,
1348                         size_t          nal_size,
1349                         uint8_t       **sei_end)
1350 {
1351     int nal_type;
1352     size_t sei_payload_size = 0;
1353     int sei_payload_type = 0;
1354     *sei_end = NULL;
1355     uint8_t *nal_start = nal_data;
1356
1357     if (!nal_size)
1358         return 0;
1359
1360     nal_type = *nal_data & 0x1F;
1361     if (nal_type != H264_NAL_SEI)
1362         return 0;
1363
1364     nal_data++;
1365     nal_size--;
1366
1367     if (nal_data[nal_size - 1] == 0x80)
1368         nal_size--;
1369
1370     while (nal_size > 0 && *nal_data > 0) {
1371         do{
1372             sei_payload_type += *nal_data;
1373             nal_data++;
1374             nal_size--;
1375         } while (nal_size > 0 && *nal_data == 0xFF);
1376
1377         if (!nal_size) {
1378             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing type.\n");
1379             return AVERROR_INVALIDDATA;
1380         }
1381
1382         do{
1383             sei_payload_size += *nal_data;
1384             nal_data++;
1385             nal_size--;
1386         } while (nal_size > 0 && *nal_data == 0xFF);
1387
1388         if (nal_size < sei_payload_size) {
1389             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing size.\n");
1390             return AVERROR_INVALIDDATA;
1391         }
1392
1393         nal_data += sei_payload_size;
1394         nal_size -= sei_payload_size;
1395     }
1396
1397     *sei_end = nal_data;
1398
1399     return nal_data - nal_start + 1;
1400 }
1401
/**
 * Copies the data inserting emulation prevention bytes as needed.
 * Existing data in the destination can be taken into account by providing
 * dst with a dst_offset > 0: up to two bytes preceding dst_offset are
 * inspected so that a zero run spanning the old and the new data is handled.
 *
 * Bytes that do not fit in dst_size are counted but not stored, so the
 * function can be called with dst == NULL and dst_size == 0 to compute the
 * space required. A negative dst_offset is treated as 0.
 *
 * @param src        Data to copy.
 * @param src_size   Number of bytes in src.
 * @param dst        Destination buffer (may be NULL when dst_size is 0).
 * @param dst_offset Offset in dst at which the copy starts.
 * @param dst_size   Total size of dst, counted from dst (not from the offset).
 * @return The number of bytes copied on success. On failure, the negative of
 *         the number of bytes needed to copy src is returned.
 */
static int copy_emulation_prev(const uint8_t *src,
                               size_t         src_size,
                               uint8_t       *dst,
                               ssize_t        dst_offset,
                               size_t         dst_size)
{
    const size_t offset = dst_offset > 0 ? (size_t)dst_offset : 0;
    size_t pos = offset;   /* write position, as an index into dst */
    size_t i;
    int zeros = 0;
    int wrote_bytes;

    /* Seed the zero-run counter from the bytes already present in dst. */
    for (i = offset > 2 ? offset - 2 : 0; i < offset && i < dst_size; i++) {
        if (!dst[i])
            zeros++;
        else
            zeros = 0;
    }

    /*
     * Work with indices instead of advancing dst itself: the position may
     * run past dst_size (or dst may be NULL) while only counting bytes, and
     * forming such pointers would be undefined behavior.
     */
    for (i = 0; i < src_size; i++) {
        const uint8_t byte = src[i];

        if (zeros == 2) {
            /* 00 00 followed by 00..03 must be escaped with a 03 byte. */
            if (byte <= 3) {
                if (pos < dst_size)
                    dst[pos] = 3;
                pos++;
            }

            zeros = 0;
        }

        if (pos < dst_size)
            dst[pos] = byte;
        pos++;

        if (!byte)
            zeros++;
        else
            zeros = 0;
    }

    wrote_bytes = (int)(pos - offset);

    if (pos > dst_size)
        return -wrote_bytes;

    return wrote_bytes;
}
1460
1461 static int write_sei(const ExtraSEI *sei,
1462                      int             sei_type,
1463                      uint8_t        *dst,
1464                      size_t          dst_size)
1465 {
1466     uint8_t *sei_start = dst;
1467     size_t remaining_sei_size = sei->size;
1468     size_t remaining_dst_size = dst_size;
1469     int header_bytes;
1470     int bytes_written;
1471     ssize_t offset;
1472
1473     if (!remaining_dst_size)
1474         return AVERROR_BUFFER_TOO_SMALL;
1475
1476     while (sei_type && remaining_dst_size != 0) {
1477         int sei_byte = sei_type > 255 ? 255 : sei_type;
1478         *dst = sei_byte;
1479
1480         sei_type -= sei_byte;
1481         dst++;
1482         remaining_dst_size--;
1483     }
1484
1485     if (!dst_size)
1486         return AVERROR_BUFFER_TOO_SMALL;
1487
1488     while (remaining_sei_size && remaining_dst_size != 0) {
1489         int size_byte = remaining_sei_size > 255 ? 255 : remaining_sei_size;
1490         *dst = size_byte;
1491
1492         remaining_sei_size -= size_byte;
1493         dst++;
1494         remaining_dst_size--;
1495     }
1496
1497     if (remaining_dst_size < sei->size)
1498         return AVERROR_BUFFER_TOO_SMALL;
1499
1500     header_bytes = dst - sei_start;
1501
1502     offset = header_bytes;
1503     bytes_written = copy_emulation_prev(sei->data,
1504                                         sei->size,
1505                                         sei_start,
1506                                         offset,
1507                                         dst_size);
1508     if (bytes_written < 0)
1509         return AVERROR_BUFFER_TOO_SMALL;
1510
1511     bytes_written += header_bytes;
1512     return bytes_written;
1513 }
1514
1515 /**
1516  * Copies NAL units and replaces length codes with
1517  * H.264 Annex B start codes. On failure, the contents of
1518  * dst_data may have been modified.
1519  *
1520  * @param length_code_size Byte length of each length code
1521  * @param sample_buffer NAL units prefixed with length codes.
1522  * @param sei Optional A53 closed captions SEI data.
1523  * @param dst_data Must be zeroed before calling this function.
1524  *                 Contains the copied NAL units prefixed with
1525  *                 start codes when the function returns
1526  *                 successfully.
1527  * @param dst_size Length of dst_data
1528  * @return 0 on success
1529  *         AVERROR_INVALIDDATA if length_code_size is invalid
1530  *         AVERROR_BUFFER_TOO_SMALL if dst_data is too small
1531  *         or if a length_code in src_data specifies data beyond
1532  *         the end of its buffer.
1533  */
1534 static int copy_replace_length_codes(
1535     AVCodecContext *avctx,
1536     size_t        length_code_size,
1537     CMSampleBufferRef sample_buffer,
1538     ExtraSEI      *sei,
1539     uint8_t       *dst_data,
1540     size_t        dst_size)
1541 {
1542     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1543     size_t remaining_src_size = src_size;
1544     size_t remaining_dst_size = dst_size;
1545     size_t src_offset = 0;
1546     int wrote_sei = 0;
1547     int status;
1548     uint8_t size_buf[4];
1549     uint8_t nal_type;
1550     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
1551
1552     if (length_code_size > 4) {
1553         return AVERROR_INVALIDDATA;
1554     }
1555
1556     while (remaining_src_size > 0) {
1557         size_t curr_src_len;
1558         size_t curr_dst_len;
1559         size_t box_len = 0;
1560         size_t i;
1561
1562         uint8_t       *dst_box;
1563
1564         status = CMBlockBufferCopyDataBytes(block,
1565                                             src_offset,
1566                                             length_code_size,
1567                                             size_buf);
1568         if (status) {
1569             av_log(avctx, AV_LOG_ERROR, "Cannot copy length: %d\n", status);
1570             return AVERROR_EXTERNAL;
1571         }
1572
1573         status = CMBlockBufferCopyDataBytes(block,
1574                                             src_offset + length_code_size,
1575                                             1,
1576                                             &nal_type);
1577
1578         if (status) {
1579             av_log(avctx, AV_LOG_ERROR, "Cannot copy type: %d\n", status);
1580             return AVERROR_EXTERNAL;
1581         }
1582
1583         nal_type &= 0x1F;
1584
1585         for (i = 0; i < length_code_size; i++) {
1586             box_len <<= 8;
1587             box_len |= size_buf[i];
1588         }
1589
1590         if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
1591             //No SEI NAL unit - insert.
1592             int wrote_bytes;
1593
1594             memcpy(dst_data, start_code, sizeof(start_code));
1595             dst_data += sizeof(start_code);
1596             remaining_dst_size -= sizeof(start_code);
1597
1598             *dst_data = H264_NAL_SEI;
1599             dst_data++;
1600             remaining_dst_size--;
1601
1602             wrote_bytes = write_sei(sei,
1603                                     SEI_TYPE_USER_DATA_REGISTERED,
1604                                     dst_data,
1605                                     remaining_dst_size);
1606
1607             if (wrote_bytes < 0)
1608                 return wrote_bytes;
1609
1610             remaining_dst_size -= wrote_bytes;
1611             dst_data += wrote_bytes;
1612
1613             if (remaining_dst_size <= 0)
1614                 return AVERROR_BUFFER_TOO_SMALL;
1615
1616             *dst_data = 0x80;
1617
1618             dst_data++;
1619             remaining_dst_size--;
1620
1621             wrote_sei = 1;
1622         }
1623
1624         curr_src_len = box_len + length_code_size;
1625         curr_dst_len = box_len + sizeof(start_code);
1626
1627         if (remaining_src_size < curr_src_len) {
1628             return AVERROR_BUFFER_TOO_SMALL;
1629         }
1630
1631         if (remaining_dst_size < curr_dst_len) {
1632             return AVERROR_BUFFER_TOO_SMALL;
1633         }
1634
1635         dst_box = dst_data + sizeof(start_code);
1636
1637         memcpy(dst_data, start_code, sizeof(start_code));
1638         status = CMBlockBufferCopyDataBytes(block,
1639                                             src_offset + length_code_size,
1640                                             box_len,
1641                                             dst_box);
1642
1643         if (status) {
1644             av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
1645             return AVERROR_EXTERNAL;
1646         }
1647
1648         if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
1649             //Found SEI NAL unit - append.
1650             int wrote_bytes;
1651             int old_sei_length;
1652             int extra_bytes;
1653             uint8_t *new_sei;
1654             old_sei_length = find_sei_end(avctx, dst_box, box_len, &new_sei);
1655             if (old_sei_length < 0)
1656                 return status;
1657
1658             wrote_bytes = write_sei(sei,
1659                                     SEI_TYPE_USER_DATA_REGISTERED,
1660                                     new_sei,
1661                                     remaining_dst_size - old_sei_length);
1662             if (wrote_bytes < 0)
1663                 return wrote_bytes;
1664
1665             if (new_sei + wrote_bytes >= dst_data + remaining_dst_size)
1666                 return AVERROR_BUFFER_TOO_SMALL;
1667
1668             new_sei[wrote_bytes++] = 0x80;
1669             extra_bytes = wrote_bytes - (dst_box + box_len - new_sei);
1670
1671             dst_data += extra_bytes;
1672             remaining_dst_size -= extra_bytes;
1673
1674             wrote_sei = 1;
1675         }
1676
1677         src_offset += curr_src_len;
1678         dst_data += curr_dst_len;
1679
1680         remaining_src_size -= curr_src_len;
1681         remaining_dst_size -= curr_dst_len;
1682     }
1683
1684     return 0;
1685 }
1686
1687 /**
1688  * Returns a sufficient number of bytes to contain the sei data.
1689  * It may be greater than the minimum required.
1690  */
1691 static int get_sei_msg_bytes(const ExtraSEI* sei, int type){
1692     int copied_size;
1693     if (sei->size == 0)
1694         return 0;
1695
1696     copied_size = -copy_emulation_prev(sei->data,
1697                                        sei->size,
1698                                        NULL,
1699                                        0,
1700                                        0);
1701
1702     if ((sei->size % 255) == 0) //may result in an extra byte
1703         copied_size++;
1704
1705     return copied_size + sei->size / 255 + 1 + type / 255 + 1;
1706 }
1707
1708 static int vtenc_cm_to_avpacket(
1709     AVCodecContext    *avctx,
1710     CMSampleBufferRef sample_buffer,
1711     AVPacket          *pkt,
1712     ExtraSEI          *sei)
1713 {
1714     VTEncContext *vtctx = avctx->priv_data;
1715
1716     int     status;
1717     bool    is_key_frame;
1718     bool    add_header;
1719     size_t  length_code_size;
1720     size_t  header_size = 0;
1721     size_t  in_buf_size;
1722     size_t  out_buf_size;
1723     size_t  sei_nalu_size = 0;
1724     int64_t dts_delta;
1725     int64_t time_base_num;
1726     int nalu_count;
1727     CMTime  pts;
1728     CMTime  dts;
1729     CMVideoFormatDescriptionRef vid_fmt;
1730
1731
1732     vtenc_get_frame_info(sample_buffer, &is_key_frame);
1733     status = get_length_code_size(avctx, sample_buffer, &length_code_size);
1734     if (status) return status;
1735
1736     add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
1737
1738     if (add_header) {
1739         vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
1740         if (!vid_fmt) {
1741             av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
1742             return AVERROR_EXTERNAL;
1743         }
1744
1745         int status = get_params_size(avctx, vid_fmt, &header_size);
1746         if (status) return status;
1747     }
1748
1749     status = count_nalus(length_code_size, sample_buffer, &nalu_count);
1750     if(status)
1751         return status;
1752
1753     if (sei) {
1754         size_t msg_size = get_sei_msg_bytes(sei,
1755                                             SEI_TYPE_USER_DATA_REGISTERED);
1756
1757         sei_nalu_size = sizeof(start_code) + 1 + msg_size + 1;
1758     }
1759
1760     in_buf_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1761     out_buf_size = header_size +
1762                    in_buf_size +
1763                    sei_nalu_size +
1764                    nalu_count * ((int)sizeof(start_code) - (int)length_code_size);
1765
1766     status = ff_alloc_packet2(avctx, pkt, out_buf_size, out_buf_size);
1767     if (status < 0)
1768         return status;
1769
1770     if (add_header) {
1771         status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
1772         if(status) return status;
1773     }
1774
1775     status = copy_replace_length_codes(
1776         avctx,
1777         length_code_size,
1778         sample_buffer,
1779         sei,
1780         pkt->data + header_size,
1781         pkt->size - header_size
1782     );
1783
1784     if (status) {
1785         av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
1786         return status;
1787     }
1788
1789     if (is_key_frame) {
1790         pkt->flags |= AV_PKT_FLAG_KEY;
1791     }
1792
1793     pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
1794     dts = CMSampleBufferGetDecodeTimeStamp      (sample_buffer);
1795
1796     if (CMTIME_IS_INVALID(dts)) {
1797         if (!vtctx->has_b_frames) {
1798             dts = pts;
1799         } else {
1800             av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
1801             return AVERROR_EXTERNAL;
1802         }
1803     }
1804
1805     dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
1806     time_base_num = avctx->time_base.num;
1807     pkt->pts = pts.value / time_base_num;
1808     pkt->dts = dts.value / time_base_num - dts_delta;
1809     pkt->size = out_buf_size;
1810
1811     return 0;
1812 }
1813
1814 /*
1815  * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
1816  * containing all planes if so.
1817  */
1818 static int get_cv_pixel_info(
1819     AVCodecContext *avctx,
1820     const AVFrame  *frame,
1821     int            *color,
1822     int            *plane_count,
1823     size_t         *widths,
1824     size_t         *heights,
1825     size_t         *strides,
1826     size_t         *contiguous_buf_size)
1827 {
1828     VTEncContext *vtctx = avctx->priv_data;
1829     int av_format       = frame->format;
1830     int av_color_range  = av_frame_get_color_range(frame);
1831     int i;
1832     int range_guessed;
1833     int status;
1834
1835     status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
1836     if (status) {
1837         av_log(avctx,
1838             AV_LOG_ERROR,
1839             "Could not get pixel format for color format '%s' range '%s'.\n",
1840             av_get_pix_fmt_name(av_format),
1841             av_color_range > AVCOL_RANGE_UNSPECIFIED &&
1842             av_color_range < AVCOL_RANGE_NB ?
1843                av_color_range_name(av_color_range) :
1844                "Unknown");
1845
1846         return AVERROR(EINVAL);
1847     }
1848
1849     if (range_guessed) {
1850         if (!vtctx->warned_color_range) {
1851             vtctx->warned_color_range = true;
1852             av_log(avctx,
1853                    AV_LOG_WARNING,
1854                    "Color range not set for %s. Using MPEG range.\n",
1855                    av_get_pix_fmt_name(av_format));
1856         }
1857
1858         av_log(avctx, AV_LOG_WARNING, "");
1859     }
1860
1861     switch (av_format) {
1862     case AV_PIX_FMT_NV12:
1863         *plane_count = 2;
1864
1865         widths [0] = avctx->width;
1866         heights[0] = avctx->height;
1867         strides[0] = frame ? frame->linesize[0] : avctx->width;
1868
1869         widths [1] = (avctx->width  + 1) / 2;
1870         heights[1] = (avctx->height + 1) / 2;
1871         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
1872         break;
1873
1874     case AV_PIX_FMT_YUV420P:
1875         *plane_count = 3;
1876
1877         widths [0] = avctx->width;
1878         heights[0] = avctx->height;
1879         strides[0] = frame ? frame->linesize[0] : avctx->width;
1880
1881         widths [1] = (avctx->width  + 1) / 2;
1882         heights[1] = (avctx->height + 1) / 2;
1883         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;
1884
1885         widths [2] = (avctx->width  + 1) / 2;
1886         heights[2] = (avctx->height + 1) / 2;
1887         strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
1888         break;
1889
1890     default:
1891         av_log(
1892                avctx,
1893                AV_LOG_ERROR,
1894                "Could not get frame format info for color %d range %d.\n",
1895                av_format,
1896                av_color_range);
1897
1898         return AVERROR(EINVAL);
1899     }
1900
1901     *contiguous_buf_size = 0;
1902     for (i = 0; i < *plane_count; i++) {
1903         if (i < *plane_count - 1 &&
1904             frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
1905             *contiguous_buf_size = 0;
1906             break;
1907         }
1908
1909         *contiguous_buf_size += strides[i] * heights[i];
1910     }
1911
1912     return 0;
1913 }
1914
1915 #if !TARGET_OS_IPHONE
1916 //Not used on iOS - frame is always copied.
1917 static void free_avframe(
1918     void       *release_ctx,
1919     const void *data,
1920     size_t      size,
1921     size_t      plane_count,
1922     const void *plane_addresses[])
1923 {
1924     AVFrame *frame = release_ctx;
1925     av_frame_free(&frame);
1926 }
1927 #else
1928 //Not used on OSX - frame is never copied.
1929 static int copy_avframe_to_pixel_buffer(AVCodecContext   *avctx,
1930                                         const AVFrame    *frame,
1931                                         CVPixelBufferRef cv_img,
1932                                         const size_t     *plane_strides,
1933                                         const size_t     *plane_rows)
1934 {
1935     int i, j;
1936     size_t plane_count;
1937     int status;
1938     int rows;
1939     int src_stride;
1940     int dst_stride;
1941     uint8_t *src_addr;
1942     uint8_t *dst_addr;
1943     size_t copy_bytes;
1944
1945     status = CVPixelBufferLockBaseAddress(cv_img, 0);
1946     if (status) {
1947         av_log(
1948             avctx,
1949             AV_LOG_ERROR,
1950             "Error: Could not lock base address of CVPixelBuffer: %d.\n",
1951             status
1952         );
1953     }
1954
1955     if (CVPixelBufferIsPlanar(cv_img)) {
1956         plane_count = CVPixelBufferGetPlaneCount(cv_img);
1957         for (i = 0; frame->data[i]; i++) {
1958             if (i == plane_count) {
1959                 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1960                 av_log(avctx,
1961                     AV_LOG_ERROR,
1962                     "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
1963                 );
1964
1965                 return AVERROR_EXTERNAL;
1966             }
1967
1968             dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
1969             src_addr = (uint8_t*)frame->data[i];
1970             dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
1971             src_stride = plane_strides[i];
1972             rows = plane_rows[i];
1973
1974             if (dst_stride == src_stride) {
1975                 memcpy(dst_addr, src_addr, src_stride * rows);
1976             } else {
1977                 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1978
1979                 for (j = 0; j < rows; j++) {
1980                     memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1981                 }
1982             }
1983         }
1984     } else {
1985         if (frame->data[1]) {
1986             CVPixelBufferUnlockBaseAddress(cv_img, 0);
1987             av_log(avctx,
1988                 AV_LOG_ERROR,
1989                 "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
1990             );
1991
1992             return AVERROR_EXTERNAL;
1993         }
1994
1995         dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
1996         src_addr = (uint8_t*)frame->data[0];
1997         dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
1998         src_stride = plane_strides[0];
1999         rows = plane_rows[0];
2000
2001         if (dst_stride == src_stride) {
2002             memcpy(dst_addr, src_addr, src_stride * rows);
2003         } else {
2004             copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
2005
2006             for (j = 0; j < rows; j++) {
2007                 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
2008             }
2009         }
2010     }
2011
2012     status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
2013     if (status) {
2014         av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
2015         return AVERROR_EXTERNAL;
2016     }
2017
2018     return 0;
2019 }
2020 #endif //!TARGET_OS_IPHONE
2021
2022 static int create_cv_pixel_buffer(AVCodecContext   *avctx,
2023                                   const AVFrame    *frame,
2024                                   CVPixelBufferRef *cv_img)
2025 {
2026     int plane_count;
2027     int color;
2028     size_t widths [AV_NUM_DATA_POINTERS];
2029     size_t heights[AV_NUM_DATA_POINTERS];
2030     size_t strides[AV_NUM_DATA_POINTERS];
2031     int status;
2032     size_t contiguous_buf_size;
2033 #if TARGET_OS_IPHONE
2034     CVPixelBufferPoolRef pix_buf_pool;
2035     VTEncContext* vtctx = avctx->priv_data;
2036 #else
2037     CFMutableDictionaryRef pix_buf_attachments = CFDictionaryCreateMutable(
2038                                                    kCFAllocatorDefault,
2039                                                    10,
2040                                                    &kCFCopyStringDictionaryKeyCallBacks,
2041                                                    &kCFTypeDictionaryValueCallBacks);
2042
2043     if (!pix_buf_attachments) return AVERROR(ENOMEM);
2044 #endif
2045
2046     if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
2047         av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
2048
2049         *cv_img = (CVPixelBufferRef)frame->data[3];
2050         av_assert0(*cv_img);
2051
2052         CFRetain(*cv_img);
2053         return 0;
2054     }
2055
2056     memset(widths,  0, sizeof(widths));
2057     memset(heights, 0, sizeof(heights));
2058     memset(strides, 0, sizeof(strides));
2059
2060     status = get_cv_pixel_info(
2061         avctx,
2062         frame,
2063         &color,
2064         &plane_count,
2065         widths,
2066         heights,
2067         strides,
2068         &contiguous_buf_size
2069     );
2070
2071     if (status) {
2072         av_log(
2073             avctx,
2074             AV_LOG_ERROR,
2075             "Error: Cannot convert format %d color_range %d: %d\n",
2076             frame->format,
2077             av_frame_get_color_range(frame),
2078             status
2079         );
2080
2081         return AVERROR_EXTERNAL;
2082     }
2083
2084 #if TARGET_OS_IPHONE
2085     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2086     if (!pix_buf_pool) {
2087         av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
2088         return AVERROR_EXTERNAL;
2089     }
2090
2091     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2092                                                 pix_buf_pool,
2093                                                 cv_img);
2094
2095
2096     if (status) {
2097         av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
2098         return AVERROR_EXTERNAL;
2099     }
2100
2101     status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
2102     if (status) {
2103         CFRelease(*cv_img);
2104         *cv_img = NULL;
2105         return status;
2106     }
2107 #else
2108     AVFrame *enc_frame = av_frame_alloc();
2109     if (!enc_frame) return AVERROR(ENOMEM);
2110
2111     status = av_frame_ref(enc_frame, frame);
2112     if (status) {
2113         av_frame_free(&enc_frame);
2114         return status;
2115     }
2116
2117     status = CVPixelBufferCreateWithPlanarBytes(
2118         kCFAllocatorDefault,
2119         enc_frame->width,
2120         enc_frame->height,
2121         color,
2122         NULL,
2123         contiguous_buf_size,
2124         plane_count,
2125         (void **)enc_frame->data,
2126         widths,
2127         heights,
2128         strides,
2129         free_avframe,
2130         enc_frame,
2131         NULL,
2132         cv_img
2133     );
2134
2135     add_color_attr(avctx, pix_buf_attachments);
2136     CVBufferSetAttachments(*cv_img, pix_buf_attachments, kCVAttachmentMode_ShouldPropagate);
2137     CFRelease(pix_buf_attachments);
2138
2139     if (status) {
2140         av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
2141         return AVERROR_EXTERNAL;
2142     }
2143 #endif
2144
2145     return 0;
2146 }
2147
2148 static int create_encoder_dict_h264(const AVFrame *frame,
2149                                     CFDictionaryRef* dict_out)
2150 {
2151     CFDictionaryRef dict = NULL;
2152     if (frame->pict_type == AV_PICTURE_TYPE_I) {
2153         const void *keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame };
2154         const void *vals[] = { kCFBooleanTrue };
2155
2156         dict = CFDictionaryCreate(NULL, keys, vals, 1, NULL, NULL);
2157         if(!dict) return AVERROR(ENOMEM);
2158     }
2159
2160     *dict_out = dict;
2161     return 0;
2162 }
2163
2164 static int vtenc_send_frame(AVCodecContext *avctx,
2165                             VTEncContext   *vtctx,
2166                             const AVFrame  *frame)
2167 {
2168     CMTime time;
2169     CFDictionaryRef frame_dict;
2170     CVPixelBufferRef cv_img = NULL;
2171     AVFrameSideData *side_data = NULL;
2172     ExtraSEI *sei = NULL;
2173     int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
2174
2175     if (status) return status;
2176
2177     status = create_encoder_dict_h264(frame, &frame_dict);
2178     if (status) {
2179         CFRelease(cv_img);
2180         return status;
2181     }
2182
2183     side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
2184     if (vtctx->a53_cc && side_data && side_data->size) {
2185         sei = av_mallocz(sizeof(*sei));
2186         if (!sei) {
2187             av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2188         } else {
2189             int ret = ff_alloc_a53_sei(frame, 0, &sei->data, &sei->size);
2190             if (ret < 0) {
2191                 av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2192                 av_free(sei);
2193                 sei = NULL;
2194             }
2195         }
2196     }
2197
2198     time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
2199     status = VTCompressionSessionEncodeFrame(
2200         vtctx->session,
2201         cv_img,
2202         time,
2203         kCMTimeInvalid,
2204         frame_dict,
2205         sei,
2206         NULL
2207     );
2208
2209     if (frame_dict) CFRelease(frame_dict);
2210     CFRelease(cv_img);
2211
2212     if (status) {
2213         av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
2214         return AVERROR_EXTERNAL;
2215     }
2216
2217     return 0;
2218 }
2219
2220 static av_cold int vtenc_frame(
2221     AVCodecContext *avctx,
2222     AVPacket       *pkt,
2223     const AVFrame  *frame,
2224     int            *got_packet)
2225 {
2226     VTEncContext *vtctx = avctx->priv_data;
2227     bool get_frame;
2228     int status;
2229     CMSampleBufferRef buf = NULL;
2230     ExtraSEI *sei = NULL;
2231
2232     if (frame) {
2233         status = vtenc_send_frame(avctx, vtctx, frame);
2234
2235         if (status) {
2236             status = AVERROR_EXTERNAL;
2237             goto end_nopkt;
2238         }
2239
2240         if (vtctx->frame_ct_in == 0) {
2241             vtctx->first_pts = frame->pts;
2242         } else if(vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
2243             vtctx->dts_delta = frame->pts - vtctx->first_pts;
2244         }
2245
2246         vtctx->frame_ct_in++;
2247     } else if(!vtctx->flushing) {
2248         vtctx->flushing = true;
2249
2250         status = VTCompressionSessionCompleteFrames(vtctx->session,
2251                                                     kCMTimeIndefinite);
2252
2253         if (status) {
2254             av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
2255             status = AVERROR_EXTERNAL;
2256             goto end_nopkt;
2257         }
2258     }
2259
2260     *got_packet = 0;
2261     get_frame = vtctx->dts_delta >= 0 || !frame;
2262     if (!get_frame) {
2263         status = 0;
2264         goto end_nopkt;
2265     }
2266
2267     status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
2268     if (status) goto end_nopkt;
2269     if (!buf)   goto end_nopkt;
2270
2271     status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei);
2272     if (sei) {
2273         if (sei->data) av_free(sei->data);
2274         av_free(sei);
2275     }
2276     CFRelease(buf);
2277     if (status) goto end_nopkt;
2278
2279     *got_packet = 1;
2280     return 0;
2281
2282 end_nopkt:
2283     av_packet_unref(pkt);
2284     return status;
2285 }
2286
2287 static int vtenc_populate_extradata(AVCodecContext   *avctx,
2288                                     CMVideoCodecType codec_type,
2289                                     CFStringRef      profile_level,
2290                                     CFNumberRef      gamma_level,
2291                                     CFDictionaryRef  enc_info,
2292                                     CFDictionaryRef  pixel_buffer_info)
2293 {
2294     VTEncContext *vtctx = avctx->priv_data;
2295     AVFrame *frame = av_frame_alloc();
2296     int y_size = avctx->width * avctx->height;
2297     int chroma_size = (avctx->width / 2) * (avctx->height / 2);
2298     CMSampleBufferRef buf = NULL;
2299     int status;
2300
2301     if (!frame)
2302         return AVERROR(ENOMEM);
2303
2304     frame->buf[0] = av_buffer_alloc(y_size + 2 * chroma_size);
2305
2306     if(!frame->buf[0]){
2307         status = AVERROR(ENOMEM);
2308         goto pe_cleanup;
2309     }
2310
2311     status = vtenc_create_encoder(avctx,
2312                                   codec_type,
2313                                   profile_level,
2314                                   gamma_level,
2315                                   enc_info,
2316                                   pixel_buffer_info,
2317                                   &vtctx->session);
2318     if (status)
2319         goto pe_cleanup;
2320
2321     frame->data[0] = frame->buf[0]->data;
2322     memset(frame->data[0],   0,      y_size);
2323
2324     frame->data[1] = frame->buf[0]->data + y_size;
2325     memset(frame->data[1], 128, chroma_size);
2326
2327
2328     if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2329         frame->data[2] = frame->buf[0]->data + y_size + chroma_size;
2330         memset(frame->data[2], 128, chroma_size);
2331     }
2332
2333     frame->linesize[0] = avctx->width;
2334
2335     if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2336         frame->linesize[1] =
2337         frame->linesize[2] = (avctx->width + 1) / 2;
2338     } else {
2339         frame->linesize[1] = (avctx->width + 1) / 2;
2340     }
2341
2342     frame->format          = avctx->pix_fmt;
2343     frame->width           = avctx->width;
2344     frame->height          = avctx->height;
2345     av_frame_set_colorspace(frame, avctx->colorspace);
2346     av_frame_set_color_range(frame, avctx->color_range);
2347     frame->color_trc       = avctx->color_trc;
2348     frame->color_primaries = avctx->color_primaries;
2349
2350     frame->pts = 0;
2351     status = vtenc_send_frame(avctx, vtctx, frame);
2352     if (status) {
2353         av_log(avctx, AV_LOG_ERROR, "Error sending frame: %d\n", status);
2354         goto pe_cleanup;
2355     }
2356
2357     //Populates extradata - output frames are flushed and param sets are available.
2358     status = VTCompressionSessionCompleteFrames(vtctx->session,
2359                                                 kCMTimeIndefinite);
2360
2361     if (status)
2362         goto pe_cleanup;
2363
2364     status = vtenc_q_pop(vtctx, 0, &buf, NULL);
2365     if (status) {
2366         av_log(avctx, AV_LOG_ERROR, "popping: %d\n", status);
2367         goto pe_cleanup;
2368     }
2369
2370     CFRelease(buf);
2371
2372
2373
2374 pe_cleanup:
2375     if(vtctx->session)
2376         CFRelease(vtctx->session);
2377
2378     vtctx->session = NULL;
2379     vtctx->frame_ct_out = 0;
2380
2381     av_frame_unref(frame);
2382     av_frame_free(&frame);
2383
2384     av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0));
2385
2386     return status;
2387 }
2388
2389 static av_cold int vtenc_close(AVCodecContext *avctx)
2390 {
2391     VTEncContext *vtctx = avctx->priv_data;
2392
2393     if(!vtctx->session) return 0;
2394
2395     VTCompressionSessionCompleteFrames(vtctx->session,
2396                                        kCMTimeIndefinite);
2397     clear_frame_queue(vtctx);
2398     pthread_cond_destroy(&vtctx->cv_sample_sent);
2399     pthread_mutex_destroy(&vtctx->lock);
2400     CFRelease(vtctx->session);
2401     vtctx->session = NULL;
2402
2403     if (vtctx->color_primaries) {
2404         CFRelease(vtctx->color_primaries);
2405         vtctx->color_primaries = NULL;
2406     }
2407
2408     if (vtctx->transfer_function) {
2409         CFRelease(vtctx->transfer_function);
2410         vtctx->transfer_function = NULL;
2411     }
2412
2413     if (vtctx->ycbcr_matrix) {
2414         CFRelease(vtctx->ycbcr_matrix);
2415         vtctx->ycbcr_matrix = NULL;
2416     }
2417
2418     return 0;
2419 }
2420
2421 static const enum AVPixelFormat pix_fmts[] = {
2422     AV_PIX_FMT_VIDEOTOOLBOX,
2423     AV_PIX_FMT_NV12,
2424     AV_PIX_FMT_YUV420P,
2425     AV_PIX_FMT_NONE
2426 };
2427
/* Byte offset of an option's backing field inside the private context. */
#define OFFSET(x) offsetof(VTEncContext, x)
/* Flags shared by every option below; parenthesized so the expansion is
 * safe inside any larger expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
2430 static const AVOption options[] = {
2431     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
2432     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
2433     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
2434     { "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },
2435
2436     { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
2437     { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
2438     { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
2439     { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
2440     { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
2441     { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
2442     { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
2443     { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
2444     { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
2445     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
2446     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
2447
2448     { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
2449         { .i64 = 0 }, 0, 1, VE },
2450
2451     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
2452     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2453     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2454     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2455     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2456
2457     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
2458         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2459
2460     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
2461         OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2462     { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
2463         OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2464
2465     { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
2466
2467     { NULL },
2468 };
2469
2470 static const AVClass h264_videotoolbox_class = {
2471     .class_name = "h264_videotoolbox",
2472     .item_name  = av_default_item_name,
2473     .option     = options,
2474     .version    = LIBAVUTIL_VERSION_INT,
2475 };
2476
2477 AVCodec ff_h264_videotoolbox_encoder = {
2478     .name             = "h264_videotoolbox",
2479     .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
2480     .type             = AVMEDIA_TYPE_VIDEO,
2481     .id               = AV_CODEC_ID_H264,
2482     .priv_data_size   = sizeof(VTEncContext),
2483     .pix_fmts         = pix_fmts,
2484     .init             = vtenc_init,
2485     .encode2          = vtenc_frame,
2486     .close            = vtenc_close,
2487     .capabilities     = AV_CODEC_CAP_DELAY,
2488     .priv_class       = &h264_videotoolbox_class,
2489     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
2490                         FF_CODEC_CAP_INIT_CLEANUP,
2491 };