1 /*
2  * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include <VideoToolbox/VideoToolbox.h>
22 #include <CoreVideo/CoreVideo.h>
23 #include <CoreMedia/CoreMedia.h>
24 #include <TargetConditionals.h>
25 #include <Availability.h>
26 #include "avcodec.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/avassert.h"
29 #include "libavutil/atomic.h"
30 #include "libavutil/avstring.h"
31 #include "libavcodec/avcodec.h"
32 #include "libavutil/pixdesc.h"
33 #include "internal.h"
34 #include <pthread.h>
35 #include "h264.h"
36 #include "h264_sei.h"
37 #include <dlfcn.h>
38
39 // These symbols may not be present at runtime (older SDKs / OS releases); loadVTEncSymbols() resolves them via dlsym() and falls back to CFSTR constants.
40 static struct{
41     CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
42     CFStringRef kCVImageBufferTransferFunction_ITU_R_2020;
43     CFStringRef kCVImageBufferYCbCrMatrix_ITU_R_2020;
44
45     CFStringRef kVTCompressionPropertyKey_H264EntropyMode;
46     CFStringRef kVTH264EntropyMode_CAVLC;
47     CFStringRef kVTH264EntropyMode_CABAC;
48
49     CFStringRef kVTProfileLevel_H264_Baseline_4_0;
50     CFStringRef kVTProfileLevel_H264_Baseline_4_2;
51     CFStringRef kVTProfileLevel_H264_Baseline_5_0;
52     CFStringRef kVTProfileLevel_H264_Baseline_5_1;
53     CFStringRef kVTProfileLevel_H264_Baseline_5_2;
54     CFStringRef kVTProfileLevel_H264_Baseline_AutoLevel;
55     CFStringRef kVTProfileLevel_H264_Main_4_2;
56     CFStringRef kVTProfileLevel_H264_Main_5_1;
57     CFStringRef kVTProfileLevel_H264_Main_5_2;
58     CFStringRef kVTProfileLevel_H264_Main_AutoLevel;
59     CFStringRef kVTProfileLevel_H264_High_3_0;
60     CFStringRef kVTProfileLevel_H264_High_3_1;
61     CFStringRef kVTProfileLevel_H264_High_3_2;
62     CFStringRef kVTProfileLevel_H264_High_4_0;
63     CFStringRef kVTProfileLevel_H264_High_4_1;
64     CFStringRef kVTProfileLevel_H264_High_4_2;
65     CFStringRef kVTProfileLevel_H264_High_5_1;
66     CFStringRef kVTProfileLevel_H264_High_5_2;
67     CFStringRef kVTProfileLevel_H264_High_AutoLevel;
68
69     CFStringRef kVTCompressionPropertyKey_RealTime;
70
71     CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
72     CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;
73 } compat_keys;
74
75 #define GET_SYM(symbol, defaultVal)                                     \
76 do{                                                                     \
77     CFStringRef *handle = (CFStringRef*)dlsym(RTLD_DEFAULT, #symbol);\
78     if(!handle)                                                      \
79         compat_keys.symbol = CFSTR(defaultVal);                      \
80     else                                                             \
81         compat_keys.symbol = *handle;                                \
82 }while(0)
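/*
 * Illustration only (not part of the original file): GET_SYM expands to a
 * runtime lookup with a compile-time string fallback. For example,
 * GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime") behaves roughly as:
 *
 *     CFStringRef *handle = (CFStringRef*)dlsym(RTLD_DEFAULT,
 *                               "kVTCompressionPropertyKey_RealTime");
 *     compat_keys.kVTCompressionPropertyKey_RealTime =
 *         handle ? *handle : CFSTR("RealTime");
 *
 * so the encoder still builds and runs on SDKs/OS releases where the symbol
 * does not exist.
 */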
83
84 static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;
85
86 static void loadVTEncSymbols(){
87     GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020,   "ITU_R_2020");
88     GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
89     GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020,      "ITU_R_2020");
90
91     GET_SYM(kVTCompressionPropertyKey_H264EntropyMode, "H264EntropyMode");
92     GET_SYM(kVTH264EntropyMode_CAVLC, "CAVLC");
93     GET_SYM(kVTH264EntropyMode_CABAC, "CABAC");
94
95     GET_SYM(kVTProfileLevel_H264_Baseline_4_0,       "H264_Baseline_4_0");
96     GET_SYM(kVTProfileLevel_H264_Baseline_4_2,       "H264_Baseline_4_2");
97     GET_SYM(kVTProfileLevel_H264_Baseline_5_0,       "H264_Baseline_5_0");
98     GET_SYM(kVTProfileLevel_H264_Baseline_5_1,       "H264_Baseline_5_1");
99     GET_SYM(kVTProfileLevel_H264_Baseline_5_2,       "H264_Baseline_5_2");
100     GET_SYM(kVTProfileLevel_H264_Baseline_AutoLevel, "H264_Baseline_AutoLevel");
101     GET_SYM(kVTProfileLevel_H264_Main_4_2,           "H264_Main_4_2");
102     GET_SYM(kVTProfileLevel_H264_Main_5_1,           "H264_Main_5_1");
103     GET_SYM(kVTProfileLevel_H264_Main_5_2,           "H264_Main_5_2");
104     GET_SYM(kVTProfileLevel_H264_Main_AutoLevel,     "H264_Main_AutoLevel");
105     GET_SYM(kVTProfileLevel_H264_High_3_0,           "H264_High_3_0");
106     GET_SYM(kVTProfileLevel_H264_High_3_1,           "H264_High_3_1");
107     GET_SYM(kVTProfileLevel_H264_High_3_2,           "H264_High_3_2");
108     GET_SYM(kVTProfileLevel_H264_High_4_0,           "H264_High_4_0");
109     GET_SYM(kVTProfileLevel_H264_High_4_1,           "H264_High_4_1");
110     GET_SYM(kVTProfileLevel_H264_High_4_2,           "H264_High_4_2");
111     GET_SYM(kVTProfileLevel_H264_High_5_1,           "H264_High_5_1");
112     GET_SYM(kVTProfileLevel_H264_High_5_2,           "H264_High_5_2");
113     GET_SYM(kVTProfileLevel_H264_High_AutoLevel,     "H264_High_AutoLevel");
114
115     GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
116
117     GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
118             "EnableHardwareAcceleratedVideoEncoder");
119     GET_SYM(kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
120             "RequireHardwareAcceleratedVideoEncoder");
121 }
122
123 typedef enum VT_H264Profile {
124     H264_PROF_AUTO,
125     H264_PROF_BASELINE,
126     H264_PROF_MAIN,
127     H264_PROF_HIGH,
128     H264_PROF_COUNT
129 } VT_H264Profile;
130
131 typedef enum VTH264Entropy{
132     VT_ENTROPY_NOT_SET,
133     VT_CAVLC,
134     VT_CABAC
135 } VTH264Entropy;
136
137 static const uint8_t start_code[] = { 0, 0, 0, 1 };
138
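/* ExtraSEI carries caller-supplied SEI payload bytes (e.g. A53 closed
 * captions) attached to a frame so they can be spliced into the encoded
 * access unit; BufNode below is one entry of the encoded-output queue. */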
139 typedef struct ExtraSEI {
140     void *data;
141     size_t size;
142 } ExtraSEI;
143
144 typedef struct BufNode {
145     CMSampleBufferRef cm_buffer;
146     ExtraSEI *sei;
147     struct BufNode* next;
148     int error;
149 } BufNode;
150
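/* Private encoder state. lock/cv_sample_sent guard the q_head/q_tail output
 * queue that vtenc_output_callback() fills and vtenc_q_pop() drains, while
 * frame_ct_in/frame_ct_out count frames submitted to and received from the
 * VTCompressionSession. */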
151 typedef struct VTEncContext {
152     AVClass *class;
153     VTCompressionSessionRef session;
154     CFStringRef ycbcr_matrix;
155     CFStringRef color_primaries;
156     CFStringRef transfer_function;
157
158     pthread_mutex_t lock;
159     pthread_cond_t  cv_sample_sent;
160
161     int async_error;
162
163     BufNode *q_head;
164     BufNode *q_tail;
165
166     int64_t frame_ct_out;
167     int64_t frame_ct_in;
168
169     int64_t first_pts;
170     int64_t dts_delta;
171
172     int64_t profile;
173     int64_t level;
174     int64_t entropy;
175     int64_t realtime;
176     int64_t frames_before;
177     int64_t frames_after;
178
179     int64_t allow_sw;
180
181     bool flushing;
182     bool has_b_frames;
183     bool warned_color_range;
184     bool a53_cc;
185 } VTEncContext;
186
187 static int vtenc_populate_extradata(AVCodecContext   *avctx,
188                                     CMVideoCodecType codec_type,
189                                     CFStringRef      profile_level,
190                                     CFNumberRef      gamma_level,
191                                     CFDictionaryRef  enc_info,
192                                     CFDictionaryRef  pixel_buffer_info);
193
194 /**
195  * NULL-safe release of *refPtr; sets *refPtr to NULL afterwards.
196  */
197 static void vt_release_num(CFNumberRef* refPtr){
198     if (!*refPtr) {
199         return;
200     }
201
202     CFRelease(*refPtr);
203     *refPtr = NULL;
204 }
205
206 static void set_async_error(VTEncContext *vtctx, int err)
207 {
208     BufNode *info;
209
210     pthread_mutex_lock(&vtctx->lock);
211
212     vtctx->async_error = err;
213
214     info = vtctx->q_head;
215     vtctx->q_head = vtctx->q_tail = NULL;
216
217     while (info) {
218         BufNode *next = info->next;
219         CFRelease(info->cm_buffer);
220         av_free(info);
221         info = next;
222     }
223
224     pthread_mutex_unlock(&vtctx->lock);
225 }
226
227 static void clear_frame_queue(VTEncContext *vtctx)
228 {
229     set_async_error(vtctx, 0);
230 }
231
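/*
 * Pops the next encoded sample from the output queue. With wait set, this
 * blocks on cv_sample_sent until the VideoToolbox callback pushes a sample
 * or an asynchronous error is recorded. Hypothetical caller sketch (for
 * illustration only, not part of the original file):
 *
 *     CMSampleBufferRef buf;
 *     ExtraSEI *sei = NULL;
 *     int ret = vtenc_q_pop(vtctx, wait, &buf, &sei);
 *     if (ret < 0) return ret;   // asynchronous encode error
 *     if (!buf)    return 0;     // nothing pending / fully flushed
 *     // ...wrap buf into an AVPacket, then CFRelease(buf)...
 */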
232 static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI **sei)
233 {
234     BufNode *info;
235
236     pthread_mutex_lock(&vtctx->lock);
237
238     if (vtctx->async_error) {
239         pthread_mutex_unlock(&vtctx->lock);
240         return vtctx->async_error;
241     }
242
243     if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
244         *buf = NULL;
245
246         pthread_mutex_unlock(&vtctx->lock);
247         return 0;
248     }
249
250     while (!vtctx->q_head && !vtctx->async_error && wait) {
251         pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
252     }
253
254     if (!vtctx->q_head) {
255         pthread_mutex_unlock(&vtctx->lock);
256         *buf = NULL;
257         return 0;
258     }
259
260     info = vtctx->q_head;
261     vtctx->q_head = vtctx->q_head->next;
262     if (!vtctx->q_head) {
263         vtctx->q_tail = NULL;
264     }
265
266     pthread_mutex_unlock(&vtctx->lock);
267
268     *buf = info->cm_buffer;
269     if (sei && *buf) {
270         *sei = info->sei;
271     } else if (info->sei) {
272         if (info->sei->data) av_free(info->sei->data);
273         av_free(info->sei);
274     }
275     av_free(info);
276
277     vtctx->frame_ct_out++;
278
279     return 0;
280 }
281
282 static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer, ExtraSEI *sei)
283 {
284     BufNode *info = av_malloc(sizeof(BufNode));
285     if (!info) {
286         set_async_error(vtctx, AVERROR(ENOMEM));
287         return;
288     }
289
290     CFRetain(buffer);
291     info->cm_buffer = buffer;
292     info->sei = sei;
293     info->next = NULL;
294
295     pthread_mutex_lock(&vtctx->lock);
296     pthread_cond_signal(&vtctx->cv_sample_sent);
297
298     if (!vtctx->q_head) {
299         vtctx->q_head = info;
300     } else {
301         vtctx->q_tail->next = info;
302     }
303
304     vtctx->q_tail = info;
305
306     pthread_mutex_unlock(&vtctx->lock);
307 }
308
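/*
 * VideoToolbox emits AVCC-style samples: each NAL unit is preceded by a
 * big-endian length field of length_code_size bytes instead of an Annex B
 * start code. count_nalus() walks these length prefixes to count the NAL
 * units in a CMSampleBuffer; copy_replace_length_codes() further below does
 * the actual start-code rewriting.
 */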
309 static int count_nalus(size_t length_code_size,
310                        CMSampleBufferRef sample_buffer,
311                        int *count)
312 {
313     size_t offset = 0;
314     int status;
315     int nalu_ct = 0;
316     uint8_t size_buf[4];
317     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
318     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
319
320     if (length_code_size > 4)
321         return AVERROR_INVALIDDATA;
322
323     while (offset < src_size) {
324         size_t curr_src_len;
325         size_t box_len = 0;
326         size_t i;
327
328         status = CMBlockBufferCopyDataBytes(block,
329                                             offset,
330                                             length_code_size,
331                                             size_buf);
332
333         for (i = 0; i < length_code_size; i++) {
334             box_len <<= 8;
335             box_len |= size_buf[i];
336         }
337
338         curr_src_len = box_len + length_code_size;
339         offset += curr_src_len;
340
341         nalu_ct++;
342     }
343
344     *count = nalu_ct;
345     return 0;
346 }
347
348 static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
349 {
350     switch (id) {
351     case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
352     default:               return 0;
353     }
354 }
355
356 /**
357  * Computes the total size of the parameter sets (SPS, PPS) contained in a
358  * CMVideoFormatDescriptionRef when written out as Annex B data, i.e. with
359  * a start code prepended to each parameter set.
360  *
361  * @param size Set to the number of bytes needed for the start codes plus
362  *             parameter sets; copy_param_sets() below performs the copy.
363  */
364 static int get_params_size(
365     AVCodecContext              *avctx,
366     CMVideoFormatDescriptionRef vid_fmt,
367     size_t                      *size)
368 {
369     size_t total_size = 0;
370     size_t ps_count;
371     int is_count_bad = 0;
372     size_t i;
373     int status;
374     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
375                                                                 0,
376                                                                 NULL,
377                                                                 NULL,
378                                                                 &ps_count,
379                                                                 NULL);
380     if (status) {
381         is_count_bad = 1;
382         ps_count     = 0;
383         status       = 0;
384     }
385
386     for (i = 0; i < ps_count || is_count_bad; i++) {
387         const uint8_t *ps;
388         size_t ps_size;
389         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
390                                                                     i,
391                                                                     &ps,
392                                                                     &ps_size,
393                                                                     NULL,
394                                                                     NULL);
395         if (status) {
396             /*
397              * When ps_count is invalid, status != 0 ends the loop normally
398              * unless we didn't get any parameter sets.
399              */
400             if (i > 0 && is_count_bad) status = 0;
401
402             break;
403         }
404
405         total_size += ps_size + sizeof(start_code);
406     }
407
408     if (status) {
409         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
410         return AVERROR_EXTERNAL;
411     }
412
413     *size = total_size;
414     return 0;
415 }
416
417 static int copy_param_sets(
418     AVCodecContext              *avctx,
419     CMVideoFormatDescriptionRef vid_fmt,
420     uint8_t                     *dst,
421     size_t                      dst_size)
422 {
423     size_t ps_count;
424     int is_count_bad = 0;
425     int status;
426     size_t offset = 0;
427     size_t i;
428
429     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
430                                                                 0,
431                                                                 NULL,
432                                                                 NULL,
433                                                                 &ps_count,
434                                                                 NULL);
435     if (status) {
436         is_count_bad = 1;
437         ps_count     = 0;
438         status       = 0;
439     }
440
441
442     for (i = 0; i < ps_count || is_count_bad; i++) {
443         const uint8_t *ps;
444         size_t ps_size;
445         size_t next_offset;
446
447         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
448                                                                     i,
449                                                                     &ps,
450                                                                     &ps_size,
451                                                                     NULL,
452                                                                     NULL);
453         if (status) {
454             if (i > 0 && is_count_bad) status = 0;
455
456             break;
457         }
458
459         next_offset = offset + sizeof(start_code) + ps_size;
460         if (dst_size < next_offset) {
461             av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
462             return AVERROR_BUFFER_TOO_SMALL;
463         }
464
465         memcpy(dst + offset, start_code, sizeof(start_code));
466         offset += sizeof(start_code);
467
468         memcpy(dst + offset, ps, ps_size);
469         offset = next_offset;
470     }
471
472     if (status) {
473         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
474         return AVERROR_EXTERNAL;
475     }
476
477     return 0;
478 }
479
480 static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
481 {
482     CMVideoFormatDescriptionRef vid_fmt;
483     size_t total_size;
484     int status;
485
486     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
487     if (!vid_fmt) {
488         av_log(avctx, AV_LOG_ERROR, "No video format.\n");
489         return AVERROR_EXTERNAL;
490     }
491
492     status = get_params_size(avctx, vid_fmt, &total_size);
493     if (status) {
494         av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
495         return status;
496     }
497
498     avctx->extradata = av_mallocz(total_size + AV_INPUT_BUFFER_PADDING_SIZE);
499     if (!avctx->extradata) {
500         return AVERROR(ENOMEM);
501     }
502     avctx->extradata_size = total_size;
503
504     status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
505
506     if (status) {
507         av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
508         return status;
509     }
510
511     return 0;
512 }
513
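/*
 * Compression output callback registered with VTCompressionSessionCreate().
 * VideoToolbox calls it on its own thread once per encoded frame, so it only
 * records errors and appends the sample (plus any per-frame SEI passed as
 * sourceFrameCtx) to the output queue; packets are assembled later on the
 * caller's thread.
 */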
514 static void vtenc_output_callback(
515     void *ctx,
516     void *sourceFrameCtx,
517     OSStatus status,
518     VTEncodeInfoFlags flags,
519     CMSampleBufferRef sample_buffer)
520 {
521     AVCodecContext *avctx = ctx;
522     VTEncContext   *vtctx = avctx->priv_data;
523     ExtraSEI *sei = sourceFrameCtx;
524
525     if (vtctx->async_error) {
526         if(sample_buffer) CFRelease(sample_buffer);
527         return;
528     }
529
530     if (status || !sample_buffer) {
531         av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
532         set_async_error(vtctx, AVERROR_EXTERNAL);
533         return;
534     }
535
536     if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
537         int set_status = set_extradata(avctx, sample_buffer);
538         if (set_status) {
539             set_async_error(vtctx, set_status);
540             return;
541         }
542     }
543
544     vtenc_q_push(vtctx, sample_buffer, sei);
545 }
546
547 static int get_length_code_size(
548     AVCodecContext    *avctx,
549     CMSampleBufferRef sample_buffer,
550     size_t            *size)
551 {
552     CMVideoFormatDescriptionRef vid_fmt;
553     int isize;
554     int status;
555
556     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
557     if (!vid_fmt) {
558         av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
559         return AVERROR_EXTERNAL;
560     }
561
562     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
563                                                                 0,
564                                                                 NULL,
565                                                                 NULL,
566                                                                 NULL,
567                                                                 &isize);
568     if (status) {
569         av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
570         return AVERROR_EXTERNAL;
571     }
572
573     *size = isize;
574     return 0;
575 }
576
577 /*
578  * Returns true on success.
579  *
580  * If profile_level_val is NULL and this method returns true, don't specify the
581  * profile/level to the encoder.
582  */
583 static bool get_vt_profile_level(AVCodecContext *avctx,
584                                  CFStringRef    *profile_level_val)
585 {
586     VTEncContext *vtctx = avctx->priv_data;
587     int64_t profile = vtctx->profile;
588
589     if (profile == H264_PROF_AUTO && vtctx->level) {
590         //Need to pick a profile if level is not auto-selected.
591         profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
592     }
593
594     *profile_level_val = NULL;
595
596     switch (profile) {
597         case H264_PROF_AUTO:
598             return true;
599
600         case H264_PROF_BASELINE:
601             switch (vtctx->level) {
602                 case  0: *profile_level_val =
603                                   compat_keys.kVTProfileLevel_H264_Baseline_AutoLevel; break;
604                 case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
605                 case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
606                 case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
607                 case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
608                 case 40: *profile_level_val =
609                                   compat_keys.kVTProfileLevel_H264_Baseline_4_0;       break;
610                 case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
611                 case 42: *profile_level_val =
612                                   compat_keys.kVTProfileLevel_H264_Baseline_4_2;       break;
613                 case 50: *profile_level_val =
614                                   compat_keys.kVTProfileLevel_H264_Baseline_5_0;       break;
615                 case 51: *profile_level_val =
616                                   compat_keys.kVTProfileLevel_H264_Baseline_5_1;       break;
617                 case 52: *profile_level_val =
618                                   compat_keys.kVTProfileLevel_H264_Baseline_5_2;       break;
619             }
620             break;
621
622         case H264_PROF_MAIN:
623             switch (vtctx->level) {
624                 case  0: *profile_level_val =
625                                   compat_keys.kVTProfileLevel_H264_Main_AutoLevel; break;
626                 case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
627                 case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
628                 case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
629                 case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
630                 case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
631                 case 42: *profile_level_val =
632                                   compat_keys.kVTProfileLevel_H264_Main_4_2;       break;
633                 case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
634                 case 51: *profile_level_val =
635                                   compat_keys.kVTProfileLevel_H264_Main_5_1;       break;
636                 case 52: *profile_level_val =
637                                   compat_keys.kVTProfileLevel_H264_Main_5_2;       break;
638             }
639             break;
640
641         case H264_PROF_HIGH:
642             switch (vtctx->level) {
643                 case  0: *profile_level_val =
644                                   compat_keys.kVTProfileLevel_H264_High_AutoLevel; break;
645                 case 30: *profile_level_val =
646                                   compat_keys.kVTProfileLevel_H264_High_3_0;       break;
647                 case 31: *profile_level_val =
648                                   compat_keys.kVTProfileLevel_H264_High_3_1;       break;
649                 case 32: *profile_level_val =
650                                   compat_keys.kVTProfileLevel_H264_High_3_2;       break;
651                 case 40: *profile_level_val =
652                                   compat_keys.kVTProfileLevel_H264_High_4_0;       break;
653                 case 41: *profile_level_val =
654                                   compat_keys.kVTProfileLevel_H264_High_4_1;       break;
655                 case 42: *profile_level_val =
656                                   compat_keys.kVTProfileLevel_H264_High_4_2;       break;
657                 case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
658                 case 51: *profile_level_val =
659                                   compat_keys.kVTProfileLevel_H264_High_5_1;       break;
660                 case 52: *profile_level_val =
661                                   compat_keys.kVTProfileLevel_H264_High_5_2;       break;
662             }
663             break;
664     }
665
666     if (!*profile_level_val) {
667         av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
668         return false;
669     }
670
671     return true;
672 }
673
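/*
 * Maps an AVPixelFormat/AVColorRange pair to the matching
 * kCVPixelFormatType_* constant, e.g. AV_PIX_FMT_NV12 with AVCOL_RANGE_MPEG
 * maps to kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange. *range_guessed is
 * set when no explicit range was given and MPEG (video) range is assumed.
 */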
674 static int get_cv_pixel_format(AVCodecContext* avctx,
675                                enum AVPixelFormat fmt,
676                                enum AVColorRange range,
677                                int* av_pixel_format,
678                                int* range_guessed)
679 {
680     if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
681                                         range != AVCOL_RANGE_JPEG;
682
683     //MPEG range is used when no range is set
684     if (fmt == AV_PIX_FMT_NV12) {
685         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
686                                         kCVPixelFormatType_420YpCbCr8BiPlanarFullRange :
687                                         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
688     } else if (fmt == AV_PIX_FMT_YUV420P) {
689         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
690                                         kCVPixelFormatType_420YpCbCr8PlanarFullRange :
691                                         kCVPixelFormatType_420YpCbCr8Planar;
692     } else {
693         return AVERROR(EINVAL);
694     }
695
696     return 0;
697 }
698
699 static void add_color_attr(AVCodecContext *avctx, CFMutableDictionaryRef dict) {
700     VTEncContext *vtctx = avctx->priv_data;
701
702     if (vtctx->color_primaries) {
703         CFDictionarySetValue(dict,
704                              kCVImageBufferColorPrimariesKey,
705                              vtctx->color_primaries);
706     }
707
708     if (vtctx->transfer_function) {
709         CFDictionarySetValue(dict,
710                              kCVImageBufferTransferFunctionKey,
711                              vtctx->transfer_function);
712     }
713
714     if (vtctx->ycbcr_matrix) {
715         CFDictionarySetValue(dict,
716                              kCVImageBufferYCbCrMatrixKey,
717                              vtctx->ycbcr_matrix);
718     }
719 }
720
721 static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
722                                        CFMutableDictionaryRef* dict)
723 {
724     CFNumberRef cv_color_format_num = NULL;
725     CFNumberRef width_num = NULL;
726     CFNumberRef height_num = NULL;
727     CFMutableDictionaryRef pixel_buffer_info = NULL;
728     int cv_color_format;
729     int status = get_cv_pixel_format(avctx,
730                                      avctx->pix_fmt,
731                                      avctx->color_range,
732                                      &cv_color_format,
733                                      NULL);
734     if (status) return status;
735
736     pixel_buffer_info = CFDictionaryCreateMutable(
737                             kCFAllocatorDefault,
738                             20,
739                             &kCFCopyStringDictionaryKeyCallBacks,
740                             &kCFTypeDictionaryValueCallBacks);
741
742     if (!pixel_buffer_info) goto pbinfo_nomem;
743
744     cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
745                                          kCFNumberSInt32Type,
746                                          &cv_color_format);
747     if (!cv_color_format_num) goto pbinfo_nomem;
748
749     CFDictionarySetValue(pixel_buffer_info,
750                          kCVPixelBufferPixelFormatTypeKey,
751                          cv_color_format_num);
752     vt_release_num(&cv_color_format_num);
753
754     width_num = CFNumberCreate(kCFAllocatorDefault,
755                                kCFNumberSInt32Type,
756                                &avctx->width);
757     if (!width_num) goto pbinfo_nomem;
758
759     CFDictionarySetValue(pixel_buffer_info,
760                          kCVPixelBufferWidthKey,
761                          width_num);
762     vt_release_num(&width_num);
763
764     height_num = CFNumberCreate(kCFAllocatorDefault,
765                                 kCFNumberSInt32Type,
766                                 &avctx->height);
767     if (!height_num) goto pbinfo_nomem;
768
769     CFDictionarySetValue(pixel_buffer_info,
770                          kCVPixelBufferHeightKey,
771                          height_num);
772     vt_release_num(&height_num);
773
774     add_color_attr(avctx, pixel_buffer_info);
775
776     *dict = pixel_buffer_info;
777     return 0;
778
779 pbinfo_nomem:
780     vt_release_num(&cv_color_format_num);
781     vt_release_num(&width_num);
782     vt_release_num(&height_num);
783     if (pixel_buffer_info) CFRelease(pixel_buffer_info);
784
785     return AVERROR(ENOMEM);
786 }
787
788 static int get_cv_color_primaries(AVCodecContext *avctx,
789                                   CFStringRef *primaries)
790 {
791     enum AVColorPrimaries pri = avctx->color_primaries;
792     switch (pri) {
793         case AVCOL_PRI_UNSPECIFIED:
794             *primaries = NULL;
795             break;
796
797         case AVCOL_PRI_BT709:
798             *primaries = kCVImageBufferColorPrimaries_ITU_R_709_2;
799             break;
800
801         case AVCOL_PRI_BT2020:
802             *primaries = compat_keys.kCVImageBufferColorPrimaries_ITU_R_2020;
803             break;
804
805         default:
806             av_log(avctx, AV_LOG_ERROR, "Color primaries %s is not supported.\n", av_color_primaries_name(pri));
807             *primaries = NULL;
808             return -1;
809     }
810
811     return 0;
812 }
813
814 static int get_cv_transfer_function(AVCodecContext *avctx,
815                                     CFStringRef *transfer_fnc,
816                                     CFNumberRef *gamma_level)
817 {
818     enum AVColorTransferCharacteristic trc = avctx->color_trc;
819     Float32 gamma;
820     *gamma_level = NULL;
821
822     switch (trc) {
823         case AVCOL_TRC_UNSPECIFIED:
824             *transfer_fnc = NULL;
825             break;
826
827         case AVCOL_TRC_BT709:
828             *transfer_fnc = kCVImageBufferTransferFunction_ITU_R_709_2;
829             break;
830
831         case AVCOL_TRC_SMPTE240M:
832             *transfer_fnc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
833             break;
834
835         case AVCOL_TRC_GAMMA22:
836             gamma = 2.2;
837             *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
838             *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
839             break;
840
841         case AVCOL_TRC_GAMMA28:
842             gamma = 2.8;
843             *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
844             *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
845             break;
846
847         case AVCOL_TRC_BT2020_10:
848         case AVCOL_TRC_BT2020_12:
849             *transfer_fnc = compat_keys.kCVImageBufferTransferFunction_ITU_R_2020;
850             break;
851
852         default:
853             av_log(avctx, AV_LOG_ERROR, "Transfer function %s is not supported.\n", av_color_transfer_name(trc));
854             return -1;
855     }
856
857     return 0;
858 }
859
860 static int get_cv_ycbcr_matrix(AVCodecContext *avctx, CFStringRef *matrix) {
861     switch(avctx->colorspace) {
862         case AVCOL_SPC_BT709:
863             *matrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
864             break;
865
866         case AVCOL_SPC_UNSPECIFIED:
867             *matrix = NULL;
868             break;
869
870         case AVCOL_SPC_BT470BG:
871         case AVCOL_SPC_SMPTE170M:
872             *matrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
873             break;
874
875         case AVCOL_SPC_SMPTE240M:
876             *matrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
877             break;
878
879         case AVCOL_SPC_BT2020_NCL:
880             *matrix = compat_keys.kCVImageBufferYCbCrMatrix_ITU_R_2020;
881             break;
882
883         default:
884             av_log(avctx, AV_LOG_ERROR, "Color space %s is not supported.\n", av_color_space_name(avctx->colorspace));
885             return -1;
886     }
887
888     return 0;
889 }
890
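/*
 * Creates the VTCompressionSession and applies the user options as session
 * properties (bitrate, profile/level, GOP size, pixel aspect ratio, color
 * metadata, entropy mode, realtime flag). Each numeric option follows the
 * same pattern; sketch only, with placeholder names kVTSomePropertyKey and
 * some_int_option:
 *
 *     CFNumberRef v = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
 *                                    &some_int_option);
 *     status = VTSessionSetProperty(vtctx->session, kVTSomePropertyKey, v);
 *     CFRelease(v);
 *     if (status) { ...log or fail... }
 */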
891 static int vtenc_create_encoder(AVCodecContext   *avctx,
892                                 CMVideoCodecType codec_type,
893                                 CFStringRef      profile_level,
894                                 CFNumberRef      gamma_level,
895                                 CFDictionaryRef  enc_info,
896                                 CFDictionaryRef  pixel_buffer_info,
897                                 VTCompressionSessionRef *session)
898 {
899     VTEncContext *vtctx = avctx->priv_data;
900     SInt32       bit_rate = avctx->bit_rate;
901     CFNumberRef  bit_rate_num;
902
903     int status = VTCompressionSessionCreate(kCFAllocatorDefault,
904                                             avctx->width,
905                                             avctx->height,
906                                             codec_type,
907                                             enc_info,
908                                             pixel_buffer_info,
909                                             kCFAllocatorDefault,
910                                             vtenc_output_callback,
911                                             avctx,
912                                             session);
913
914     if (status || !vtctx->session) {
915         av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
916
917 #if !TARGET_OS_IPHONE
918         if (!vtctx->allow_sw) {
919             av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
920         }
921 #endif
922
923         return AVERROR_EXTERNAL;
924     }
925
926     bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
927                                   kCFNumberSInt32Type,
928                                   &bit_rate);
929     if (!bit_rate_num) return AVERROR(ENOMEM);
930
931     status = VTSessionSetProperty(vtctx->session,
932                                   kVTCompressionPropertyKey_AverageBitRate,
933                                   bit_rate_num);
934     CFRelease(bit_rate_num);
935
936     if (status) {
937         av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
938         return AVERROR_EXTERNAL;
939     }
940
941     if (profile_level) {
942         status = VTSessionSetProperty(vtctx->session,
943                                       kVTCompressionPropertyKey_ProfileLevel,
944                                       profile_level);
945         if (status) {
946             av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
947         }
948     }
949
950     if (avctx->gop_size > 0) {
951         CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
952                                               kCFNumberIntType,
953                                               &avctx->gop_size);
954         if (!interval) {
955             return AVERROR(ENOMEM);
956         }
957
958         status = VTSessionSetProperty(vtctx->session,
959                                       kVTCompressionPropertyKey_MaxKeyFrameInterval,
960                                       interval);
961         CFRelease(interval);
962
963         if (status) {
964             av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
965             return AVERROR_EXTERNAL;
966         }
967     }
968
969     if (vtctx->frames_before) {
970         status = VTSessionSetProperty(vtctx->session,
971                                       kVTCompressionPropertyKey_MoreFramesBeforeStart,
972                                       kCFBooleanTrue);
973
974         if (status == kVTPropertyNotSupportedErr) {
975             av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
976         } else if (status) {
977             av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
978         }
979     }
980
981     if (vtctx->frames_after) {
982         status = VTSessionSetProperty(vtctx->session,
983                                       kVTCompressionPropertyKey_MoreFramesAfterEnd,
984                                       kCFBooleanTrue);
985
986         if (status == kVTPropertyNotSupportedErr) {
987             av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
988         } else if (status) {
989             av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
990         }
991     }
992
993     if (avctx->sample_aspect_ratio.num != 0) {
994         CFNumberRef num;
995         CFNumberRef den;
996         CFMutableDictionaryRef par;
997         AVRational *avpar = &avctx->sample_aspect_ratio;
998
999         av_reduce(&avpar->num, &avpar->den,
1000                    avpar->num,  avpar->den,
1001                   0xFFFFFFFF);
1002
1003         num = CFNumberCreate(kCFAllocatorDefault,
1004                              kCFNumberIntType,
1005                              &avpar->num);
1006
1007         den = CFNumberCreate(kCFAllocatorDefault,
1008                              kCFNumberIntType,
1009                              &avpar->den);
1010
1011
1012
1013         par = CFDictionaryCreateMutable(kCFAllocatorDefault,
1014                                         2,
1015                                         &kCFCopyStringDictionaryKeyCallBacks,
1016                                         &kCFTypeDictionaryValueCallBacks);
1017
1018         if (!par || !num || !den) {
1019             if (par) CFRelease(par);
1020             if (num) CFRelease(num);
1021             if (den) CFRelease(den);
1022
1023             return AVERROR(ENOMEM);
1024         }
1025
1026         CFDictionarySetValue(
1027             par,
1028             kCMFormatDescriptionKey_PixelAspectRatioHorizontalSpacing,
1029             num);
1030
1031         CFDictionarySetValue(
1032             par,
1033             kCMFormatDescriptionKey_PixelAspectRatioVerticalSpacing,
1034             den);
1035
1036         status = VTSessionSetProperty(vtctx->session,
1037                                       kVTCompressionPropertyKey_PixelAspectRatio,
1038                                       par);
1039
1040         CFRelease(par);
1041         CFRelease(num);
1042         CFRelease(den);
1043
1044         if (status) {
1045             av_log(avctx,
1046                    AV_LOG_ERROR,
1047                    "Error setting pixel aspect ratio to %d:%d: %d.\n",
1048                    avctx->sample_aspect_ratio.num,
1049                    avctx->sample_aspect_ratio.den,
1050                    status);
1051
1052             return AVERROR_EXTERNAL;
1053         }
1054     }
1055
1056
1057     if (vtctx->transfer_function) {
1058         status = VTSessionSetProperty(vtctx->session,
1059                                       kVTCompressionPropertyKey_TransferFunction,
1060                                       vtctx->transfer_function);
1061
1062         if (status) {
1063             av_log(avctx, AV_LOG_WARNING, "Could not set transfer function: %d\n", status);
1064         }
1065     }
1066
1067
1068     if (vtctx->ycbcr_matrix) {
1069         status = VTSessionSetProperty(vtctx->session,
1070                                       kVTCompressionPropertyKey_YCbCrMatrix,
1071                                       vtctx->ycbcr_matrix);
1072
1073         if (status) {
1074             av_log(avctx, AV_LOG_WARNING, "Could not set ycbcr matrix: %d\n", status);
1075         }
1076     }
1077
1078
1079     if (vtctx->color_primaries) {
1080         status = VTSessionSetProperty(vtctx->session,
1081                                       kVTCompressionPropertyKey_ColorPrimaries,
1082                                       vtctx->color_primaries);
1083
1084         if (status) {
1085             av_log(avctx, AV_LOG_WARNING, "Could not set color primaries: %d\n", status);
1086         }
1087     }
1088
1089     if (gamma_level) {
1090         status = VTSessionSetProperty(vtctx->session,
1091                                       kCVImageBufferGammaLevelKey,
1092                                       gamma_level);
1093
1094         if (status) {
1095             av_log(avctx, AV_LOG_WARNING, "Could not set gamma level: %d\n", status);
1096         }
1097     }
1098
1099     if (!vtctx->has_b_frames) {
1100         status = VTSessionSetProperty(vtctx->session,
1101                                       kVTCompressionPropertyKey_AllowFrameReordering,
1102                                       kCFBooleanFalse);
1103
1104         if (status) {
1105             av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
1106             return AVERROR_EXTERNAL;
1107         }
1108     }
1109
1110     if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
1111         CFStringRef entropy = vtctx->entropy == VT_CABAC ?
1112                                 compat_keys.kVTH264EntropyMode_CABAC:
1113                                 compat_keys.kVTH264EntropyMode_CAVLC;
1114
1115         status = VTSessionSetProperty(vtctx->session,
1116                                       compat_keys.kVTCompressionPropertyKey_H264EntropyMode,
1117                                       entropy);
1118
1119         if (status) {
1120             av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
1121         }
1122     }
1123
1124     if (vtctx->realtime) {
1125         status = VTSessionSetProperty(vtctx->session,
1126                                       compat_keys.kVTCompressionPropertyKey_RealTime,
1127                                       kCFBooleanTrue);
1128
1129         if (status) {
1130             av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
1131         }
1132     }
1133
1134     status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
1135     if (status) {
1136         av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
1137         return AVERROR_EXTERNAL;
1138     }
1139
1140     return 0;
1141 }
1142
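/*
 * One-time encoder setup: resolves the optional VideoToolbox symbols, maps
 * the AVCodecID to a CMVideoCodecType, validates the profile/level and
 * entropy options, builds the encoder-specification and pixel-buffer
 * dictionaries, and creates the compression session. With
 * AV_CODEC_FLAG_GLOBAL_HEADER set, vtenc_populate_extradata() is called
 * first so SPS/PPS can be exported through avctx->extradata.
 */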
1143 static av_cold int vtenc_init(AVCodecContext *avctx)
1144 {
1145     CFMutableDictionaryRef enc_info;
1146     CFMutableDictionaryRef pixel_buffer_info;
1147     CMVideoCodecType       codec_type;
1148     VTEncContext           *vtctx = avctx->priv_data;
1149     CFStringRef            profile_level;
1150     CFBooleanRef           has_b_frames_cfbool;
1151     CFNumberRef            gamma_level = NULL;
1152     int                    status;
1153
1154     pthread_once(&once_ctrl, loadVTEncSymbols);
1155
1156     codec_type = get_cm_codec_type(avctx->codec_id);
1157     if (!codec_type) {
1158         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
1159         return AVERROR(EINVAL);
1160     }
1161
1162     vtctx->has_b_frames = avctx->max_b_frames > 0;
1163     if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
1164         av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
1165         vtctx->has_b_frames = false;
1166     }
1167
1168     if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
1169         av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
1170         vtctx->entropy = VT_ENTROPY_NOT_SET;
1171     }
1172
1173     if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1174
1175     vtctx->session = NULL;
1176
1177     enc_info = CFDictionaryCreateMutable(
1178         kCFAllocatorDefault,
1179         20,
1180         &kCFCopyStringDictionaryKeyCallBacks,
1181         &kCFTypeDictionaryValueCallBacks
1182     );
1183
1184     if (!enc_info) return AVERROR(ENOMEM);
1185
1186 #if !TARGET_OS_IPHONE
1187     if (!vtctx->allow_sw) {
1188         CFDictionarySetValue(enc_info,
1189                              compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
1190                              kCFBooleanTrue);
1191     } else {
1192         CFDictionarySetValue(enc_info,
1193                              compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1194                              kCFBooleanTrue);
1195     }
1196 #endif
1197
1198     if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
1199         status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
1200         if (status)
1201             goto init_cleanup;
1202     } else {
1203         pixel_buffer_info = NULL;
1204     }
1205
1206     pthread_mutex_init(&vtctx->lock, NULL);
1207     pthread_cond_init(&vtctx->cv_sample_sent, NULL);
1208     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
1209
1210     get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
1211     get_cv_ycbcr_matrix(avctx, &vtctx->ycbcr_matrix);
1212     get_cv_color_primaries(avctx, &vtctx->color_primaries);
1213
1214
1215     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1216         status = vtenc_populate_extradata(avctx,
1217                                           codec_type,
1218                                           profile_level,
1219                                           gamma_level,
1220                                           enc_info,
1221                                           pixel_buffer_info);
1222         if (status)
1223             goto init_cleanup;
1224     }
1225
1226     status = vtenc_create_encoder(avctx,
1227                                   codec_type,
1228                                   profile_level,
1229                                   gamma_level,
1230                                   enc_info,
1231                                   pixel_buffer_info,
1232                                   &vtctx->session);
1233
1234     if (status < 0)
1235         goto init_cleanup;
1236
1237     status = VTSessionCopyProperty(vtctx->session,
1238                                    kVTCompressionPropertyKey_AllowFrameReordering,
1239                                    kCFAllocatorDefault,
1240                                    &has_b_frames_cfbool);
1241
1242     if (!status) {
1243         //Some devices don't output B-frames for main profile, even if requested.
1244         vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
1245         CFRelease(has_b_frames_cfbool);
1246     }
1247     avctx->has_b_frames = vtctx->has_b_frames;
1248
1249 init_cleanup:
1250     if (gamma_level)
1251         CFRelease(gamma_level);
1252
1253     if (pixel_buffer_info)
1254         CFRelease(pixel_buffer_info);
1255
1256     CFRelease(enc_info);
1257
1258     return status;
1259 }
1260
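/*
 * Determines whether an encoded sample is a sync (key) frame by inspecting
 * the kCMSampleAttachmentKey_NotSync attachment; samples without attachments
 * are treated as key frames.
 */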
1261 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
1262 {
1263     CFArrayRef      attachments;
1264     CFDictionaryRef attachment;
1265     CFBooleanRef    not_sync;
1266     CFIndex         len;
1267
1268     attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
1269     len = !attachments ? 0 : CFArrayGetCount(attachments);
1270
1271     if (!len) {
1272         *is_key_frame = true;
1273         return;
1274     }
1275
1276     attachment = CFArrayGetValueAtIndex(attachments, 0);
1277
1278     if (CFDictionaryGetValueIfPresent(attachment,
1279                                       kCMSampleAttachmentKey_NotSync,
1280                                       (const void **)&not_sync))
1281     {
1282         *is_key_frame = !CFBooleanGetValue(not_sync);
1283     } else {
1284         *is_key_frame = true;
1285     }
1286 }
1287
1288 static int is_post_sei_nal_type(int nal_type){
1289     return nal_type != H264_NAL_SEI &&
1290            nal_type != H264_NAL_SPS &&
1291            nal_type != H264_NAL_PPS &&
1292            nal_type != H264_NAL_AUD;
1293 }
1294
1295 /*
1296  * Finds the end of the SEI messages in an SEI NAL unit: stores it in *sei_end,
1297  * returning a positive byte count, 0 if the NAL is not SEI, or <0 on bad data.
1298  */
1299 static int find_sei_end(AVCodecContext *avctx,
1300                         uint8_t        *nal_data,
1301                         size_t          nal_size,
1302                         uint8_t       **sei_end)
1303 {
1304     int nal_type;
1305     size_t sei_payload_size = 0;
1306     int sei_payload_type = 0;
1307     *sei_end = NULL;
1308     uint8_t *nal_start = nal_data;
1309
1310     if (!nal_size)
1311         return 0;
1312
1313     nal_type = *nal_data & 0x1F;
1314     if (nal_type != H264_NAL_SEI)
1315         return 0;
1316
1317     nal_data++;
1318     nal_size--;
1319
1320     if (nal_data[nal_size - 1] == 0x80)
1321         nal_size--;
1322
1323     while (nal_size > 0 && *nal_data > 0) {
1324         do{
1325             sei_payload_type += *nal_data;
1326             nal_data++;
1327             nal_size--;
1328         } while (nal_size > 0 && *nal_data == 0xFF);
1329
1330         if (!nal_size) {
1331             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing type.\n");
1332             return AVERROR_INVALIDDATA;
1333         }
1334
1335         do{
1336             sei_payload_size += *nal_data;
1337             nal_data++;
1338             nal_size--;
1339         } while (nal_size > 0 && *nal_data == 0xFF);
1340
1341         if (nal_size < sei_payload_size) {
1342             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing size.\n");
1343             return AVERROR_INVALIDDATA;
1344         }
1345
1346         nal_data += sei_payload_size;
1347         nal_size -= sei_payload_size;
1348     }
1349
1350     *sei_end = nal_data;
1351
1352     return nal_data - nal_start + 1;
1353 }
1354
1355 /**
1356  * Copies the data inserting emulation prevention bytes as needed.
1357  * Existing data in the destination can be taken into account by providing
1358  * dst with a dst_offset > 0.
1359  *
1360  * @return The number of bytes copied on success. On failure, the negative of
1361  *         the number of bytes needed to copy src is returned.
1362  */
1363 static int copy_emulation_prev(const uint8_t *src,
1364                                size_t         src_size,
1365                                uint8_t       *dst,
1366                                ssize_t        dst_offset,
1367                                size_t         dst_size)
1368 {
1369     int zeros = 0;
1370     int wrote_bytes;
1371     uint8_t* dst_start;
1372     uint8_t* dst_end = dst + dst_size;
1373     const uint8_t* src_end = src + src_size;
1374     int start_at = dst_offset > 2 ? dst_offset - 2 : 0;
1375     int i;
1376     for (i = start_at; i < dst_offset && i < dst_size; i++) {
1377         if (!dst[i])
1378             zeros++;
1379         else
1380             zeros = 0;
1381     }
1382
1383     dst += dst_offset;
1384     dst_start = dst;
1385     for (; src < src_end; src++, dst++) {
1386         if (zeros == 2) {
1387             int insert_ep3_byte = *src <= 3;
1388             if (insert_ep3_byte) {
1389                 if (dst < dst_end)
1390                     *dst = 3;
1391                 dst++;
1392             }
1393
1394             zeros = 0;
1395         }
1396
1397         if (dst < dst_end)
1398             *dst = *src;
1399
1400         if (!*src)
1401             zeros++;
1402         else
1403             zeros = 0;
1404     }
1405
1406     wrote_bytes = dst - dst_start;
1407
1408     if (dst > dst_end)
1409         return -wrote_bytes;
1410
1411     return wrote_bytes;
1412 }
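/*
 * Worked example (illustrative, not part of the original file): with two zero
 * bytes pending, copy_emulation_prev() inserts an emulation prevention byte
 * 0x03 before any source byte <= 0x03, so the payload
 *     00 00 01 25 00 00 00
 * is written out as
 *     00 00 03 01 25 00 00 03 00
 * which prevents start codes from appearing inside the escaped SEI payload.
 */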
1413
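/*
 * Serializes one SEI message: the payload type and payload size are written
 * in the 0xFF-chunked form used by H.264 SEI headers, then the payload is
 * copied through copy_emulation_prev() so emulation prevention bytes are
 * inserted. Returns the number of bytes written (header plus escaped
 * payload) or AVERROR_BUFFER_TOO_SMALL.
 */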
1414 static int write_sei(const ExtraSEI *sei,
1415                      int             sei_type,
1416                      uint8_t        *dst,
1417                      size_t          dst_size)
1418 {
1419     uint8_t *sei_start = dst;
1420     size_t remaining_sei_size = sei->size;
1421     size_t remaining_dst_size = dst_size;
1422     int header_bytes;
1423     int bytes_written;
1424     ssize_t offset;
1425
1426     if (!remaining_dst_size)
1427         return AVERROR_BUFFER_TOO_SMALL;
1428
1429     while (sei_type && remaining_dst_size != 0) {
1430         int sei_byte = sei_type > 255 ? 255 : sei_type;
1431         *dst = sei_byte;
1432
1433         sei_type -= sei_byte;
1434         dst++;
1435         remaining_dst_size--;
1436     }
1437
1438     if (!remaining_dst_size)
1439         return AVERROR_BUFFER_TOO_SMALL;
1440
1441     while (remaining_sei_size && remaining_dst_size != 0) {
1442         int size_byte = remaining_sei_size > 255 ? 255 : remaining_sei_size;
1443         *dst = size_byte;
1444
1445         remaining_sei_size -= size_byte;
1446         dst++;
1447         remaining_dst_size--;
1448     }
1449
1450     if (remaining_dst_size < sei->size)
1451         return AVERROR_BUFFER_TOO_SMALL;
1452
1453     header_bytes = dst - sei_start;
1454
1455     offset = header_bytes;
1456     bytes_written = copy_emulation_prev(sei->data,
1457                                         sei->size,
1458                                         sei_start,
1459                                         offset,
1460                                         dst_size);
1461     if (bytes_written < 0)
1462         return AVERROR_BUFFER_TOO_SMALL;
1463
1464     bytes_written += header_bytes;
1465     return bytes_written;
1466 }
1467
1468 /**
1469  * Copies NAL units and replaces length codes with
1470  * H.264 Annex B start codes. On failure, the contents of
1471  * dst_data may have been modified.
1472  *
1473  * @param length_code_size Byte length of each length code
1474  * @param sample_buffer NAL units prefixed with length codes.
1475  * @param sei Optional A53 closed captions SEI data.
1476  * @param dst_data Must be zeroed before calling this function.
1477  *                 Contains the copied NAL units prefixed with
1478  *                 start codes when the function returns
1479  *                 successfully.
1480  * @param dst_size Length of dst_data
1481  * @return 0 on success
1482  *         AVERROR_INVALIDDATA if length_code_size is invalid
1483  *         AVERROR_BUFFER_TOO_SMALL if dst_data is too small
1484  *         or if a length_code in src_data specifies data beyond
1485  *         the end of its buffer.
1486  */
1487 static int copy_replace_length_codes(
1488     AVCodecContext *avctx,
1489     size_t        length_code_size,
1490     CMSampleBufferRef sample_buffer,
1491     ExtraSEI      *sei,
1492     uint8_t       *dst_data,
1493     size_t        dst_size)
1494 {
1495     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1496     size_t remaining_src_size = src_size;
1497     size_t remaining_dst_size = dst_size;
1498     size_t src_offset = 0;
1499     int wrote_sei = 0;
1500     int status;
1501     uint8_t size_buf[4];
1502     uint8_t nal_type;
1503     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
1504
1505     if (length_code_size > 4) {
1506         return AVERROR_INVALIDDATA;
1507     }
1508
1509     while (remaining_src_size > 0) {
1510         size_t curr_src_len;
1511         size_t curr_dst_len;
1512         size_t box_len = 0;
1513         size_t i;
1514
1515         uint8_t       *dst_box;
1516
1517         status = CMBlockBufferCopyDataBytes(block,
1518                                             src_offset,
1519                                             length_code_size,
1520                                             size_buf);
1521         if (status) {
1522             av_log(avctx, AV_LOG_ERROR, "Cannot copy length: %d\n", status);
1523             return AVERROR_EXTERNAL;
1524         }
1525
1526         status = CMBlockBufferCopyDataBytes(block,
1527                                             src_offset + length_code_size,
1528                                             1,
1529                                             &nal_type);
1530
1531         if (status) {
1532             av_log(avctx, AV_LOG_ERROR, "Cannot copy type: %d\n", status);
1533             return AVERROR_EXTERNAL;
1534         }
1535
1536         nal_type &= 0x1F;
1537
1538         for (i = 0; i < length_code_size; i++) {
1539             box_len <<= 8;
1540             box_len |= size_buf[i];
1541         }
1542
1543         if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
1544             //No SEI NAL unit - insert.
1545             int wrote_bytes;
1546
1547             memcpy(dst_data, start_code, sizeof(start_code));
1548             dst_data += sizeof(start_code);
1549             remaining_dst_size -= sizeof(start_code);
1550
1551             *dst_data = H264_NAL_SEI;
1552             dst_data++;
1553             remaining_dst_size--;
1554
1555             wrote_bytes = write_sei(sei,
1556                                     SEI_TYPE_USER_DATA_REGISTERED,
1557                                     dst_data,
1558                                     remaining_dst_size);
1559
1560             if (wrote_bytes < 0)
1561                 return wrote_bytes;
1562
1563             remaining_dst_size -= wrote_bytes;
1564             dst_data += wrote_bytes;
1565
1566             if (remaining_dst_size <= 0)
1567                 return AVERROR_BUFFER_TOO_SMALL;
1568
1569             *dst_data = 0x80;
1570
1571             dst_data++;
1572             remaining_dst_size--;
1573
1574             wrote_sei = 1;
1575         }
1576
1577         curr_src_len = box_len + length_code_size;
1578         curr_dst_len = box_len + sizeof(start_code);
1579
1580         if (remaining_src_size < curr_src_len) {
1581             return AVERROR_BUFFER_TOO_SMALL;
1582         }
1583
1584         if (remaining_dst_size < curr_dst_len) {
1585             return AVERROR_BUFFER_TOO_SMALL;
1586         }
1587
1588         dst_box = dst_data + sizeof(start_code);
1589
1590         memcpy(dst_data, start_code, sizeof(start_code));
1591         status = CMBlockBufferCopyDataBytes(block,
1592                                             src_offset + length_code_size,
1593                                             box_len,
1594                                             dst_box);
1595
1596         if (status) {
1597             av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
1598             return AVERROR_EXTERNAL;
1599         }
1600
1601         if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
1602             //Found SEI NAL unit - append.
1603             int wrote_bytes;
1604             int old_sei_length;
1605             int extra_bytes;
1606             uint8_t *new_sei;
1607             old_sei_length = find_sei_end(avctx, dst_box, box_len, &new_sei);
1608             if (old_sei_length < 0)
1609                 return old_sei_length;
1610
1611             wrote_bytes = write_sei(sei,
1612                                     SEI_TYPE_USER_DATA_REGISTERED,
1613                                     new_sei,
1614                                     remaining_dst_size - old_sei_length);
1615             if (wrote_bytes < 0)
1616                 return wrote_bytes;
1617
1618             if (new_sei + wrote_bytes >= dst_data + remaining_dst_size)
1619                 return AVERROR_BUFFER_TOO_SMALL;
1620
1621             new_sei[wrote_bytes++] = 0x80;
1622             extra_bytes = wrote_bytes - (dst_box + box_len - new_sei);
1623
1624             dst_data += extra_bytes;
1625             remaining_dst_size -= extra_bytes;
1626
1627             wrote_sei = 1;
1628         }
1629
1630         src_offset += curr_src_len;
1631         dst_data += curr_dst_len;
1632
1633         remaining_src_size -= curr_src_len;
1634         remaining_dst_size -= curr_dst_len;
1635     }
1636
1637     return 0;
1638 }
1639
1640 /**
1641  * Returns a sufficient number of bytes to contain the sei data.
1642  * It may be greater than the minimum required.
1643  */
1644 static int get_sei_msg_bytes(const ExtraSEI *sei, int type) {
1645     int copied_size;
1646     if (sei->size == 0)
1647         return 0;
1648
1649     copied_size = -copy_emulation_prev(sei->data,
1650                                        sei->size,
1651                                        NULL,
1652                                        0,
1653                                        0);
1654
1655     if ((sei->size % 255) == 0) //may result in an extra byte
1656         copied_size++;
1657
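    /* The SEI payload type and payload size are each coded as a run of 0xFF
     * bytes followed by one final byte, so they take type / 255 + 1 and
     * sei->size / 255 + 1 bytes on top of the escaped payload itself. */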
1658     return copied_size + sei->size / 255 + 1 + type / 255 + 1;
1659 }
1660
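/**
 * Converts an encoded CMSampleBuffer into an AVPacket: prepends parameter
 * sets on keyframes when global headers are not in use, rewrites the
 * length-prefixed NAL units as Annex B, optionally inserts an A53 closed
 * captions SEI NAL unit, and derives pts/dts from the sample buffer's
 * timestamps.
 */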
1661 static int vtenc_cm_to_avpacket(
1662     AVCodecContext    *avctx,
1663     CMSampleBufferRef sample_buffer,
1664     AVPacket          *pkt,
1665     ExtraSEI          *sei)
1666 {
1667     VTEncContext *vtctx = avctx->priv_data;
1668
1669     int     status;
1670     bool    is_key_frame;
1671     bool    add_header;
1672     size_t  length_code_size;
1673     size_t  header_size = 0;
1674     size_t  in_buf_size;
1675     size_t  out_buf_size;
1676     size_t  sei_nalu_size = 0;
1677     int64_t dts_delta;
1678     int64_t time_base_num;
1679     int nalu_count;
1680     CMTime  pts;
1681     CMTime  dts;
1682     CMVideoFormatDescriptionRef vid_fmt;
1683
1684
1685     vtenc_get_frame_info(sample_buffer, &is_key_frame);
1686     status = get_length_code_size(avctx, sample_buffer, &length_code_size);
1687     if (status) return status;
1688
1689     add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
1690
1691     if (add_header) {
1692         vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
1693         if (!vid_fmt) {
1694             av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
1695             return AVERROR_EXTERNAL;
1696         }
1697
1698         status = get_params_size(avctx, vid_fmt, &header_size);
1699         if (status) return status;
1700     }
1701
1702     status = count_nalus(length_code_size, sample_buffer, &nalu_count);
1703     if (status)
1704         return status;
1705
1706     if (sei) {
1707         size_t msg_size = get_sei_msg_bytes(sei,
1708                                             SEI_TYPE_USER_DATA_REGISTERED);
1709
1710         sei_nalu_size = sizeof(start_code) + 1 + msg_size + 1;
1711     }
1712
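    /* Each NAL unit's length prefix is replaced by a start code, so the
     * output size changes by sizeof(start_code) - length_code_size bytes
     * per NAL unit, plus room for the parameter sets and the optional SEI. */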
1713     in_buf_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1714     out_buf_size = header_size +
1715                    in_buf_size +
1716                    sei_nalu_size +
1717                    nalu_count * ((int)sizeof(start_code) - (int)length_code_size);
1718
1719     status = ff_alloc_packet2(avctx, pkt, out_buf_size, out_buf_size);
1720     if (status < 0)
1721         return status;
1722
1723     if (add_header) {
1724         status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
1725         if (status) return status;
1726     }
1727
1728     status = copy_replace_length_codes(
1729         avctx,
1730         length_code_size,
1731         sample_buffer,
1732         sei,
1733         pkt->data + header_size,
1734         pkt->size - header_size
1735     );
1736
1737     if (status) {
1738         av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
1739         return status;
1740     }
1741
1742     if (is_key_frame) {
1743         pkt->flags |= AV_PKT_FLAG_KEY;
1744     }
1745
1746     pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
1747     dts = CMSampleBufferGetDecodeTimeStamp      (sample_buffer);
1748
1749     if (CMTIME_IS_INVALID(dts)) {
1750         if (!vtctx->has_b_frames) {
1751             dts = pts;
1752         } else {
1753             av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
1754             return AVERROR_EXTERNAL;
1755         }
1756     }
1757
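    /* The frame was submitted with a CMTime value of pts * time_base.num
     * (see vtenc_send_frame), so dividing by time_base.num recovers the
     * original pts; dts is additionally shifted back by the B-frame delay. */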
1758     dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
1759     time_base_num = avctx->time_base.num;
1760     pkt->pts = pts.value / time_base_num;
1761     pkt->dts = dts.value / time_base_num - dts_delta;
1762     pkt->size = out_buf_size;
1763
1764     return 0;
1765 }
1766
1767 /*
1768  * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
1769  * containing all planes if so.
1770  */
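/*
 * Fills per-plane widths, heights and strides for the pixel formats this
 * encoder accepts (NV12 and YUV420P) and maps the frame's format and color
 * range to a CoreVideo pixel format constant.
 */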
1771 static int get_cv_pixel_info(
1772     AVCodecContext *avctx,
1773     const AVFrame  *frame,
1774     int            *color,
1775     int            *plane_count,
1776     size_t         *widths,
1777     size_t         *heights,
1778     size_t         *strides,
1779     size_t         *contiguous_buf_size)
1780 {
1781     VTEncContext *vtctx = avctx->priv_data;
1782     int av_format       = frame->format;
1783     int av_color_range  = av_frame_get_color_range(frame);
1784     int i;
1785     int range_guessed;
1786     int status;
1787
1788     status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
1789     if (status) {
1790         av_log(avctx,
1791             AV_LOG_ERROR,
1792             "Could not get pixel format for color format '%s' range '%s'.\n",
1793             av_get_pix_fmt_name(av_format),
1794             av_color_range > AVCOL_RANGE_UNSPECIFIED &&
1795             av_color_range < AVCOL_RANGE_NB ?
1796                av_color_range_name(av_color_range) :
1797                "Unknown");
1798
1799         return AVERROR(EINVAL);
1800     }
1801
1802     if (range_guessed) {
1803         if (!vtctx->warned_color_range) {
1804             vtctx->warned_color_range = true;
1805             av_log(avctx,
1806                    AV_LOG_WARNING,
1807                    "Color range not set for %s. Using MPEG range.\n",
1808                    av_get_pix_fmt_name(av_format));
1809         }
1812     }
1813
1814     switch (av_format) {
1815     case AV_PIX_FMT_NV12:
1816         *plane_count = 2;
1817
1818         widths [0] = avctx->width;
1819         heights[0] = avctx->height;
1820         strides[0] = frame ? frame->linesize[0] : avctx->width;
1821
1822         widths [1] = (avctx->width  + 1) / 2;
1823         heights[1] = (avctx->height + 1) / 2;
1824         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
1825         break;
1826
1827     case AV_PIX_FMT_YUV420P:
1828         *plane_count = 3;
1829
1830         widths [0] = avctx->width;
1831         heights[0] = avctx->height;
1832         strides[0] = frame ? frame->linesize[0] : avctx->width;
1833
1834         widths [1] = (avctx->width  + 1) / 2;
1835         heights[1] = (avctx->height + 1) / 2;
1836         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;
1837
1838         widths [2] = (avctx->width  + 1) / 2;
1839         heights[2] = (avctx->height + 1) / 2;
1840         strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
1841         break;
1842
1843     default:
1844         av_log(
1845                avctx,
1846                AV_LOG_ERROR,
1847                "Could not get frame format info for color %d range %d.\n",
1848                av_format,
1849                av_color_range);
1850
1851         return AVERROR(EINVAL);
1852     }
1853
1854     *contiguous_buf_size = 0;
1855     for (i = 0; i < *plane_count; i++) {
1856         if (i < *plane_count - 1 &&
1857             frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
1858             *contiguous_buf_size = 0;
1859             break;
1860         }
1861
1862         *contiguous_buf_size += strides[i] * heights[i];
1863     }
1864
1865     return 0;
1866 }
1867
1868 #if !TARGET_OS_IPHONE
1869 //Not used on iOS - frame is always copied.
1870 static void free_avframe(
1871     void       *release_ctx,
1872     const void *data,
1873     size_t      size,
1874     size_t      plane_count,
1875     const void *plane_addresses[])
1876 {
1877     AVFrame *frame = release_ctx;
1878     av_frame_free(&frame);
1879 }
1880 #else
1881 //Not used on OSX - frame is never copied.
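/* Copies the AVFrame's planes into a CVPixelBuffer drawn from the session's
 * pixel buffer pool, locking the buffer's base address and copying row by
 * row when the source and destination strides differ. */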
1882 static int copy_avframe_to_pixel_buffer(AVCodecContext   *avctx,
1883                                         const AVFrame    *frame,
1884                                         CVPixelBufferRef cv_img,
1885                                         const size_t     *plane_strides,
1886                                         const size_t     *plane_rows)
1887 {
1888     int i, j;
1889     size_t plane_count;
1890     int status;
1891     int rows;
1892     int src_stride;
1893     int dst_stride;
1894     uint8_t *src_addr;
1895     uint8_t *dst_addr;
1896     size_t copy_bytes;
1897
1898     status = CVPixelBufferLockBaseAddress(cv_img, 0);
1899     if (status) {
1900         av_log(avctx,
1901                AV_LOG_ERROR,
1902                "Error: Could not lock base address of CVPixelBuffer: %d.\n",
1903                status);
1904
1905         return AVERROR_EXTERNAL;
1906     }
1907
1908     if (CVPixelBufferIsPlanar(cv_img)) {
1909         plane_count = CVPixelBufferGetPlaneCount(cv_img);
1910         for (i = 0; frame->data[i]; i++) {
1911             if (i == plane_count) {
1912                 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1913                 av_log(avctx,
1914                     AV_LOG_ERROR,
1915                     "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
1916                 );
1917
1918                 return AVERROR_EXTERNAL;
1919             }
1920
1921             dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
1922             src_addr = (uint8_t*)frame->data[i];
1923             dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
1924             src_stride = plane_strides[i];
1925             rows = plane_rows[i];
1926
1927             if (dst_stride == src_stride) {
1928                 memcpy(dst_addr, src_addr, src_stride * rows);
1929             } else {
1930                 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1931
1932                 for (j = 0; j < rows; j++) {
1933                     memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1934                 }
1935             }
1936         }
1937     } else {
1938         if (frame->data[1]) {
1939             CVPixelBufferUnlockBaseAddress(cv_img, 0);
1940             av_log(avctx,
1941                 AV_LOG_ERROR,
1942                 "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
1943             );
1944
1945             return AVERROR_EXTERNAL;
1946         }
1947
1948         dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
1949         src_addr = (uint8_t*)frame->data[0];
1950         dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
1951         src_stride = plane_strides[0];
1952         rows = plane_rows[0];
1953
1954         if (dst_stride == src_stride) {
1955             memcpy(dst_addr, src_addr, src_stride * rows);
1956         } else {
1957             copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1958
1959             for (j = 0; j < rows; j++) {
1960                 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1961             }
1962         }
1963     }
1964
1965     status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
1966     if (status) {
1967         av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
1968         return AVERROR_EXTERNAL;
1969     }
1970
1971     return 0;
1972 }
1973 #endif //!TARGET_OS_IPHONE
1974
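/**
 * Obtains a CVPixelBufferRef for the given frame. AV_PIX_FMT_VIDEOTOOLBOX
 * frames are passed through and retained; on iOS the frame data is copied
 * into a buffer from the compression session's pool; on OSX the frame's
 * planes are wrapped without copying and released later via free_avframe.
 */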
1975 static int create_cv_pixel_buffer(AVCodecContext   *avctx,
1976                                   const AVFrame    *frame,
1977                                   CVPixelBufferRef *cv_img)
1978 {
1979     int plane_count;
1980     int color;
1981     size_t widths [AV_NUM_DATA_POINTERS];
1982     size_t heights[AV_NUM_DATA_POINTERS];
1983     size_t strides[AV_NUM_DATA_POINTERS];
1984     int status;
1985     size_t contiguous_buf_size;
1986 #if TARGET_OS_IPHONE
1987     CVPixelBufferPoolRef pix_buf_pool;
1988     VTEncContext *vtctx = avctx->priv_data;
1989 #else
1990     CFMutableDictionaryRef pix_buf_attachments = CFDictionaryCreateMutable(
1991                                                    kCFAllocatorDefault,
1992                                                    10,
1993                                                    &kCFCopyStringDictionaryKeyCallBacks,
1994                                                    &kCFTypeDictionaryValueCallBacks);
1995
1996     if (!pix_buf_attachments) return AVERROR(ENOMEM);
1997 #endif
1998
1999     if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
2000         av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
2001
2002         *cv_img = (CVPixelBufferRef)frame->data[3];
2003         av_assert0(*cv_img);
2004
2005         CFRetain(*cv_img);
2006         return 0;
2007     }
2008
2009     memset(widths,  0, sizeof(widths));
2010     memset(heights, 0, sizeof(heights));
2011     memset(strides, 0, sizeof(strides));
2012
2013     status = get_cv_pixel_info(
2014         avctx,
2015         frame,
2016         &color,
2017         &plane_count,
2018         widths,
2019         heights,
2020         strides,
2021         &contiguous_buf_size
2022     );
2023
2024     if (status) {
2025         av_log(
2026             avctx,
2027             AV_LOG_ERROR,
2028             "Error: Cannot convert format %d color_range %d: %d\n",
2029             frame->format,
2030             av_frame_get_color_range(frame),
2031             status
2032         );
2033
2034         return AVERROR_EXTERNAL;
2035     }
2036
2037 #if TARGET_OS_IPHONE
2038     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2039     if (!pix_buf_pool) {
2040         av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
2041         return AVERROR_EXTERNAL;
2042     }
2043
2044     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2045                                                 pix_buf_pool,
2046                                                 cv_img);
2047
2048
2049     if (status) {
2050         av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
2051         return AVERROR_EXTERNAL;
2052     }
2053
2054     status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
2055     if (status) {
2056         CFRelease(*cv_img);
2057         *cv_img = NULL;
2058         return status;
2059     }
2060 #else
2061     AVFrame *enc_frame = av_frame_alloc();
2062     if (!enc_frame) return AVERROR(ENOMEM);
2063
2064     status = av_frame_ref(enc_frame, frame);
2065     if (status) {
2066         av_frame_free(&enc_frame);
2067         return status;
2068     }
2069
2070     status = CVPixelBufferCreateWithPlanarBytes(
2071         kCFAllocatorDefault,
2072         enc_frame->width,
2073         enc_frame->height,
2074         color,
2075         NULL,
2076         contiguous_buf_size,
2077         plane_count,
2078         (void **)enc_frame->data,
2079         widths,
2080         heights,
2081         strides,
2082         free_avframe,
2083         enc_frame,
2084         NULL,
2085         cv_img
2086     );
2087
2088     if (status) {
2089         av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
2090         CFRelease(pix_buf_attachments);
2091         return AVERROR_EXTERNAL;
2092     }
2093     add_color_attr(avctx, pix_buf_attachments);
2094     CVBufferSetAttachments(*cv_img, pix_buf_attachments, kCVAttachmentMode_ShouldPropagate);
2095     CFRelease(pix_buf_attachments);
2096 #endif
2097
2098     return 0;
2099 }
2100
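/**
 * Builds the per-frame encode options dictionary. Currently this only
 * forces a keyframe when the incoming frame is an I-frame; otherwise
 * *dict_out is set to NULL. The caller releases the dictionary.
 */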
2101 static int create_encoder_dict_h264(const AVFrame *frame,
2102                                     CFDictionaryRef *dict_out)
2103 {
2104     CFDictionaryRef dict = NULL;
2105     if (frame->pict_type == AV_PICTURE_TYPE_I) {
2106         const void *keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame };
2107         const void *vals[] = { kCFBooleanTrue };
2108
2109         dict = CFDictionaryCreate(NULL, keys, vals, 1, NULL, NULL);
2110         if (!dict) return AVERROR(ENOMEM);
2111     }
2112
2113     *dict_out = dict;
2114     return 0;
2115 }
2116
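/**
 * Submits one frame to the compression session. When A53 closed caption
 * side data is present and enabled, it is wrapped in an ExtraSEI struct and
 * passed as the sourceFrameRefcon, so it travels with the frame to the
 * output callback (not shown here), which is expected to queue it alongside
 * the encoded sample.
 */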
2117 static int vtenc_send_frame(AVCodecContext *avctx,
2118                             VTEncContext   *vtctx,
2119                             const AVFrame  *frame)
2120 {
2121     CMTime time;
2122     CFDictionaryRef frame_dict;
2123     CVPixelBufferRef cv_img = NULL;
2124     AVFrameSideData *side_data = NULL;
2125     ExtraSEI *sei = NULL;
2126     int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
2127
2128     if (status) return status;
2129
2130     status = create_encoder_dict_h264(frame, &frame_dict);
2131     if (status) {
2132         CFRelease(cv_img);
2133         return status;
2134     }
2135
2136     side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
2137     if (vtctx->a53_cc && side_data && side_data->size) {
2138         sei = av_mallocz(sizeof(*sei));
2139         if (!sei) {
2140             av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2141         } else {
2142             int ret = ff_alloc_a53_sei(frame, 0, &sei->data, &sei->size);
2143             if (ret < 0) {
2144                 av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2145                 av_free(sei);
2146                 sei = NULL;
2147             }
2148         }
2149     }
2150
2151     time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
2152     status = VTCompressionSessionEncodeFrame(
2153         vtctx->session,
2154         cv_img,
2155         time,
2156         kCMTimeInvalid,
2157         frame_dict,
2158         sei,
2159         NULL
2160     );
2161
2162     if (frame_dict) CFRelease(frame_dict);
2163     CFRelease(cv_img);
2164
2165     if (status) {
2166         av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
2167         return AVERROR_EXTERNAL;
2168     }
2169
2170     return 0;
2171 }
2172
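/**
 * encode2 callback: sends the frame to the session (or flushes the session
 * when frame is NULL), then, once the B-frame dts delta is known or we are
 * draining, pops a finished sample from the queue and converts it into an
 * AVPacket together with any queued SEI data.
 */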
2173 static int vtenc_frame(
2174     AVCodecContext *avctx,
2175     AVPacket       *pkt,
2176     const AVFrame  *frame,
2177     int            *got_packet)
2178 {
2179     VTEncContext *vtctx = avctx->priv_data;
2180     bool get_frame;
2181     int status;
2182     CMSampleBufferRef buf = NULL;
2183     ExtraSEI *sei = NULL;
2184
2185     if (frame) {
2186         status = vtenc_send_frame(avctx, vtctx, frame);
2187
2188         if (status) {
2189             status = AVERROR_EXTERNAL;
2190             goto end_nopkt;
2191         }
2192
2193         if (vtctx->frame_ct_in == 0) {
2194             vtctx->first_pts = frame->pts;
2195         } else if (vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
2196             vtctx->dts_delta = frame->pts - vtctx->first_pts;
2197         }
2198
2199         vtctx->frame_ct_in++;
2200     } else if (!vtctx->flushing) {
2201         vtctx->flushing = true;
2202
2203         status = VTCompressionSessionCompleteFrames(vtctx->session,
2204                                                     kCMTimeIndefinite);
2205
2206         if (status) {
2207             av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
2208             status = AVERROR_EXTERNAL;
2209             goto end_nopkt;
2210         }
2211     }
2212
2213     *got_packet = 0;
2214     get_frame = vtctx->dts_delta >= 0 || !frame;
2215     if (!get_frame) {
2216         status = 0;
2217         goto end_nopkt;
2218     }
2219
2220     status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
2221     if (status) goto end_nopkt;
2222     if (!buf)   goto end_nopkt;
2223
2224     status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei);
2225     if (sei) {
2226         av_free(sei->data);
2227         av_free(sei);
2228     }
2229     CFRelease(buf);
2230     if (status) goto end_nopkt;
2231
2232     *got_packet = 1;
2233     return 0;
2234
2235 end_nopkt:
2236     av_packet_unref(pkt);
2237     return status;
2238 }
2239
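/**
 * Creates a temporary compression session, encodes a single dummy frame and
 * flushes it so that the parameter sets become available; the output path
 * is expected to populate avctx->extradata as a side effect (asserted at
 * the end). The temporary session is torn down before returning.
 */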
2240 static int vtenc_populate_extradata(AVCodecContext   *avctx,
2241                                     CMVideoCodecType codec_type,
2242                                     CFStringRef      profile_level,
2243                                     CFNumberRef      gamma_level,
2244                                     CFDictionaryRef  enc_info,
2245                                     CFDictionaryRef  pixel_buffer_info)
2246 {
2247     VTEncContext *vtctx = avctx->priv_data;
2248     AVFrame *frame = av_frame_alloc();
2249     int y_size = avctx->width * avctx->height;
2250     int chroma_size = ((avctx->width + 1) / 2) * ((avctx->height + 1) / 2); //round up for odd dimensions
2251     CMSampleBufferRef buf = NULL;
2252     int status;
2253
2254     if (!frame)
2255         return AVERROR(ENOMEM);
2256
2257     frame->buf[0] = av_buffer_alloc(y_size + 2 * chroma_size);
2258
2259     if (!frame->buf[0]) {
2260         status = AVERROR(ENOMEM);
2261         goto pe_cleanup;
2262     }
2263
2264     status = vtenc_create_encoder(avctx,
2265                                   codec_type,
2266                                   profile_level,
2267                                   gamma_level,
2268                                   enc_info,
2269                                   pixel_buffer_info,
2270                                   &vtctx->session);
2271     if (status)
2272         goto pe_cleanup;
2273
2274     frame->data[0] = frame->buf[0]->data;
2275     memset(frame->data[0],   0,      y_size);
2276
2277     frame->data[1] = frame->buf[0]->data + y_size;
2278     memset(frame->data[1], 128, chroma_size);
2279
2280
2281     if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2282         frame->data[2] = frame->buf[0]->data + y_size + chroma_size;
2283         memset(frame->data[2], 128, chroma_size);
2284     }
2285
2286     frame->linesize[0] = avctx->width;
2287
2288     if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2289         frame->linesize[1] =
2290         frame->linesize[2] = (avctx->width + 1) / 2;
2291     } else {
2292         frame->linesize[1] = (avctx->width + 1) / 2;
2293     }
2294
2295     frame->format          = avctx->pix_fmt;
2296     frame->width           = avctx->width;
2297     frame->height          = avctx->height;
2298     av_frame_set_colorspace(frame, avctx->colorspace);
2299     av_frame_set_color_range(frame, avctx->color_range);
2300     frame->color_trc       = avctx->color_trc;
2301     frame->color_primaries = avctx->color_primaries;
2302
2303     frame->pts = 0;
2304     status = vtenc_send_frame(avctx, vtctx, frame);
2305     if (status) {
2306         av_log(avctx, AV_LOG_ERROR, "Error sending frame: %d\n", status);
2307         goto pe_cleanup;
2308     }
2309
2310     //Populates extradata - output frames are flushed and param sets are available.
2311     status = VTCompressionSessionCompleteFrames(vtctx->session,
2312                                                 kCMTimeIndefinite);
2313
2314     if (status)
2315         goto pe_cleanup;
2316
2317     status = vtenc_q_pop(vtctx, 0, &buf, NULL);
2318     if (status) {
2319         av_log(avctx, AV_LOG_ERROR, "Error popping encoded frame: %d\n", status);
2320         goto pe_cleanup;
2321     }
2322
2323     CFRelease(buf);
2324
2327 pe_cleanup:
2328     if (vtctx->session)
2329         CFRelease(vtctx->session);
2330
2331     vtctx->session = NULL;
2332     vtctx->frame_ct_out = 0;
2333
2334     av_frame_unref(frame);
2335     av_frame_free(&frame);
2336
2337     av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0));
2338
2339     return status;
2340 }
2341
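/**
 * Flushes any outstanding frames, drains the frame queue and releases the
 * session together with the cached color metadata references.
 */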
2342 static av_cold int vtenc_close(AVCodecContext *avctx)
2343 {
2344     VTEncContext *vtctx = avctx->priv_data;
2345
2346     if (!vtctx->session) return 0;
2347
2348     VTCompressionSessionCompleteFrames(vtctx->session,
2349                                        kCMTimeIndefinite);
2350     clear_frame_queue(vtctx);
2351     pthread_cond_destroy(&vtctx->cv_sample_sent);
2352     pthread_mutex_destroy(&vtctx->lock);
2353     CFRelease(vtctx->session);
2354     vtctx->session = NULL;
2355
2356     if (vtctx->color_primaries) {
2357         CFRelease(vtctx->color_primaries);
2358         vtctx->color_primaries = NULL;
2359     }
2360
2361     if (vtctx->transfer_function) {
2362         CFRelease(vtctx->transfer_function);
2363         vtctx->transfer_function = NULL;
2364     }
2365
2366     if (vtctx->ycbcr_matrix) {
2367         CFRelease(vtctx->ycbcr_matrix);
2368         vtctx->ycbcr_matrix = NULL;
2369     }
2370
2371     return 0;
2372 }
2373
2374 static const enum AVPixelFormat pix_fmts[] = {
2375     AV_PIX_FMT_VIDEOTOOLBOX,
2376     AV_PIX_FMT_NV12,
2377     AV_PIX_FMT_YUV420P,
2378     AV_PIX_FMT_NONE
2379 };
2380
2381 #define OFFSET(x) offsetof(VTEncContext, x)
2382 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
2383 static const AVOption options[] = {
2384     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
2385     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
2386     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
2387     { "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },
2388
2389     { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
2390     { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
2391     { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
2392     { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
2393     { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
2394     { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
2395     { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
2396     { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
2397     { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
2398     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
2399     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
2400
2401     { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
2402         { .i64 = 0 }, 0, 1, VE },
2403
2404     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
2405     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2406     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2407     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2408     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2409
2410     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
2411         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2412
2413     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
2414         OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2415     { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
2416         OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2417
2418     { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
2419
2420     { NULL },
2421 };
2422
2423 static const AVClass h264_videotoolbox_class = {
2424     .class_name = "h264_videotoolbox",
2425     .item_name  = av_default_item_name,
2426     .option     = options,
2427     .version    = LIBAVUTIL_VERSION_INT,
2428 };
2429
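/*
 * Example invocation (hypothetical file names; assumes FFmpeg was
 * configured with VideoToolbox support):
 *
 *   ffmpeg -i input.mov -c:v h264_videotoolbox -b:v 2M -profile:v high \
 *          -allow_sw 1 output.mp4
 */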
2430 AVCodec ff_h264_videotoolbox_encoder = {
2431     .name             = "h264_videotoolbox",
2432     .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
2433     .type             = AVMEDIA_TYPE_VIDEO,
2434     .id               = AV_CODEC_ID_H264,
2435     .priv_data_size   = sizeof(VTEncContext),
2436     .pix_fmts         = pix_fmts,
2437     .init             = vtenc_init,
2438     .encode2          = vtenc_frame,
2439     .close            = vtenc_close,
2440     .capabilities     = AV_CODEC_CAP_DELAY,
2441     .priv_class       = &h264_videotoolbox_class,
2442     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
2443                         FF_CODEC_CAP_INIT_CLEANUP,
2444 };