2 * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <VideoToolbox/VideoToolbox.h>
22 #include <CoreVideo/CoreVideo.h>
23 #include <CoreMedia/CoreMedia.h>
24 #include <TargetConditionals.h>
25 #include <Availability.h>
27 #include "libavutil/opt.h"
28 #include "libavutil/avassert.h"
29 #include "libavutil/atomic.h"
30 #include "libavutil/avstring.h"
31 #include "libavcodec/avcodec.h"
32 #include "libavutil/pixdesc.h"
/* H.264 profiles selectable via the "profile" private option (see options[]). */
36 typedef enum VT_H264Profile {
/* Entropy-coding modes for the "coder" private option (CAVLC / CABAC). */
44 typedef enum VTH264Entropy{
/* 4-byte H.264 Annex B start code; AVCC length prefixes are replaced with this. */
50 static const uint8_t start_code[] = { 0, 0, 0, 1 };
/* Node of the singly linked output queue; owns one encoded CMSampleBuffer. */
52 typedef struct BufNode {
53     CMSampleBufferRef cm_buffer;
/*
 * Private codec context for the VideoToolbox H.264 encoder.
 * Holds the compression session plus the lock/condvar-protected queue of
 * encoded samples produced asynchronously by vtenc_output_callback.
 */
58 typedef struct VTEncContext {
60     VTCompressionSessionRef session;
63     pthread_cond_t cv_sample_sent; /* signaled when the output callback queues a sample */
80     int64_t frames_before; /* backing store for the "frames_before" option */
87     bool warned_color_range; /* warn only once about a guessed color range */
91  * NULL-safe release of *refPtr, and sets value to NULL.
/* CFRelease itself is not NULL-safe, hence this wrapper. */
93 static void vt_release_num(CFNumberRef* refPtr){
/*
 * Records err as the pending asynchronous encoder error and drains the
 * output queue, releasing every queued sample buffer. Runs under vtctx->lock.
 */
102 static void set_async_error(VTEncContext *vtctx, int err)
106     pthread_mutex_lock(&vtctx->lock);
108     vtctx->async_error = err;
110     info = vtctx->q_head;
/* Detach the whole queue before walking it so the list is never seen half-freed. */
111     vtctx->q_head = vtctx->q_tail = NULL;
114         BufNode *next = info->next;
115         CFRelease(info->cm_buffer);
120     pthread_mutex_unlock(&vtctx->lock);
/*
 * Pops the next encoded sample from the queue into *buf.
 * If wait is true, blocks on cv_sample_sent until a sample arrives or an
 * async error is recorded. Returns the pending async error, if any.
 * Caller takes ownership of *buf (must CFRelease it).
 */
123 static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf)
127     pthread_mutex_lock(&vtctx->lock);
/* A previously recorded asynchronous error takes priority over queued data. */
129     if (vtctx->async_error) {
130         pthread_mutex_unlock(&vtctx->lock);
131         return vtctx->async_error;
/* Flush complete: every submitted frame has been returned. */
134     if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
137         pthread_mutex_unlock(&vtctx->lock);
141     while (!vtctx->q_head && !vtctx->async_error && wait) {
142         pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
145     if (!vtctx->q_head) {
146         pthread_mutex_unlock(&vtctx->lock);
151     info = vtctx->q_head;
152     vtctx->q_head = vtctx->q_head->next;
153     if (!vtctx->q_head) {
154         vtctx->q_tail = NULL;
157     pthread_mutex_unlock(&vtctx->lock);
159     *buf = info->cm_buffer;
162     vtctx->frame_ct_out++;
/*
 * Appends an encoded sample buffer to the output queue and wakes any
 * consumer blocked in vtenc_q_pop. On allocation failure the error is
 * recorded via set_async_error (this function cannot return one).
 */
167 static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer)
169     BufNode *info = av_malloc(sizeof(BufNode));
171         set_async_error(vtctx, AVERROR(ENOMEM));
176     info->cm_buffer = buffer;
179     pthread_mutex_lock(&vtctx->lock);
180     pthread_cond_signal(&vtctx->cv_sample_sent);
182     if (!vtctx->q_head) {
183         vtctx->q_head = info;
185         vtctx->q_tail->next = info;
188     vtctx->q_tail = info;
190     pthread_mutex_unlock(&vtctx->lock);
/* Maps an AVCodecID to the corresponding CoreMedia codec type. */
193 static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
196     case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
/* AVBuffer free callback: releases the CMBlockBuffer backing a zero-copy packet. */
201 static void vtenc_free_block(void *opaque, uint8_t *data)
203     CMBlockBufferRef block = opaque;
208  * Get the parameter sets from a CMSampleBufferRef.
209  * @param dst If *dst isn't NULL, the parameters are copied into existing
210  *            memory. *dst_size must be set accordingly when *dst != NULL.
211  *            If *dst is NULL, it will be allocated.
212  * In all cases, *dst_size is set to the number of bytes used starting
/* Computes the byte size of all SPS/PPS parameter sets plus a start code each. */
215 static int get_params_size(
216     AVCodecContext *avctx,
217     CMVideoFormatDescriptionRef vid_fmt,
220     size_t total_size = 0;
/* Some OS versions report a bogus parameter-set count; is_count_bad works around that. */
222     int is_count_bad = 0;
225     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
237     for (i = 0; i < ps_count || is_count_bad; i++) {
240         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
248          * When ps_count is invalid, status != 0 ends the loop normally
249          * unless we didn't get any parameter sets.
251         if (i > 0 && is_count_bad) status = 0;
256         total_size += ps_size + sizeof(start_code);
260         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
261         return AVERROR_EXTERNAL;
/*
 * Copies each SPS/PPS parameter set into dst, prefixing every set with an
 * Annex B start code. dst must be at least get_params_size() bytes.
 */
268 static int copy_param_sets(
269     AVCodecContext *avctx,
270     CMVideoFormatDescriptionRef vid_fmt,
/* Same bogus-count workaround as in get_params_size. */
275     int is_count_bad = 0;
280     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
293     for (i = 0; i < ps_count || is_count_bad; i++) {
298         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
305         if (i > 0 && is_count_bad) status = 0;
/* Bounds check before each write: start code plus the parameter set itself. */
310         next_offset = offset + sizeof(start_code) + ps_size;
311         if (dst_size < next_offset) {
312             av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
313             return AVERROR_BUFFER_TOO_SMALL;
316         memcpy(dst + offset, start_code, sizeof(start_code));
317         offset += sizeof(start_code);
319         memcpy(dst + offset, ps, ps_size);
320         offset = next_offset;
324         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
325         return AVERROR_EXTERNAL;
/*
 * Fills avctx->extradata with the SPS/PPS from the first encoded sample.
 * Used when AV_CODEC_FLAG_GLOBAL_HEADER is set. Returns 0 or an AVERROR.
 */
331 static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
333     CMVideoFormatDescriptionRef vid_fmt;
337     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
339         av_log(avctx, AV_LOG_ERROR, "No video format.\n");
340         return AVERROR_EXTERNAL;
343     status = get_params_size(avctx, vid_fmt, &total_size);
345         av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
349     avctx->extradata = av_malloc(total_size);
350     if (!avctx->extradata) {
351         return AVERROR(ENOMEM);
353     avctx->extradata_size = total_size;
355     status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
358         av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
/*
 * VTCompressionSession output callback (runs on a VideoToolbox thread).
 * Retains the sample into the output queue, or records an async error.
 */
365 static void vtenc_output_callback(
367     void *sourceFrameCtx,
369     VTEncodeInfoFlags flags,
370     CMSampleBufferRef sample_buffer)
372     AVCodecContext *avctx = ctx;
373     VTEncContext *vtctx = avctx->priv_data;
/* Once an async error is pending, drop further output. */
375     if (vtctx->async_error) {
376         if(sample_buffer) CFRelease(sample_buffer);
380     if (status || !sample_buffer) {
381         av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
382         set_async_error(vtctx, AVERROR_EXTERNAL);
/* First output with GLOBAL_HEADER: extract SPS/PPS into extradata. */
386     if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
387         int set_status = set_extradata(avctx, sample_buffer);
389             set_async_error(vtctx, set_status);
394     vtenc_q_push(vtctx, sample_buffer);
/*
 * Queries the byte width of the NAL-unit length prefixes (AVCC "nal length
 * size") used in this sample buffer. Needed to convert to Annex B.
 */
397 static int get_length_code_size(
398     AVCodecContext *avctx,
399     CMSampleBufferRef sample_buffer,
402     CMVideoFormatDescriptionRef vid_fmt;
406     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
408         av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
409         return AVERROR_EXTERNAL;
412     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
419         av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
420         return AVERROR_EXTERNAL;
428  * Returns true on success.
430  * If profile_level_val is NULL and this method returns true, don't specify the
431  * profile/level to the encoder.
/* Maps the (profile, level) options to a VideoToolbox profile-level constant. */
433 static bool get_vt_profile_level(AVCodecContext *avctx,
434                                  CFStringRef *profile_level_val)
436     VTEncContext *vtctx = avctx->priv_data;
437     int64_t profile = vtctx->profile;
439     if (profile == H264_PROF_AUTO && vtctx->level) {
440         //Need to pick a profile if level is not auto-selected.
441         profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
444     *profile_level_val = NULL;
450     case H264_PROF_BASELINE:
/* level option values are level*10 (e.g. 31 == level 3.1); 0 means auto. */
451         switch (vtctx->level) {
452             case  0: *profile_level_val = kVTProfileLevel_H264_Baseline_AutoLevel; break;
453             case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
454             case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
455             case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
456             case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
457             case 40: *profile_level_val = kVTProfileLevel_H264_Baseline_4_0;       break;
458             case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
459             case 42: *profile_level_val = kVTProfileLevel_H264_Baseline_4_2;       break;
460             case 50: *profile_level_val = kVTProfileLevel_H264_Baseline_5_0;       break;
461             case 51: *profile_level_val = kVTProfileLevel_H264_Baseline_5_1;       break;
462             case 52: *profile_level_val = kVTProfileLevel_H264_Baseline_5_2;       break;
467         switch (vtctx->level) {
468             case  0: *profile_level_val = kVTProfileLevel_H264_Main_AutoLevel; break;
469             case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
470             case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
471             case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
472             case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
473             case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
474             case 42: *profile_level_val = kVTProfileLevel_H264_Main_4_2;       break;
475             case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
476             case 51: *profile_level_val = kVTProfileLevel_H264_Main_5_1;       break;
477             case 52: *profile_level_val = kVTProfileLevel_H264_Main_5_2;       break;
482         switch (vtctx->level) {
483             case  0: *profile_level_val = kVTProfileLevel_H264_High_AutoLevel; break;
484             case 30: *profile_level_val = kVTProfileLevel_H264_High_3_0;       break;
485             case 31: *profile_level_val = kVTProfileLevel_H264_High_3_1;       break;
486             case 32: *profile_level_val = kVTProfileLevel_H264_High_3_2;       break;
487             case 40: *profile_level_val = kVTProfileLevel_H264_High_4_0;       break;
488             case 41: *profile_level_val = kVTProfileLevel_H264_High_4_1;       break;
489             case 42: *profile_level_val = kVTProfileLevel_H264_High_4_2;       break;
490             case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
491             case 51: *profile_level_val = kVTProfileLevel_H264_High_5_1;       break;
492             case 52: *profile_level_val = kVTProfileLevel_H264_High_5_2;       break;
497     if (!*profile_level_val) {
498         av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
/*
 * Maps an AVPixelFormat + color range to a CoreVideo pixel format constant.
 * *range_guessed is set when the range was unspecified and MPEG is assumed.
 * Returns 0 on success, AVERROR(EINVAL) for unsupported formats.
 */
505 static int get_cv_pixel_format(AVCodecContext* avctx,
506                                enum AVPixelFormat fmt,
507                                enum AVColorRange range,
508                                int* av_pixel_format,
511     if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
512                                         range != AVCOL_RANGE_JPEG;
514     //MPEG range is used when no range is set
515     if (fmt == AV_PIX_FMT_NV12) {
516         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
517                                         kCVPixelFormatType_420YpCbCr8BiPlanarFullRange :
518                                         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
519     } else if (fmt == AV_PIX_FMT_YUV420P) {
520         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
521                                         kCVPixelFormatType_420YpCbCr8PlanarFullRange :
522                                         kCVPixelFormatType_420YpCbCr8Planar;
524         return AVERROR(EINVAL);
530 static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
531 CFMutableDictionaryRef* dict)
533 CFNumberRef cv_color_format_num = NULL;
534 CFNumberRef width_num = NULL;
535 CFNumberRef height_num = NULL;
536 CFMutableDictionaryRef pixel_buffer_info = NULL;
538 int status = get_cv_pixel_format(avctx,
543 if (status) return status;
545 pixel_buffer_info = CFDictionaryCreateMutable(
548 &kCFCopyStringDictionaryKeyCallBacks,
549 &kCFTypeDictionaryValueCallBacks);
551 if (!pixel_buffer_info) goto pbinfo_nomem;
553 cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
556 if (!cv_color_format_num) goto pbinfo_nomem;
558 CFDictionarySetValue(pixel_buffer_info,
559 kCVPixelBufferPixelFormatTypeKey,
560 cv_color_format_num);
561 vt_release_num(&cv_color_format_num);
563 width_num = CFNumberCreate(kCFAllocatorDefault,
566 if (!width_num) return AVERROR(ENOMEM);
568 CFDictionarySetValue(pixel_buffer_info,
569 kCVPixelBufferWidthKey,
571 vt_release_num(&width_num);
573 height_num = CFNumberCreate(kCFAllocatorDefault,
576 if (!height_num) goto pbinfo_nomem;
578 CFDictionarySetValue(pixel_buffer_info,
579 kCVPixelBufferHeightKey,
581 vt_release_num(&height_num);
583 *dict = pixel_buffer_info;
587 vt_release_num(&cv_color_format_num);
588 vt_release_num(&width_num);
589 vt_release_num(&height_num);
590 if (pixel_buffer_info) CFRelease(pixel_buffer_info);
592 return AVERROR(ENOMEM);
/*
 * Encoder init: validates options, creates the VTCompressionSession,
 * applies session properties (bitrate, profile/level, GOP, entropy mode,
 * frame reordering, realtime hints) and prepares the session for encoding.
 * NOTE(review): enc_info does not appear to be released on the error paths
 * visible here — confirm against the full file.
 */
595 static av_cold int vtenc_init(AVCodecContext *avctx)
597     CFMutableDictionaryRef enc_info;
598     CFMutableDictionaryRef pixel_buffer_info;
599     CMVideoCodecType codec_type;
600     VTEncContext *vtctx = avctx->priv_data;
601     CFStringRef profile_level;
602     SInt32 bit_rate = avctx->bit_rate;
603     CFNumberRef bit_rate_num;
604     CFBooleanRef has_b_frames_cfbool;
607     codec_type = get_cm_codec_type(avctx->codec_id);
609         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
610         return AVERROR(EINVAL);
/* Baseline profile supports neither B-frames nor CABAC; downgrade with a warning. */
613     vtctx->has_b_frames = avctx->max_b_frames > 0;
614     if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
615         av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
616         vtctx->has_b_frames = false;
619     if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
620         av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
621         vtctx->entropy = VT_ENTROPY_NOT_SET;
624     if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
626     vtctx->session = NULL;
628     enc_info = CFDictionaryCreateMutable(
631         &kCFCopyStringDictionaryKeyCallBacks,
632         &kCFTypeDictionaryValueCallBacks
635     if (!enc_info) return AVERROR(ENOMEM);
/* Hardware-acceleration keys exist on macOS only. */
637 #if !TARGET_OS_IPHONE
638     if (!vtctx->allow_sw) {
639         CFDictionarySetValue(enc_info, kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder, kCFBooleanTrue);
641         CFDictionarySetValue(enc_info, kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, kCFBooleanTrue);
/* Pixel buffer attributes are only needed when we feed raw AVFrames. */
645     if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
646         status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
652         pixel_buffer_info = NULL;
655     status = VTCompressionSessionCreate(
663         vtenc_output_callback,
668     if (pixel_buffer_info) CFRelease(pixel_buffer_info);
671     if (status || !vtctx->session) {
672         av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
674 #if !TARGET_OS_IPHONE
675         if (!vtctx->allow_sw) {
676             av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
680         return AVERROR_EXTERNAL;
683     bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
686     if (!bit_rate_num) return AVERROR(ENOMEM);
688     status = VTSessionSetProperty(vtctx->session,
689                                   kVTCompressionPropertyKey_AverageBitRate,
691     CFRelease(bit_rate_num);
694         av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
695         return AVERROR_EXTERNAL;
699     status = VTSessionSetProperty(vtctx->session,
700                                   kVTCompressionPropertyKey_ProfileLevel,
703         av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
704         return AVERROR_EXTERNAL;
708     if (avctx->gop_size > 0) {
709         CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
713             return AVERROR(ENOMEM);
716         status = VTSessionSetProperty(vtctx->session,
717                                       kVTCompressionPropertyKey_MaxKeyFrameInterval,
722             av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
723             return AVERROR_EXTERNAL;
/* frames_before / frames_after are best-effort: unsupported is only a warning. */
727     if (vtctx->frames_before) {
728         status = VTSessionSetProperty(vtctx->session,
729                                       kVTCompressionPropertyKey_MoreFramesBeforeStart,
732         if (status == kVTPropertyNotSupportedErr) {
733             av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
735             av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
739     if (vtctx->frames_after) {
740         status = VTSessionSetProperty(vtctx->session,
741                                       kVTCompressionPropertyKey_MoreFramesAfterEnd,
744         if (status == kVTPropertyNotSupportedErr) {
745             av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
747             av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
751     if (!vtctx->has_b_frames) {
752         status = VTSessionSetProperty(vtctx->session,
753                                       kVTCompressionPropertyKey_AllowFrameReordering,
757             av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
758             return AVERROR_EXTERNAL;
762     if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
763         CFStringRef entropy = vtctx->entropy == VT_CABAC ?
764                                 kVTH264EntropyMode_CABAC:
765                                 kVTH264EntropyMode_CAVLC;
767         status = VTSessionSetProperty(vtctx->session,
768                                       kVTCompressionPropertyKey_H264EntropyMode,
772             av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
773             return AVERROR_EXTERNAL;
777     if (vtctx->realtime) {
778         status = VTSessionSetProperty(vtctx->session,
779                                       kVTCompressionPropertyKey_RealTime,
783             av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
787     status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
789         av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
790         return AVERROR_EXTERNAL;
793     pthread_mutex_init(&vtctx->lock, NULL);
794     pthread_cond_init(&vtctx->cv_sample_sent, NULL);
/* dts_delta < 0 means "not known yet"; measured from the 2nd frame when B-frames exist. */
795     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
797     status = VTSessionCopyProperty(vtctx->session,
798                                    kVTCompressionPropertyKey_AllowFrameReordering,
800                                    &has_b_frames_cfbool);
803         //Some devices don't output B-frames for main profile, even if requested.
804         vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
805         CFRelease(has_b_frames_cfbool);
807     avctx->has_b_frames = vtctx->has_b_frames;
/*
 * Determines whether an encoded sample is a keyframe by inspecting the
 * kCMSampleAttachmentKey_NotSync attachment; absence of attachments
 * conservatively means keyframe.
 */
812 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
814     CFArrayRef attachments;
815     CFDictionaryRef attachment;
816     CFBooleanRef not_sync;
819     attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
820     len = !attachments ? 0 : CFArrayGetCount(attachments);
823         *is_key_frame = true;
827     attachment = CFArrayGetValueAtIndex(attachments, 0);
829     if (CFDictionaryGetValueIfPresent(attachment,
830                                       kCMSampleAttachmentKey_NotSync,
831                                       (const void **)&not_sync))
/* NotSync present and true means non-keyframe; invert for is_key_frame. */
833         *is_key_frame = !CFBooleanGetValue(not_sync);
835         *is_key_frame = true;
840 * Replaces length codes with H.264 Annex B start codes.
841 * length_code_size must equal sizeof(start_code).
842 * On failure, the contents of data may have been modified.
844 * @param length_code_size Byte length of each length code
845 * @param data Call with NAL units prefixed with length codes.
846 * On success, the length codes are replace with
848 * @param size Length of data, excluding any padding.
849 * @return 0 on success
850 * AVERROR_BUFFER_TOO_SMALL if length code size is smaller
851 * than a start code or if a length_code in data specifies
852 * data beyond the end of its buffer.
854 static int replace_length_codes(size_t length_code_size,
858 size_t remaining_size = size;
860 if (length_code_size != sizeof(start_code)) {
861 av_log(NULL, AV_LOG_ERROR, "Start code size and length code size not equal.\n");
862 return AVERROR_BUFFER_TOO_SMALL;
865 while (remaining_size > 0) {
869 for (i = 0; i < length_code_size; i++) {
874 if (remaining_size < box_len + sizeof(start_code)) {
875 av_log(NULL, AV_LOG_ERROR, "Length is out of range.\n");
876 AVERROR_BUFFER_TOO_SMALL;
879 memcpy(data, start_code, sizeof(start_code));
880 data += box_len + sizeof(start_code);
881 remaining_size -= box_len + sizeof(start_code);
888 * Copies NAL units and replaces length codes with
889 * H.264 Annex B start codes. On failure, the contents of
890 * dst_data may have been modified.
892 * @param length_code_size Byte length of each length code
893 * @param src_data NAL units prefixed with length codes.
894 * @param src_size Length of buffer, excluding any padding.
895 * @param dst_data Must be zeroed before calling this function.
896 * Contains the copied NAL units prefixed with
897 * start codes when the function returns
899 * @param dst_size Length of dst_data
900 * @return 0 on success
901 * AVERROR_INVALIDDATA if length_code_size is invalid
902 * AVERROR_BUFFER_TOO_SMALL if dst_data is too small
903 * or if a length_code in src_data specifies data beyond
904 * the end of its buffer.
906 static int copy_replace_length_codes(
907 size_t length_code_size,
908 const uint8_t *src_data,
913 size_t remaining_src_size = src_size;
914 size_t remaining_dst_size = dst_size;
916 if (length_code_size > 4) {
917 return AVERROR_INVALIDDATA;
920 while (remaining_src_size > 0) {
927 const uint8_t *src_box;
929 for (i = 0; i < length_code_size; i++) {
931 box_len |= src_data[i];
934 curr_src_len = box_len + length_code_size;
935 curr_dst_len = box_len + sizeof(start_code);
937 if (remaining_src_size < curr_src_len) {
938 return AVERROR_BUFFER_TOO_SMALL;
941 if (remaining_dst_size < curr_dst_len) {
942 return AVERROR_BUFFER_TOO_SMALL;
945 dst_box = dst_data + sizeof(start_code);
946 src_box = src_data + length_code_size;
948 memcpy(dst_data, start_code, sizeof(start_code));
949 memcpy(dst_box, src_box, box_len);
951 src_data += curr_src_len;
952 dst_data += curr_dst_len;
954 remaining_src_size -= curr_src_len;
955 remaining_dst_size -= curr_dst_len;
/*
 * Converts one encoded CMSampleBuffer into an AVPacket: optionally prepends
 * SPS/PPS (non-global-header keyframes), converts AVCC length prefixes to
 * Annex B start codes (zero-copy when possible), and fills pts/dts/flags.
 */
961 static int vtenc_cm_to_avpacket(
962     AVCodecContext *avctx,
963     CMSampleBufferRef sample_buffer,
966     VTEncContext *vtctx = avctx->priv_data;
972     size_t  length_code_size;
973     size_t  header_size = 0;
976     int64_t  time_base_num;
980     CMBlockBufferRef block;
981     CMVideoFormatDescriptionRef vid_fmt;
984     vtenc_get_frame_info(sample_buffer, &is_key_frame);
985     status = get_length_code_size(avctx, sample_buffer, &length_code_size);
986     if (status) return status;
/* Inline SPS/PPS only when extradata is not used for them. */
988     add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
991         vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
993             av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
996         int status = get_params_size(avctx, vid_fmt, &header_size);
997         if (status) return status;
1000     block = CMSampleBufferGetDataBuffer(sample_buffer);
1002         av_log(avctx, AV_LOG_ERROR, "Could not get block buffer from sample buffer.\n");
1003         return AVERROR_EXTERNAL;
1007     status = CMBlockBufferGetDataPointer(block, 0, &in_buf_size, NULL, &buf_data);
1009         av_log(avctx, AV_LOG_ERROR, "Error: cannot get data pointer: %d\n", status);
1010         return AVERROR_EXTERNAL;
1013     size_t out_buf_size = header_size + in_buf_size;
/* Zero-copy path: reuse the CMBlockBuffer memory when no header must be
 * prepended and the length code is already start-code sized. */
1014     bool can_reuse_cmbuffer = !add_header &&
1016                               length_code_size == sizeof(start_code);
1018     av_init_packet(pkt);
1020     if (can_reuse_cmbuffer) {
1021         AVBufferRef* buf_ref = av_buffer_create(
1029         if (!buf_ref) return AVERROR(ENOMEM);
1034         pkt->data = buf_data;
1035         pkt->size = in_buf_size;
1037         status = replace_length_codes(length_code_size, pkt->data, pkt->size);
1039             av_log(avctx, AV_LOG_ERROR, "Error replacing length codes: %d\n", status);
1044         status = av_new_packet(pkt, out_buf_size);
1045         if(status) return status;
1048         if (pkt->size < out_buf_size) {
1049             av_log(avctx, AV_LOG_ERROR, "Error: packet's buffer is too small.\n");
1050             return AVERROR_BUFFER_TOO_SMALL;
1054             status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
1055             if(status) return status;
1058         status = copy_replace_length_codes(
1062             pkt->data + header_size,
1063             pkt->size - header_size
/* NOTE(review): this log message lacks a trailing '\n', unlike the others. */
1067             av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d", status);
1073         pkt->flags |= AV_PKT_FLAG_KEY;
1076     pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
1077     dts = CMSampleBufferGetDecodeTimeStamp    (sample_buffer);
/* Without B-frames an invalid DTS is tolerable (DTS == PTS); otherwise it is an error. */
1079     if (CMTIME_IS_INVALID(dts)) {
1080         if (!vtctx->has_b_frames) {
1083             av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
1084             return AVERROR_EXTERNAL;
/* dts_delta shifts DTS so it never exceeds PTS when frames are reordered. */
1088     dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
1089     time_base_num = avctx->time_base.num;
1090     pkt->pts = pts.value / time_base_num;
1091     pkt->dts = dts.value / time_base_num - dts_delta;
1097 * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
1098 * containing all planes if so.
/*
 * Fills per-plane width/height/stride arrays for the frame's pixel format
 * and computes contiguous_buf_size (0 when the planes are not contiguous
 * in memory — see the comment above this function).
 */
1100 static int get_cv_pixel_info(
1101     AVCodecContext *avctx,
1102     const AVFrame  *frame,
1108     size_t *contiguous_buf_size)
1110     VTEncContext *vtctx = avctx->priv_data;
1111     int av_format       = frame->format;
1112     int av_color_range  = av_frame_get_color_range(frame);
1117     status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
1121                "Could not get pixel format for color format '%s' range '%s'.\n",
1122                av_get_pix_fmt_name(av_format),
1123                av_color_range > AVCOL_RANGE_UNSPECIFIED &&
1124                av_color_range < AVCOL_RANGE_NB ?
1125                    av_color_range_name(av_color_range) :
1128         return AVERROR(EINVAL);
/* Warn only once that MPEG range is being assumed. */
1131     if (range_guessed) {
1132         if (!vtctx->warned_color_range) {
1133             vtctx->warned_color_range = true;
1136                    "Color range not set for %s. Using MPEG range.\n",
1137                    av_get_pix_fmt_name(av_format));
1140         av_log(avctx, AV_LOG_WARNING, "");
1143     switch (av_format) {
1144     case AV_PIX_FMT_NV12:
/* NV12: full-size luma plane plus one half-size interleaved chroma plane. */
1147         widths [0] = avctx->width;
1148         heights[0] = avctx->height;
1149         strides[0] = frame ? frame->linesize[0] : avctx->width;
1151         widths [1] = (avctx->width  + 1) / 2;
1152         heights[1] = (avctx->height + 1) / 2;
1153         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
1156     case AV_PIX_FMT_YUV420P:
/* YUV420P: luma plane plus two half-size chroma planes. */
1159         widths [0] = avctx->width;
1160         heights[0] = avctx->height;
1161         strides[0] = frame ? frame->linesize[0] : avctx->width;
1163         widths [1] = (avctx->width  + 1) / 2;
1164         heights[1] = (avctx->height + 1) / 2;
1165         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;
1167         widths [2] = (avctx->width  + 1) / 2;
1168         heights[2] = (avctx->height + 1) / 2;
1169         strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
1176                "Could not get frame format info for color %d range %d.\n",
1180         return AVERROR(EINVAL);
/* Contiguity check: each plane must end exactly where the next begins. */
1183     *contiguous_buf_size = 0;
1184     for (i = 0; i < *plane_count; i++) {
1185         if (i < *plane_count - 1 &&
1186             frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
1187             *contiguous_buf_size = 0;
1191         *contiguous_buf_size += strides[i] * heights[i];
1197 #if !TARGET_OS_IPHONE
1198 //Not used on iOS - frame is always copied.
/* CVPixelBufferCreateWithPlanarBytes release callback: drops the AVFrame
 * reference that kept the wrapped plane memory alive. */
1199 static void free_avframe(
1204     const void *plane_addresses[])
1206     AVFrame *frame = release_ctx;
1207     av_frame_free(&frame);
1210 //Not used on OSX - frame is never copied.
/*
 * Copies AVFrame plane data into a (locked) CVPixelBuffer, handling both
 * planar and non-planar destination buffers and differing strides.
 * Returns 0 on success, AVERROR_EXTERNAL on plane-count/lock mismatches.
 */
1211 static int copy_avframe_to_pixel_buffer(AVCodecContext   *avctx,
1212                                         const AVFrame    *frame,
1213                                         CVPixelBufferRef cv_img,
1214                                         const size_t     *plane_strides,
1215                                         const size_t     *plane_rows)
1227     status = CVPixelBufferLockBaseAddress(cv_img, 0);
1232                "Error: Could not lock base address of CVPixelBuffer: %d.\n",
1237     if (CVPixelBufferIsPlanar(cv_img)) {
1238         plane_count = CVPixelBufferGetPlaneCount(cv_img);
1239         for (i = 0; frame->data[i]; i++) {
/* AVFrame and CVPixelBuffer must agree on the number of planes. */
1240             if (i == plane_count) {
1241                 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1244                        "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
1247                 return AVERROR_EXTERNAL;
1250             dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
1251             src_addr = (uint8_t*)frame->data[i];
1252             dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
1253             src_stride = plane_strides[i];
1254             rows = plane_rows[i];
/* Fast path when strides match; otherwise copy row by row. */
1256             if (dst_stride == src_stride) {
1257                 memcpy(dst_addr, src_addr, src_stride * rows);
1259                 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1261                 for (j = 0; j < rows; j++) {
1262                     memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
/* Non-planar destination: the source must have exactly one plane. */
1267         if (frame->data[1]) {
1268             CVPixelBufferUnlockBaseAddress(cv_img, 0);
1271                    "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
1274             return AVERROR_EXTERNAL;
1277         dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
1278         src_addr = (uint8_t*)frame->data[0];
1279         dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
1280         src_stride = plane_strides[0];
1281         rows = plane_rows[0];
1283         if (dst_stride == src_stride) {
1284             memcpy(dst_addr, src_addr, src_stride * rows);
1286             copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1288             for (j = 0; j < rows; j++) {
1289                 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1294     status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
1296         av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
1297         return AVERROR_EXTERNAL;
1302 #endif //!TARGET_OS_IPHONE
/*
 * Produces the CVPixelBuffer to feed to the encoder for one frame:
 * - AV_PIX_FMT_VIDEOTOOLBOX frames already carry one in data[3];
 * - on iOS the frame is copied into a pool-allocated buffer;
 * - on macOS the AVFrame planes are wrapped zero-copy (free_avframe releases
 *   the reference when VideoToolbox is done).
 */
1304 static int create_cv_pixel_buffer(AVCodecContext   *avctx,
1305                                   const AVFrame    *frame,
1306                                   CVPixelBufferRef *cv_img)
1310     size_t widths [AV_NUM_DATA_POINTERS];
1311     size_t heights[AV_NUM_DATA_POINTERS];
1312     size_t strides[AV_NUM_DATA_POINTERS];
1314     size_t contiguous_buf_size;
1315     CVPixelBufferPoolRef pix_buf_pool;
1316     VTEncContext* vtctx = avctx->priv_data;
1319     if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
1320         av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
1322         *cv_img = (CVPixelBufferRef)frame->data[3];
1323         av_assert0(*cv_img);
1329     memset(widths,  0, sizeof(widths));
1330     memset(heights, 0, sizeof(heights));
1331     memset(strides, 0, sizeof(strides));
1333     status = get_cv_pixel_info(
1341         &contiguous_buf_size
1348                "Error: Cannot convert format %d color_range %d: %d\n",
1350                av_frame_get_color_range(frame),
1354         return AVERROR_EXTERNAL;
1357 #if TARGET_OS_IPHONE
/* iOS: allocate from the session's pixel buffer pool, then copy the frame in. */
1358     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
1359     if (!pix_buf_pool) {
1360         av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
1361         return AVERROR_EXTERNAL;
1364     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
1370         av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
1371         return AVERROR_EXTERNAL;
1374     status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
/* macOS: keep a new AVFrame reference alive for the zero-copy wrap below. */
1381     AVFrame *enc_frame = av_frame_alloc();
1382     if (!enc_frame) return AVERROR(ENOMEM);
1384     status = av_frame_ref(enc_frame, frame);
1386         av_frame_free(&enc_frame);
1390     status = CVPixelBufferCreateWithPlanarBytes(
1391         kCFAllocatorDefault,
1396         contiguous_buf_size,
1398         (void **)enc_frame->data,
1409         av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
1410         return AVERROR_EXTERNAL;
/*
 * Submits one raw frame to the compression session. Timestamps are rescaled
 * into the codec time base via CMTimeMake. Returns 0 or an AVERROR.
 */
1417 static int vtenc_send_frame(AVCodecContext *avctx,
1418                             VTEncContext   *vtctx,
1419                             const AVFrame  *frame)
1422     CVPixelBufferRef cv_img = NULL;
1423     int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
1425     if (status) return status;
1427     time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
1428     status = VTCompressionSessionEncodeFrame(
1441         av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
1442         return AVERROR_EXTERNAL;
/*
 * encode2 entry point: sends the frame (or starts flushing on frame==NULL),
 * tracks the pts delta needed for B-frame dts computation, then pops an
 * encoded sample from the queue and converts it to an AVPacket.
 */
1448 static av_cold int vtenc_frame(
1449     AVCodecContext *avctx,
1451     const AVFrame  *frame,
1454     VTEncContext *vtctx = avctx->priv_data;
1457     CMSampleBufferRef buf = NULL;
1460         status = vtenc_send_frame(avctx, vtctx, frame);
1463             status = AVERROR_EXTERNAL;
/* dts_delta = pts(2nd frame) - pts(1st frame); only relevant with B-frames. */
1467         if (vtctx->frame_ct_in == 0) {
1468             vtctx->first_pts = frame->pts;
1469         } else if(vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
1470             vtctx->dts_delta = frame->pts - vtctx->first_pts;
1473         vtctx->frame_ct_in++;
1474     } else if(!vtctx->flushing) {
/* frame == NULL: flush once; tell VideoToolbox to emit everything pending. */
1475         vtctx->flushing = true;
1477         status = VTCompressionSessionCompleteFrames(vtctx->session,
1481             av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
1482             status = AVERROR_EXTERNAL;
/* Don't emit packets until dts_delta is known (or we're draining). */
1488     get_frame = vtctx->dts_delta >= 0 || !frame;
1494     status = vtenc_q_pop(vtctx, !frame, &buf);
1495     if (status) goto end_nopkt;
1496     if (!buf)   goto end_nopkt;
1498     status = vtenc_cm_to_avpacket(avctx, buf, pkt);
1500     if (status) goto end_nopkt;
1506     av_packet_unref(pkt);
/* Encoder teardown: destroys the sync primitives and releases the session. */
1510 static av_cold int vtenc_close(AVCodecContext *avctx)
1512     VTEncContext *vtctx = avctx->priv_data;
1514     if(!vtctx->session) return 0;
1516     pthread_cond_destroy(&vtctx->cv_sample_sent);
1517     pthread_mutex_destroy(&vtctx->lock);
1518     CFRelease(vtctx->session);
1519     vtctx->session = NULL;
/* Input pixel formats accepted by this encoder. */
1524 static const enum AVPixelFormat pix_fmts[] = {
1525     AV_PIX_FMT_VIDEOTOOLBOX,
1531 #define OFFSET(x) offsetof(VTEncContext, x)
1532 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options; "level" values are level*10 (0 = auto), consumed by
 * get_vt_profile_level. */
1533 static const AVOption options[] = {
1534     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
1535     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
1536     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
1537     { "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },
1539     { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
1540     { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
1541     { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
1542     { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
1543     { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
1544     { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
1545     { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
1546     { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
1547     { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
1548     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
1549     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
1551     { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
1552         { .i64 = 0 }, 0, 1, VE },
1554     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
1555     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
1556     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
1557     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
1558     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
1560     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
1561         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
1563     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
1564         OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
1565     { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
1566         OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
/* AVClass wiring the private options above into the codec context. */
1571 static const AVClass h264_videotoolbox_class = {
1572     .class_name = "h264_videotoolbox",
1573     .item_name  = av_default_item_name,
1575     .version    = LIBAVUTIL_VERSION_INT,
/* Codec registration; CAP_DELAY because output arrives asynchronously. */
1578 AVCodec ff_h264_videotoolbox_encoder = {
1579     .name             = "h264_videotoolbox",
1580     .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
1581     .type             = AVMEDIA_TYPE_VIDEO,
1582     .id               = AV_CODEC_ID_H264,
1583     .priv_data_size   = sizeof(VTEncContext),
1584     .pix_fmts         = pix_fmts,
1586     .encode2          = vtenc_frame,
1587     .close            = vtenc_close,
1588     .capabilities     = AV_CODEC_CAP_DELAY,
1589     .priv_class       = &h264_videotoolbox_class,
1590     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
1591                         FF_CODEC_CAP_INIT_CLEANUP,