1 /*
2  * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20
21 #include <VideoToolbox/VideoToolbox.h>
22 #include <CoreVideo/CoreVideo.h>
23 #include <CoreMedia/CoreMedia.h>
24 #include <TargetConditionals.h>
25 #include <Availability.h>
26 #include "avcodec.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/avassert.h"
29 #include "libavutil/atomic.h"
30 #include "libavutil/avstring.h"
31 #include "libavcodec/avcodec.h"
32 #include "libavutil/pixdesc.h"
33 #include "internal.h"
34 #include <pthread.h>
35 #include "h264.h"
36 #include "h264_sei.h"
37 #include <dlfcn.h>
38
39 // These symbols may not be present at runtime (older SDKs / OS releases); loadVTEncSymbols() resolves them via dlsym() and falls back to CFSTR constants.
40 static struct{
41     CFStringRef kCVImageBufferColorPrimaries_ITU_R_2020;
42     CFStringRef kCVImageBufferTransferFunction_ITU_R_2020;
43     CFStringRef kCVImageBufferYCbCrMatrix_ITU_R_2020;
44
45     CFStringRef kVTCompressionPropertyKey_H264EntropyMode;
46     CFStringRef kVTH264EntropyMode_CAVLC;
47     CFStringRef kVTH264EntropyMode_CABAC;
48
49     CFStringRef kVTProfileLevel_H264_Baseline_4_0;
50     CFStringRef kVTProfileLevel_H264_Baseline_4_2;
51     CFStringRef kVTProfileLevel_H264_Baseline_5_0;
52     CFStringRef kVTProfileLevel_H264_Baseline_5_1;
53     CFStringRef kVTProfileLevel_H264_Baseline_5_2;
54     CFStringRef kVTProfileLevel_H264_Baseline_AutoLevel;
55     CFStringRef kVTProfileLevel_H264_Main_4_2;
56     CFStringRef kVTProfileLevel_H264_Main_5_1;
57     CFStringRef kVTProfileLevel_H264_Main_5_2;
58     CFStringRef kVTProfileLevel_H264_Main_AutoLevel;
59     CFStringRef kVTProfileLevel_H264_High_3_0;
60     CFStringRef kVTProfileLevel_H264_High_3_1;
61     CFStringRef kVTProfileLevel_H264_High_3_2;
62     CFStringRef kVTProfileLevel_H264_High_4_0;
63     CFStringRef kVTProfileLevel_H264_High_4_1;
64     CFStringRef kVTProfileLevel_H264_High_4_2;
65     CFStringRef kVTProfileLevel_H264_High_5_1;
66     CFStringRef kVTProfileLevel_H264_High_5_2;
67     CFStringRef kVTProfileLevel_H264_High_AutoLevel;
68
69     CFStringRef kVTCompressionPropertyKey_RealTime;
70
71     CFStringRef kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder;
72     CFStringRef kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder;
73 } compat_keys;
74
75 #define GET_SYM(symbol, defaultVal)                                     \
76 do{                                                                     \
77     CFStringRef *handle = (CFStringRef*)dlsym(RTLD_DEFAULT, #symbol);\
78     if(!handle)                                                      \
79         compat_keys.symbol = CFSTR(defaultVal);                      \
80     else                                                             \
81         compat_keys.symbol = *handle;                                \
82 }while(0)
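/*
 * Illustration only (not part of the original file): GET_SYM expands to a
 * runtime lookup with a compile-time string fallback. For example,
 * GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime") behaves roughly as:
 *
 *     CFStringRef *handle = (CFStringRef*)dlsym(RTLD_DEFAULT,
 *                               "kVTCompressionPropertyKey_RealTime");
 *     compat_keys.kVTCompressionPropertyKey_RealTime =
 *         handle ? *handle : CFSTR("RealTime");
 *
 * so the encoder still builds and runs on SDKs/OS releases where the symbol
 * does not exist.
 */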
83
84 static pthread_once_t once_ctrl = PTHREAD_ONCE_INIT;
85
86 static void loadVTEncSymbols(){
87     GET_SYM(kCVImageBufferColorPrimaries_ITU_R_2020,   "ITU_R_2020");
88     GET_SYM(kCVImageBufferTransferFunction_ITU_R_2020, "ITU_R_2020");
89     GET_SYM(kCVImageBufferYCbCrMatrix_ITU_R_2020,      "ITU_R_2020");
90
91     GET_SYM(kVTCompressionPropertyKey_H264EntropyMode, "H264EntropyMode");
92     GET_SYM(kVTH264EntropyMode_CAVLC, "CAVLC");
93     GET_SYM(kVTH264EntropyMode_CABAC, "CABAC");
94
95     GET_SYM(kVTProfileLevel_H264_Baseline_4_0,       "H264_Baseline_4_0");
96     GET_SYM(kVTProfileLevel_H264_Baseline_4_2,       "H264_Baseline_4_2");
97     GET_SYM(kVTProfileLevel_H264_Baseline_5_0,       "H264_Baseline_5_0");
98     GET_SYM(kVTProfileLevel_H264_Baseline_5_1,       "H264_Baseline_5_1");
99     GET_SYM(kVTProfileLevel_H264_Baseline_5_2,       "H264_Baseline_5_2");
100     GET_SYM(kVTProfileLevel_H264_Baseline_AutoLevel, "H264_Baseline_AutoLevel");
101     GET_SYM(kVTProfileLevel_H264_Main_4_2,           "H264_Main_4_2");
102     GET_SYM(kVTProfileLevel_H264_Main_5_1,           "H264_Main_5_1");
103     GET_SYM(kVTProfileLevel_H264_Main_5_2,           "H264_Main_5_2");
104     GET_SYM(kVTProfileLevel_H264_Main_AutoLevel,     "H264_Main_AutoLevel");
105     GET_SYM(kVTProfileLevel_H264_High_3_0,           "H264_High_3_0");
106     GET_SYM(kVTProfileLevel_H264_High_3_1,           "H264_High_3_1");
107     GET_SYM(kVTProfileLevel_H264_High_3_2,           "H264_High_3_2");
108     GET_SYM(kVTProfileLevel_H264_High_4_0,           "H264_High_4_0");
109     GET_SYM(kVTProfileLevel_H264_High_4_1,           "H264_High_4_1");
110     GET_SYM(kVTProfileLevel_H264_High_4_2,           "H264_High_4_2");
111     GET_SYM(kVTProfileLevel_H264_High_5_1,           "H264_High_5_1");
112     GET_SYM(kVTProfileLevel_H264_High_5_2,           "H264_High_5_2");
113     GET_SYM(kVTProfileLevel_H264_High_AutoLevel,     "H264_High_AutoLevel");
114
115     GET_SYM(kVTCompressionPropertyKey_RealTime, "RealTime");
116
117     GET_SYM(kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
118             "EnableHardwareAcceleratedVideoEncoder");
119     GET_SYM(kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
120             "RequireHardwareAcceleratedVideoEncoder");
121 }
122
123 typedef enum VT_H264Profile {
124     H264_PROF_AUTO,
125     H264_PROF_BASELINE,
126     H264_PROF_MAIN,
127     H264_PROF_HIGH,
128     H264_PROF_COUNT
129 } VT_H264Profile;
130
131 typedef enum VTH264Entropy{
132     VT_ENTROPY_NOT_SET,
133     VT_CAVLC,
134     VT_CABAC
135 } VTH264Entropy;
136
137 static const uint8_t start_code[] = { 0, 0, 0, 1 };
138
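/* ExtraSEI carries caller-supplied SEI payload bytes (e.g. A53 closed
 * captions) attached to a frame so they can be spliced into the encoded
 * access unit; BufNode below is one entry of the encoded-output queue. */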
139 typedef struct ExtraSEI {
140     void *data;
141     size_t size;
142 } ExtraSEI;
143
144 typedef struct BufNode {
145     CMSampleBufferRef cm_buffer;
146     ExtraSEI *sei;
147     struct BufNode* next;
148     int error;
149 } BufNode;
150
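/* Private encoder state. lock/cv_sample_sent guard the q_head/q_tail output
 * queue that vtenc_output_callback() fills and vtenc_q_pop() drains, while
 * frame_ct_in/frame_ct_out count frames submitted to and received from the
 * VTCompressionSession. */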
151 typedef struct VTEncContext {
152     AVClass *class;
153     VTCompressionSessionRef session;
154     CFStringRef ycbcr_matrix;
155     CFStringRef color_primaries;
156     CFStringRef transfer_function;
157
158     pthread_mutex_t lock;
159     pthread_cond_t  cv_sample_sent;
160
161     int async_error;
162
163     BufNode *q_head;
164     BufNode *q_tail;
165
166     int64_t frame_ct_out;
167     int64_t frame_ct_in;
168
169     int64_t first_pts;
170     int64_t dts_delta;
171
172     int64_t profile;
173     int64_t level;
174     int64_t entropy;
175     int64_t realtime;
176     int64_t frames_before;
177     int64_t frames_after;
178
179     int64_t allow_sw;
180
181     bool flushing;
182     bool has_b_frames;
183     bool warned_color_range;
184     bool a53_cc;
185 } VTEncContext;
186
187 static int vtenc_populate_extradata(AVCodecContext   *avctx,
188                                     CMVideoCodecType codec_type,
189                                     CFStringRef      profile_level,
190                                     CFNumberRef      gamma_level,
191                                     CFDictionaryRef  enc_info,
192                                     CFDictionaryRef  pixel_buffer_info);
193
194 /**
195  * NULL-safe release of *refPtr; sets *refPtr to NULL afterwards.
196  */
197 static void vt_release_num(CFNumberRef* refPtr){
198     if (!*refPtr) {
199         return;
200     }
201
202     CFRelease(*refPtr);
203     *refPtr = NULL;
204 }
205
206 static void set_async_error(VTEncContext *vtctx, int err)
207 {
208     BufNode *info;
209
210     pthread_mutex_lock(&vtctx->lock);
211
212     vtctx->async_error = err;
213
214     info = vtctx->q_head;
215     vtctx->q_head = vtctx->q_tail = NULL;
216
217     while (info) {
218         BufNode *next = info->next;
219         CFRelease(info->cm_buffer);
220         av_free(info);
221         info = next;
222     }
223
224     pthread_mutex_unlock(&vtctx->lock);
225 }
226
227 static void clear_frame_queue(VTEncContext *vtctx)
228 {
229     set_async_error(vtctx, 0);
230 }
231
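/*
 * Pops the next encoded sample from the output queue. With wait set, this
 * blocks on cv_sample_sent until the VideoToolbox callback pushes a sample
 * or an asynchronous error is recorded. Hypothetical caller sketch (for
 * illustration only, not part of the original file):
 *
 *     CMSampleBufferRef buf;
 *     ExtraSEI *sei = NULL;
 *     int ret = vtenc_q_pop(vtctx, wait, &buf, &sei);
 *     if (ret < 0) return ret;   // asynchronous encode error
 *     if (!buf)    return 0;     // nothing pending / fully flushed
 *     // ...wrap buf into an AVPacket, then CFRelease(buf)...
 */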
232 static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf, ExtraSEI **sei)
233 {
234     BufNode *info;
235
236     pthread_mutex_lock(&vtctx->lock);
237
238     if (vtctx->async_error) {
239         pthread_mutex_unlock(&vtctx->lock);
240         return vtctx->async_error;
241     }
242
243     if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
244         *buf = NULL;
245
246         pthread_mutex_unlock(&vtctx->lock);
247         return 0;
248     }
249
250     while (!vtctx->q_head && !vtctx->async_error && wait) {
251         pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
252     }
253
254     if (!vtctx->q_head) {
255         pthread_mutex_unlock(&vtctx->lock);
256         *buf = NULL;
257         return 0;
258     }
259
260     info = vtctx->q_head;
261     vtctx->q_head = vtctx->q_head->next;
262     if (!vtctx->q_head) {
263         vtctx->q_tail = NULL;
264     }
265
266     pthread_mutex_unlock(&vtctx->lock);
267
268     *buf = info->cm_buffer;
269     if (sei && *buf) {
270         *sei = info->sei;
271     } else if (info->sei) {
272         if (info->sei->data) av_free(info->sei->data);
273         av_free(info->sei);
274     }
275     av_free(info);
276
277     vtctx->frame_ct_out++;
278
279     return 0;
280 }
281
282 static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer, ExtraSEI *sei)
283 {
284     BufNode *info = av_malloc(sizeof(BufNode));
285     if (!info) {
286         set_async_error(vtctx, AVERROR(ENOMEM));
287         return;
288     }
289
290     CFRetain(buffer);
291     info->cm_buffer = buffer;
292     info->sei = sei;
293     info->next = NULL;
294
295     pthread_mutex_lock(&vtctx->lock);
296     pthread_cond_signal(&vtctx->cv_sample_sent);
297
298     if (!vtctx->q_head) {
299         vtctx->q_head = info;
300     } else {
301         vtctx->q_tail->next = info;
302     }
303
304     vtctx->q_tail = info;
305
306     pthread_mutex_unlock(&vtctx->lock);
307 }
308
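/*
 * VideoToolbox emits AVCC-style samples: each NAL unit is preceded by a
 * big-endian length field of length_code_size bytes instead of an Annex B
 * start code. count_nalus() walks these length prefixes to count the NAL
 * units in a CMSampleBuffer; copy_replace_length_codes() further below does
 * the actual start-code rewriting.
 */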
309 static int count_nalus(size_t length_code_size,
310                        CMSampleBufferRef sample_buffer,
311                        int *count)
312 {
313     size_t offset = 0;
314     int status;
315     int nalu_ct = 0;
316     uint8_t size_buf[4];
317     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
318     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
319
320     if (length_code_size > 4)
321         return AVERROR_INVALIDDATA;
322
323     while (offset < src_size) {
324         size_t curr_src_len;
325         size_t box_len = 0;
326         size_t i;
327
328         status = CMBlockBufferCopyDataBytes(block,
329                                             offset,
330                                             length_code_size,
331                                             size_buf);
332
333         for (i = 0; i < length_code_size; i++) {
334             box_len <<= 8;
335             box_len |= size_buf[i];
336         }
337
338         curr_src_len = box_len + length_code_size;
339         offset += curr_src_len;
340
341         nalu_ct++;
342     }
343
344     *count = nalu_ct;
345     return 0;
346 }
347
348 static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
349 {
350     switch (id) {
351     case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
352     default:               return 0;
353     }
354 }
355
356 /**
357  * Computes the total size of the parameter sets (SPS, PPS) contained in a
358  * CMVideoFormatDescriptionRef when written out as Annex B data, i.e. with
359  * a start code prepended to each parameter set.
360  *
361  * @param size Set to the number of bytes needed for the start codes plus
362  *             parameter sets; copy_param_sets() below performs the copy.
363  */
364 static int get_params_size(
365     AVCodecContext              *avctx,
366     CMVideoFormatDescriptionRef vid_fmt,
367     size_t                      *size)
368 {
369     size_t total_size = 0;
370     size_t ps_count;
371     int is_count_bad = 0;
372     size_t i;
373     int status;
374     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
375                                                                 0,
376                                                                 NULL,
377                                                                 NULL,
378                                                                 &ps_count,
379                                                                 NULL);
380     if (status) {
381         is_count_bad = 1;
382         ps_count     = 0;
383         status       = 0;
384     }
385
386     for (i = 0; i < ps_count || is_count_bad; i++) {
387         const uint8_t *ps;
388         size_t ps_size;
389         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
390                                                                     i,
391                                                                     &ps,
392                                                                     &ps_size,
393                                                                     NULL,
394                                                                     NULL);
395         if (status) {
396             /*
397              * When ps_count is invalid, status != 0 ends the loop normally
398              * unless we didn't get any parameter sets.
399              */
400             if (i > 0 && is_count_bad) status = 0;
401
402             break;
403         }
404
405         total_size += ps_size + sizeof(start_code);
406     }
407
408     if (status) {
409         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
410         return AVERROR_EXTERNAL;
411     }
412
413     *size = total_size;
414     return 0;
415 }
416
417 static int copy_param_sets(
418     AVCodecContext              *avctx,
419     CMVideoFormatDescriptionRef vid_fmt,
420     uint8_t                     *dst,
421     size_t                      dst_size)
422 {
423     size_t ps_count;
424     int is_count_bad = 0;
425     int status;
426     size_t offset = 0;
427     size_t i;
428
429     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
430                                                                 0,
431                                                                 NULL,
432                                                                 NULL,
433                                                                 &ps_count,
434                                                                 NULL);
435     if (status) {
436         is_count_bad = 1;
437         ps_count     = 0;
438         status       = 0;
439     }
440
441
442     for (i = 0; i < ps_count || is_count_bad; i++) {
443         const uint8_t *ps;
444         size_t ps_size;
445         size_t next_offset;
446
447         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
448                                                                     i,
449                                                                     &ps,
450                                                                     &ps_size,
451                                                                     NULL,
452                                                                     NULL);
453         if (status) {
454             if (i > 0 && is_count_bad) status = 0;
455
456             break;
457         }
458
459         next_offset = offset + sizeof(start_code) + ps_size;
460         if (dst_size < next_offset) {
461             av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
462             return AVERROR_BUFFER_TOO_SMALL;
463         }
464
465         memcpy(dst + offset, start_code, sizeof(start_code));
466         offset += sizeof(start_code);
467
468         memcpy(dst + offset, ps, ps_size);
469         offset = next_offset;
470     }
471
472     if (status) {
473         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
474         return AVERROR_EXTERNAL;
475     }
476
477     return 0;
478 }
479
480 static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
481 {
482     CMVideoFormatDescriptionRef vid_fmt;
483     size_t total_size;
484     int status;
485
486     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
487     if (!vid_fmt) {
488         av_log(avctx, AV_LOG_ERROR, "No video format.\n");
489         return AVERROR_EXTERNAL;
490     }
491
492     status = get_params_size(avctx, vid_fmt, &total_size);
493     if (status) {
494         av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
495         return status;
496     }
497
498     avctx->extradata = av_mallocz(total_size + AV_INPUT_BUFFER_PADDING_SIZE);
499     if (!avctx->extradata) {
500         return AVERROR(ENOMEM);
501     }
502     avctx->extradata_size = total_size;
503
504     status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
505
506     if (status) {
507         av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
508         return status;
509     }
510
511     return 0;
512 }
513
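/*
 * Compression output callback registered with VTCompressionSessionCreate().
 * VideoToolbox calls it on its own thread once per encoded frame, so it only
 * records errors and appends the sample (plus any per-frame SEI passed as
 * sourceFrameCtx) to the output queue; packets are assembled later on the
 * caller's thread.
 */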
514 static void vtenc_output_callback(
515     void *ctx,
516     void *sourceFrameCtx,
517     OSStatus status,
518     VTEncodeInfoFlags flags,
519     CMSampleBufferRef sample_buffer)
520 {
521     AVCodecContext *avctx = ctx;
522     VTEncContext   *vtctx = avctx->priv_data;
523     ExtraSEI *sei = sourceFrameCtx;
524
525     if (vtctx->async_error) {
526         if(sample_buffer) CFRelease(sample_buffer);
527         return;
528     }
529
530     if (status || !sample_buffer) {
531         av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
532         set_async_error(vtctx, AVERROR_EXTERNAL);
533         return;
534     }
535
536     if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
537         int set_status = set_extradata(avctx, sample_buffer);
538         if (set_status) {
539             set_async_error(vtctx, set_status);
540             return;
541         }
542     }
543
544     vtenc_q_push(vtctx, sample_buffer, sei);
545 }
546
547 static int get_length_code_size(
548     AVCodecContext    *avctx,
549     CMSampleBufferRef sample_buffer,
550     size_t            *size)
551 {
552     CMVideoFormatDescriptionRef vid_fmt;
553     int isize;
554     int status;
555
556     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
557     if (!vid_fmt) {
558         av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
559         return AVERROR_EXTERNAL;
560     }
561
562     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
563                                                                 0,
564                                                                 NULL,
565                                                                 NULL,
566                                                                 NULL,
567                                                                 &isize);
568     if (status) {
569         av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
570         return AVERROR_EXTERNAL;
571     }
572
573     *size = isize;
574     return 0;
575 }
576
577 /*
578  * Returns true on success.
579  *
580  * If profile_level_val is NULL and this method returns true, don't specify the
581  * profile/level to the encoder.
582  */
583 static bool get_vt_profile_level(AVCodecContext *avctx,
584                                  CFStringRef    *profile_level_val)
585 {
586     VTEncContext *vtctx = avctx->priv_data;
587     int64_t profile = vtctx->profile;
588
589     if (profile == H264_PROF_AUTO && vtctx->level) {
590         //Need to pick a profile if level is not auto-selected.
591         profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
592     }
593
594     *profile_level_val = NULL;
595
596     switch (profile) {
597         case H264_PROF_AUTO:
598             return true;
599
600         case H264_PROF_BASELINE:
601             switch (vtctx->level) {
602                 case  0: *profile_level_val =
603                                   compat_keys.kVTProfileLevel_H264_Baseline_AutoLevel; break;
604                 case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
605                 case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
606                 case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
607                 case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
608                 case 40: *profile_level_val =
609                                   compat_keys.kVTProfileLevel_H264_Baseline_4_0;       break;
610                 case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
611                 case 42: *profile_level_val =
612                                   compat_keys.kVTProfileLevel_H264_Baseline_4_2;       break;
613                 case 50: *profile_level_val =
614                                   compat_keys.kVTProfileLevel_H264_Baseline_5_0;       break;
615                 case 51: *profile_level_val =
616                                   compat_keys.kVTProfileLevel_H264_Baseline_5_1;       break;
617                 case 52: *profile_level_val =
618                                   compat_keys.kVTProfileLevel_H264_Baseline_5_2;       break;
619             }
620             break;
621
622         case H264_PROF_MAIN:
623             switch (vtctx->level) {
624                 case  0: *profile_level_val =
625                                   compat_keys.kVTProfileLevel_H264_Main_AutoLevel; break;
626                 case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
627                 case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
628                 case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
629                 case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
630                 case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
631                 case 42: *profile_level_val =
632                                   compat_keys.kVTProfileLevel_H264_Main_4_2;       break;
633                 case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
634                 case 51: *profile_level_val =
635                                   compat_keys.kVTProfileLevel_H264_Main_5_1;       break;
636                 case 52: *profile_level_val =
637                                   compat_keys.kVTProfileLevel_H264_Main_5_2;       break;
638             }
639             break;
640
641         case H264_PROF_HIGH:
642             switch (vtctx->level) {
643                 case  0: *profile_level_val =
644                                   compat_keys.kVTProfileLevel_H264_High_AutoLevel; break;
645                 case 30: *profile_level_val =
646                                   compat_keys.kVTProfileLevel_H264_High_3_0;       break;
647                 case 31: *profile_level_val =
648                                   compat_keys.kVTProfileLevel_H264_High_3_1;       break;
649                 case 32: *profile_level_val =
650                                   compat_keys.kVTProfileLevel_H264_High_3_2;       break;
651                 case 40: *profile_level_val =
652                                   compat_keys.kVTProfileLevel_H264_High_4_0;       break;
653                 case 41: *profile_level_val =
654                                   compat_keys.kVTProfileLevel_H264_High_4_1;       break;
655                 case 42: *profile_level_val =
656                                   compat_keys.kVTProfileLevel_H264_High_4_2;       break;
657                 case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
658                 case 51: *profile_level_val =
659                                   compat_keys.kVTProfileLevel_H264_High_5_1;       break;
660                 case 52: *profile_level_val =
661                                   compat_keys.kVTProfileLevel_H264_High_5_2;       break;
662             }
663             break;
664     }
665
666     if (!*profile_level_val) {
667         av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
668         return false;
669     }
670
671     return true;
672 }
673
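/*
 * Maps an AVPixelFormat/AVColorRange pair to the matching
 * kCVPixelFormatType_* constant, e.g. AV_PIX_FMT_NV12 with AVCOL_RANGE_MPEG
 * maps to kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange. *range_guessed is
 * set when no explicit range was given and MPEG (video) range is assumed.
 */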
674 static int get_cv_pixel_format(AVCodecContext* avctx,
675                                enum AVPixelFormat fmt,
676                                enum AVColorRange range,
677                                int* av_pixel_format,
678                                int* range_guessed)
679 {
680     if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
681                                         range != AVCOL_RANGE_JPEG;
682
683     //MPEG range is used when no range is set
684     if (fmt == AV_PIX_FMT_NV12) {
685         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
686                                         kCVPixelFormatType_420YpCbCr8BiPlanarFullRange :
687                                         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
688     } else if (fmt == AV_PIX_FMT_YUV420P) {
689         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
690                                         kCVPixelFormatType_420YpCbCr8PlanarFullRange :
691                                         kCVPixelFormatType_420YpCbCr8Planar;
692     } else {
693         return AVERROR(EINVAL);
694     }
695
696     return 0;
697 }
698
699 static void add_color_attr(AVCodecContext *avctx, CFMutableDictionaryRef dict) {
700     VTEncContext *vtctx = avctx->priv_data;
701
702     if (vtctx->color_primaries) {
703         CFDictionarySetValue(dict,
704                              kCVImageBufferColorPrimariesKey,
705                              vtctx->color_primaries);
706     }
707
708     if (vtctx->transfer_function) {
709         CFDictionarySetValue(dict,
710                              kCVImageBufferTransferFunctionKey,
711                              vtctx->transfer_function);
712     }
713
714     if (vtctx->ycbcr_matrix) {
715         CFDictionarySetValue(dict,
716                              kCVImageBufferYCbCrMatrixKey,
717                              vtctx->ycbcr_matrix);
718     }
719 }
720
721 static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
722                                        CFMutableDictionaryRef* dict)
723 {
724     CFNumberRef cv_color_format_num = NULL;
725     CFNumberRef width_num = NULL;
726     CFNumberRef height_num = NULL;
727     CFMutableDictionaryRef pixel_buffer_info = NULL;
728     int cv_color_format;
729     int status = get_cv_pixel_format(avctx,
730                                      avctx->pix_fmt,
731                                      avctx->color_range,
732                                      &cv_color_format,
733                                      NULL);
734     if (status) return status;
735
736     pixel_buffer_info = CFDictionaryCreateMutable(
737                             kCFAllocatorDefault,
738                             20,
739                             &kCFCopyStringDictionaryKeyCallBacks,
740                             &kCFTypeDictionaryValueCallBacks);
741
742     if (!pixel_buffer_info) goto pbinfo_nomem;
743
744     cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
745                                          kCFNumberSInt32Type,
746                                          &cv_color_format);
747     if (!cv_color_format_num) goto pbinfo_nomem;
748
749     CFDictionarySetValue(pixel_buffer_info,
750                          kCVPixelBufferPixelFormatTypeKey,
751                          cv_color_format_num);
752     vt_release_num(&cv_color_format_num);
753
754     width_num = CFNumberCreate(kCFAllocatorDefault,
755                                kCFNumberSInt32Type,
756                                &avctx->width);
757     if (!width_num) goto pbinfo_nomem;
758
759     CFDictionarySetValue(pixel_buffer_info,
760                          kCVPixelBufferWidthKey,
761                          width_num);
762     vt_release_num(&width_num);
763
764     height_num = CFNumberCreate(kCFAllocatorDefault,
765                                 kCFNumberSInt32Type,
766                                 &avctx->height);
767     if (!height_num) goto pbinfo_nomem;
768
769     CFDictionarySetValue(pixel_buffer_info,
770                          kCVPixelBufferHeightKey,
771                          height_num);
772     vt_release_num(&height_num);
773
774     add_color_attr(avctx, pixel_buffer_info);
775
776     *dict = pixel_buffer_info;
777     return 0;
778
779 pbinfo_nomem:
780     vt_release_num(&cv_color_format_num);
781     vt_release_num(&width_num);
782     vt_release_num(&height_num);
783     if (pixel_buffer_info) CFRelease(pixel_buffer_info);
784
785     return AVERROR(ENOMEM);
786 }
787
788 static int get_cv_color_primaries(AVCodecContext *avctx,
789                                   CFStringRef *primaries)
790 {
791     enum AVColorPrimaries pri = avctx->color_primaries;
792     switch (pri) {
793         case AVCOL_PRI_UNSPECIFIED:
794             *primaries = NULL;
795             break;
796
797         case AVCOL_PRI_BT709:
798             *primaries = kCVImageBufferColorPrimaries_ITU_R_709_2;
799             break;
800
801         case AVCOL_PRI_BT2020:
802             *primaries = compat_keys.kCVImageBufferColorPrimaries_ITU_R_2020;
803             break;
804
805         default:
806             av_log(avctx, AV_LOG_ERROR, "Color primaries %s is not supported.\n", av_color_primaries_name(pri));
807             *primaries = NULL;
808             return -1;
809     }
810
811     return 0;
812 }
813
814 static int get_cv_transfer_function(AVCodecContext *avctx,
815                                     CFStringRef *transfer_fnc,
816                                     CFNumberRef *gamma_level)
817 {
818     enum AVColorTransferCharacteristic trc = avctx->color_trc;
819     Float32 gamma;
820     *gamma_level = NULL;
821
822     switch (trc) {
823         case AVCOL_TRC_UNSPECIFIED:
824             *transfer_fnc = NULL;
825             break;
826
827         case AVCOL_TRC_BT709:
828             *transfer_fnc = kCVImageBufferTransferFunction_ITU_R_709_2;
829             break;
830
831         case AVCOL_TRC_SMPTE240M:
832             *transfer_fnc = kCVImageBufferTransferFunction_SMPTE_240M_1995;
833             break;
834
835         case AVCOL_TRC_GAMMA22:
836             gamma = 2.2;
837             *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
838             *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
839             break;
840
841         case AVCOL_TRC_GAMMA28:
842             gamma = 2.8;
843             *transfer_fnc = kCVImageBufferTransferFunction_UseGamma;
844             *gamma_level = CFNumberCreate(NULL, kCFNumberFloat32Type, &gamma);
845             break;
846
847         case AVCOL_TRC_BT2020_10:
848         case AVCOL_TRC_BT2020_12:
849             *transfer_fnc = compat_keys.kCVImageBufferTransferFunction_ITU_R_2020;
850             break;
851
852         default:
853             av_log(avctx, AV_LOG_ERROR, "Transfer function %s is not supported.\n", av_color_transfer_name(trc));
854             return -1;
855     }
856
857     return 0;
858 }
859
860 static int get_cv_ycbcr_matrix(AVCodecContext *avctx, CFStringRef *matrix) {
861     switch(avctx->colorspace) {
862         case AVCOL_SPC_BT709:
863             *matrix = kCVImageBufferYCbCrMatrix_ITU_R_709_2;
864             break;
865
866         case AVCOL_SPC_UNSPECIFIED:
867             *matrix = NULL;
868             break;
869
870         case AVCOL_SPC_BT470BG:
871         case AVCOL_SPC_SMPTE170M:
872             *matrix = kCVImageBufferYCbCrMatrix_ITU_R_601_4;
873             break;
874
875         case AVCOL_SPC_SMPTE240M:
876             *matrix = kCVImageBufferYCbCrMatrix_SMPTE_240M_1995;
877             break;
878
879         case AVCOL_SPC_BT2020_NCL:
880             *matrix = compat_keys.kCVImageBufferYCbCrMatrix_ITU_R_2020;
881             break;
882
883         default:
884             av_log(avctx, AV_LOG_ERROR, "Color space %s is not supported.\n", av_color_space_name(avctx->colorspace));
885             return -1;
886     }
887
888     return 0;
889 }
890
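/*
 * Creates the VTCompressionSession and applies the user options as session
 * properties (bitrate, profile/level, GOP size, pixel aspect ratio, color
 * metadata, entropy mode, realtime flag). Each numeric option follows the
 * same pattern; sketch only, with placeholder names kVTSomePropertyKey and
 * some_int_option:
 *
 *     CFNumberRef v = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type,
 *                                    &some_int_option);
 *     status = VTSessionSetProperty(vtctx->session, kVTSomePropertyKey, v);
 *     CFRelease(v);
 *     if (status) { ...log or fail... }
 */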
891 static int vtenc_create_encoder(AVCodecContext   *avctx,
892                                 CMVideoCodecType codec_type,
893                                 CFStringRef      profile_level,
894                                 CFNumberRef      gamma_level,
895                                 CFDictionaryRef  enc_info,
896                                 CFDictionaryRef  pixel_buffer_info,
897                                 VTCompressionSessionRef *session)
898 {
899     VTEncContext *vtctx = avctx->priv_data;
900     SInt32       bit_rate = avctx->bit_rate;
901     CFNumberRef  bit_rate_num;
902
903     int status = VTCompressionSessionCreate(kCFAllocatorDefault,
904                                             avctx->width,
905                                             avctx->height,
906                                             codec_type,
907                                             enc_info,
908                                             pixel_buffer_info,
909                                             kCFAllocatorDefault,
910                                             vtenc_output_callback,
911                                             avctx,
912                                             session);
913
914     if (status || !vtctx->session) {
915         av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
916
917 #if !TARGET_OS_IPHONE
918         if (!vtctx->allow_sw) {
919             av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
920         }
921 #endif
922
923         return AVERROR_EXTERNAL;
924     }
925
926     bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
927                                   kCFNumberSInt32Type,
928                                   &bit_rate);
929     if (!bit_rate_num) return AVERROR(ENOMEM);
930
931     status = VTSessionSetProperty(vtctx->session,
932                                   kVTCompressionPropertyKey_AverageBitRate,
933                                   bit_rate_num);
934     CFRelease(bit_rate_num);
935
936     if (status) {
937         av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
938         return AVERROR_EXTERNAL;
939     }
940
941     if (profile_level) {
942         status = VTSessionSetProperty(vtctx->session,
943                                       kVTCompressionPropertyKey_ProfileLevel,
944                                       profile_level);
945         if (status) {
946             av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
947         }
948     }
949
950     if (avctx->gop_size > 0) {
951         CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
952                                               kCFNumberIntType,
953                                               &avctx->gop_size);
954         if (!interval) {
955             return AVERROR(ENOMEM);
956         }
957
958         status = VTSessionSetProperty(vtctx->session,
959                                       kVTCompressionPropertyKey_MaxKeyFrameInterval,
960                                       interval);
961         CFRelease(interval);
962
963         if (status) {
964             av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
965             return AVERROR_EXTERNAL;
966         }
967     }
968
969     if (vtctx->frames_before) {
970         status = VTSessionSetProperty(vtctx->session,
971                                       kVTCompressionPropertyKey_MoreFramesBeforeStart,
972                                       kCFBooleanTrue);
973
974         if (status == kVTPropertyNotSupportedErr) {
975             av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
976         } else if (status) {
977             av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
978         }
979     }
980
981     if (vtctx->frames_after) {
982         status = VTSessionSetProperty(vtctx->session,
983                                       kVTCompressionPropertyKey_MoreFramesAfterEnd,
984                                       kCFBooleanTrue);
985
986         if (status == kVTPropertyNotSupportedErr) {
987             av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
988         } else if (status) {
989             av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
990         }
991     }
992
993     if (avctx->sample_aspect_ratio.num != 0) {
994         CFNumberRef num;
995         CFNumberRef den;
996         CFMutableDictionaryRef par;
997         AVRational *avpar = &avctx->sample_aspect_ratio;
998
999         av_reduce(&avpar->num, &avpar->den,
1000                    avpar->num,  avpar->den,
1001                   0xFFFFFFFF);
1002
1003         num = CFNumberCreate(kCFAllocatorDefault,
1004                              kCFNumberIntType,
1005                              &avpar->num);
1006
1007         den = CFNumberCreate(kCFAllocatorDefault,
1008                              kCFNumberIntType,
1009                              &avpar->den);
1010
1011
1012
1013         par = CFDictionaryCreateMutable(kCFAllocatorDefault,
1014                                         2,
1015                                         &kCFCopyStringDictionaryKeyCallBacks,
1016                                         &kCFTypeDictionaryValueCallBacks);
1017
1018         if (!par || !num || !den) {
1019             if (par) CFRelease(par);
1020             if (num) CFRelease(num);
1021             if (den) CFRelease(den);
1022
1023             return AVERROR(ENOMEM);
1024         }
1025
1026         CFDictionarySetValue(
1027             par,
1028             kCMFormatDescriptionKey_PixelAspectRatioHorizontalSpacing,
1029             num);
1030
1031         CFDictionarySetValue(
1032             par,
1033             kCMFormatDescriptionKey_PixelAspectRatioVerticalSpacing,
1034             den);
1035
1036         status = VTSessionSetProperty(vtctx->session,
1037                                       kVTCompressionPropertyKey_PixelAspectRatio,
1038                                       par);
1039
1040         CFRelease(par);
1041         CFRelease(num);
1042         CFRelease(den);
1043
1044         if (status) {
1045             av_log(avctx,
1046                    AV_LOG_ERROR,
1047                    "Error setting pixel aspect ratio to %d:%d: %d.\n",
1048                    avctx->sample_aspect_ratio.num,
1049                    avctx->sample_aspect_ratio.den,
1050                    status);
1051
1052             return AVERROR_EXTERNAL;
1053         }
1054     }
1055
1056
1057     if (vtctx->transfer_function) {
1058         status = VTSessionSetProperty(vtctx->session,
1059                                       kVTCompressionPropertyKey_TransferFunction,
1060                                       vtctx->transfer_function);
1061
1062         if (status) {
1063             av_log(avctx, AV_LOG_WARNING, "Could not set transfer function: %d\n", status);
1064         }
1065     }
1066
1067
1068     if (vtctx->ycbcr_matrix) {
1069         status = VTSessionSetProperty(vtctx->session,
1070                                       kVTCompressionPropertyKey_YCbCrMatrix,
1071                                       vtctx->ycbcr_matrix);
1072
1073         if (status) {
1074             av_log(avctx, AV_LOG_WARNING, "Could not set ycbcr matrix: %d\n", status);
1075         }
1076     }
1077
1078
1079     if (vtctx->color_primaries) {
1080         status = VTSessionSetProperty(vtctx->session,
1081                                       kVTCompressionPropertyKey_ColorPrimaries,
1082                                       vtctx->color_primaries);
1083
1084         if (status) {
1085             av_log(avctx, AV_LOG_WARNING, "Could not set color primaries: %d\n", status);
1086         }
1087     }
1088
1089     if (gamma_level) {
1090         status = VTSessionSetProperty(vtctx->session,
1091                                       kCVImageBufferGammaLevelKey,
1092                                       gamma_level);
1093
1094         if (status) {
1095             av_log(avctx, AV_LOG_WARNING, "Could not set gamma level: %d\n", status);
1096         }
1097     }
1098
1099     if (!vtctx->has_b_frames) {
1100         status = VTSessionSetProperty(vtctx->session,
1101                                       kVTCompressionPropertyKey_AllowFrameReordering,
1102                                       kCFBooleanFalse);
1103
1104         if (status) {
1105             av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
1106             return AVERROR_EXTERNAL;
1107         }
1108     }
1109
1110     if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
1111         CFStringRef entropy = vtctx->entropy == VT_CABAC ?
1112                                 compat_keys.kVTH264EntropyMode_CABAC:
1113                                 compat_keys.kVTH264EntropyMode_CAVLC;
1114
1115         status = VTSessionSetProperty(vtctx->session,
1116                                       compat_keys.kVTCompressionPropertyKey_H264EntropyMode,
1117                                       entropy);
1118
1119         if (status) {
1120             av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
1121         }
1122     }
1123
1124     if (vtctx->realtime) {
1125         status = VTSessionSetProperty(vtctx->session,
1126                                       compat_keys.kVTCompressionPropertyKey_RealTime,
1127                                       kCFBooleanTrue);
1128
1129         if (status) {
1130             av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
1131         }
1132     }
1133
1134     status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
1135     if (status) {
1136         av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
1137         return AVERROR_EXTERNAL;
1138     }
1139
1140     return 0;
1141 }
1142
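/*
 * One-time encoder setup: resolves the optional VideoToolbox symbols, maps
 * the AVCodecID to a CMVideoCodecType, validates the profile/level and
 * entropy options, builds the encoder-specification and pixel-buffer
 * dictionaries, and creates the compression session. With
 * AV_CODEC_FLAG_GLOBAL_HEADER set, vtenc_populate_extradata() is called
 * first so SPS/PPS can be exported through avctx->extradata.
 */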
1143 static av_cold int vtenc_init(AVCodecContext *avctx)
1144 {
1145     CFMutableDictionaryRef enc_info;
1146     CFMutableDictionaryRef pixel_buffer_info;
1147     CMVideoCodecType       codec_type;
1148     VTEncContext           *vtctx = avctx->priv_data;
1149     CFStringRef            profile_level;
1150     CFBooleanRef           has_b_frames_cfbool;
1151     CFNumberRef            gamma_level = NULL;
1152     int                    status;
1153
1154     pthread_once(&once_ctrl, loadVTEncSymbols);
1155
1156     codec_type = get_cm_codec_type(avctx->codec_id);
1157     if (!codec_type) {
1158         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
1159         return AVERROR(EINVAL);
1160     }
1161
1162     vtctx->has_b_frames = avctx->max_b_frames > 0;
1163     if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
1164         av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
1165         vtctx->has_b_frames = false;
1166     }
1167
1168     if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
1169         av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
1170         vtctx->entropy = VT_ENTROPY_NOT_SET;
1171     }
1172
1173     if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
1174
1175     vtctx->session = NULL;
1176
1177     enc_info = CFDictionaryCreateMutable(
1178         kCFAllocatorDefault,
1179         20,
1180         &kCFCopyStringDictionaryKeyCallBacks,
1181         &kCFTypeDictionaryValueCallBacks
1182     );
1183
1184     if (!enc_info) return AVERROR(ENOMEM);
1185
1186 #if !TARGET_OS_IPHONE
1187     if (!vtctx->allow_sw) {
1188         CFDictionarySetValue(enc_info,
1189                              compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
1190                              kCFBooleanTrue);
1191     } else {
1192         CFDictionarySetValue(enc_info,
1193                              compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
1194                              kCFBooleanTrue);
1195     }
1196 #endif
1197
1198     if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
1199         status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
1200         if (status)
1201             goto init_cleanup;
1202     } else {
1203         pixel_buffer_info = NULL;
1204     }
1205
1206     pthread_mutex_init(&vtctx->lock, NULL);
1207     pthread_cond_init(&vtctx->cv_sample_sent, NULL);
1208     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
1209
1210     get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
1211     get_cv_ycbcr_matrix(avctx, &vtctx->ycbcr_matrix);
1212     get_cv_color_primaries(avctx, &vtctx->color_primaries);
1213
1214
1215     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
1216         status = vtenc_populate_extradata(avctx,
1217                                           codec_type,
1218                                           profile_level,
1219                                           gamma_level,
1220                                           enc_info,
1221                                           pixel_buffer_info);
1222         if (status)
1223             goto init_cleanup;
1224     }
1225
1226     status = vtenc_create_encoder(avctx,
1227                                   codec_type,
1228                                   profile_level,
1229                                   gamma_level,
1230                                   enc_info,
1231                                   pixel_buffer_info,
1232                                   &vtctx->session);
1233
1234     if (status < 0)
1235         goto init_cleanup;
1236
1237     status = VTSessionCopyProperty(vtctx->session,
1238                                    kVTCompressionPropertyKey_AllowFrameReordering,
1239                                    kCFAllocatorDefault,
1240                                    &has_b_frames_cfbool);
1241
1242     if (!status) {
1243         //Some devices don't output B-frames for main profile, even if requested.
1244         vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
1245         CFRelease(has_b_frames_cfbool);
1246     }
1247     avctx->has_b_frames = vtctx->has_b_frames;
1248
1249 init_cleanup:
1250     if (gamma_level)
1251         CFRelease(gamma_level);
1252
1253     if (pixel_buffer_info)
1254         CFRelease(pixel_buffer_info);
1255
1256     CFRelease(enc_info);
1257
1258     return status;
1259 }
1260
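/*
 * Determines whether an encoded sample is a sync (key) frame by inspecting
 * the kCMSampleAttachmentKey_NotSync attachment; samples without attachments
 * are treated as key frames.
 */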
1261 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
1262 {
1263     CFArrayRef      attachments;
1264     CFDictionaryRef attachment;
1265     CFBooleanRef    not_sync;
1266     CFIndex         len;
1267
1268     attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
1269     len = !attachments ? 0 : CFArrayGetCount(attachments);
1270
1271     if (!len) {
1272         *is_key_frame = true;
1273         return;
1274     }
1275
1276     attachment = CFArrayGetValueAtIndex(attachments, 0);
1277
1278     if (CFDictionaryGetValueIfPresent(attachment,
1279                                       kCMSampleAttachmentKey_NotSync,
1280                                       (const void **)&not_sync))
1281     {
1282         *is_key_frame = !CFBooleanGetValue(not_sync);
1283     } else {
1284         *is_key_frame = true;
1285     }
1286 }
1287
1288 static int is_post_sei_nal_type(int nal_type){
1289     return nal_type != H264_NAL_SEI &&
1290            nal_type != H264_NAL_SPS &&
1291            nal_type != H264_NAL_PPS &&
1292            nal_type != H264_NAL_AUD;
1293 }
1294
1295 /*
1296  * Finds the end of the SEI messages in an SEI NAL unit: stores it in *sei_end,
1297  * returning a positive byte count, 0 if the NAL is not SEI, or <0 on bad data.
1298  */
1299 static int find_sei_end(AVCodecContext *avctx,
1300                         uint8_t        *nal_data,
1301                         size_t          nal_size,
1302                         uint8_t       **sei_end)
1303 {
1304     int nal_type;
1305     size_t sei_payload_size = 0;
1306     int sei_payload_type = 0;
1307     *sei_end = NULL;
1308     uint8_t *nal_start = nal_data;
1309
1310     if (!nal_size)
1311         return 0;
1312
1313     nal_type = *nal_data & 0x1F;
1314     if (nal_type != H264_NAL_SEI)
1315         return 0;
1316
1317     nal_data++;
1318     nal_size--;
1319
1320     if (nal_data[nal_size - 1] == 0x80)
1321         nal_size--;
1322
1323     while (nal_size > 0 && *nal_data > 0) {
1324         do{
1325             sei_payload_type += *nal_data;
1326             nal_data++;
1327             nal_size--;
1328         } while (nal_size > 0 && *nal_data == 0xFF);
1329
1330         if (!nal_size) {
1331             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing type.\n");
1332             return AVERROR_INVALIDDATA;
1333         }
1334
1335         do{
1336             sei_payload_size += *nal_data;
1337             nal_data++;
1338             nal_size--;
1339         } while (nal_size > 0 && *nal_data == 0xFF);
1340
1341         if (nal_size < sei_payload_size) {
1342             av_log(avctx, AV_LOG_ERROR, "Unexpected end of SEI NAL Unit parsing size.\n");
1343             return AVERROR_INVALIDDATA;
1344         }
1345
1346         nal_data += sei_payload_size;
1347         nal_size -= sei_payload_size;
1348     }
1349
1350     *sei_end = nal_data;
1351
1352     return nal_data - nal_start + 1;
1353 }
1354
1355 /**
1356  * Copies the data inserting emulation prevention bytes as needed.
1357  * Existing data in the destination can be taken into account by providing
1358  * dst with a dst_offset > 0.
1359  *
1360  * @return The number of bytes copied on success. On failure, the negative of
1361  *         the number of bytes needed to copy src is returned.
1362  */
1363 static int copy_emulation_prev(const uint8_t *src,
1364                                size_t         src_size,
1365                                uint8_t       *dst,
1366                                ssize_t        dst_offset,
1367                                size_t         dst_size)
1368 {
1369     int zeros = 0;
1370     int wrote_bytes;
1371     uint8_t* dst_start;
1372     uint8_t* dst_end = dst + dst_size;
1373     const uint8_t* src_end = src + src_size;
1374     int start_at = dst_offset > 2 ? dst_offset - 2 : 0;
1375     int i;
1376     for (i = start_at; i < dst_offset && i < dst_size; i++) {
1377         if (!dst[i])
1378             zeros++;
1379         else
1380             zeros = 0;
1381     }
1382
1383     dst += dst_offset;
1384     dst_start = dst;
1385     for (; src < src_end; src++, dst++) {
1386         if (zeros == 2) {
1387             int insert_ep3_byte = *src <= 3;
1388             if (insert_ep3_byte) {
1389                 if (dst < dst_end)
1390                     *dst = 3;
1391                 dst++;
1392             }
1393
1394             zeros = 0;
1395         }
1396
1397         if (dst < dst_end)
1398             *dst = *src;
1399
1400         if (!*src)
1401             zeros++;
1402         else
1403             zeros = 0;
1404     }
1405
1406     wrote_bytes = dst - dst_start;
1407
1408     if (dst > dst_end)
1409         return -wrote_bytes;
1410
1411     return wrote_bytes;
1412 }
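/*
 * Worked example (illustrative, not part of the original file): with two zero
 * bytes pending, copy_emulation_prev() inserts an emulation prevention byte
 * 0x03 before any source byte <= 0x03, so the payload
 *     00 00 01 25 00 00 00
 * is written out as
 *     00 00 03 01 25 00 00 03 00
 * which prevents start codes from appearing inside the escaped SEI payload.
 */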
1413
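/*
 * Serializes one SEI message: the payload type and payload size are written
 * in the 0xFF-chunked form used by H.264 SEI headers, then the payload is
 * copied through copy_emulation_prev() so emulation prevention bytes are
 * inserted. Returns the number of bytes written (header plus escaped
 * payload) or AVERROR_BUFFER_TOO_SMALL.
 */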
1414 static int write_sei(const ExtraSEI *sei,
1415                      int             sei_type,
1416                      uint8_t        *dst,
1417                      size_t          dst_size)
1418 {
1419     uint8_t *sei_start = dst;
1420     size_t remaining_sei_size = sei->size;
1421     size_t remaining_dst_size = dst_size;
1422     int header_bytes;
1423     int bytes_written;
1424     ssize_t offset;
1425
1426     if (!remaining_dst_size)
1427         return AVERROR_BUFFER_TOO_SMALL;
1428
1429     while (sei_type && remaining_dst_size != 0) {
1430         int sei_byte = sei_type > 255 ? 255 : sei_type;
1431         *dst = sei_byte;
1432
1433         sei_type -= sei_byte;
1434         dst++;
1435         remaining_dst_size--;
1436     }
1437
1438     if (!remaining_dst_size)
1439         return AVERROR_BUFFER_TOO_SMALL;
1440
1441     while (remaining_sei_size && remaining_dst_size != 0) {
1442         int size_byte = remaining_sei_size > 255 ? 255 : remaining_sei_size;
1443         *dst = size_byte;
1444
1445         remaining_sei_size -= size_byte;
1446         dst++;
1447         remaining_dst_size--;
1448     }
1449
1450     if (remaining_dst_size < sei->size)
1451         return AVERROR_BUFFER_TOO_SMALL;
1452
1453     header_bytes = dst - sei_start;
1454
1455     offset = header_bytes;
1456     bytes_written = copy_emulation_prev(sei->data,
1457                                         sei->size,
1458                                         sei_start,
1459                                         offset,
1460                                         dst_size);
1461     if (bytes_written < 0)
1462         return AVERROR_BUFFER_TOO_SMALL;
1463
1464     bytes_written += header_bytes;
1465     return bytes_written;
1466 }
1467
1468 /**
1469  * Copies NAL units and replaces length codes with
1470  * H.264 Annex B start codes. On failure, the contents of
1471  * dst_data may have been modified.
1472  *
1473  * @param length_code_size Byte length of each length code
1474  * @param sample_buffer NAL units prefixed with length codes.
1475  * @param sei Optional A53 closed captions SEI data.
1476  * @param dst_data Must be zeroed before calling this function.
1477  *                 Contains the copied NAL units prefixed with
1478  *                 start codes when the function returns
1479  *                 successfully.
1480  * @param dst_size Length of dst_data
1481  * @return 0 on success
1482  *         AVERROR_INVALIDDATA if length_code_size is invalid
1483  *         AVERROR_BUFFER_TOO_SMALL if dst_data is too small
1484  *         or if a length_code in src_data specifies data beyond
1485  *         the end of its buffer.
1486  */
1487 static int copy_replace_length_codes(
1488     AVCodecContext *avctx,
1489     size_t        length_code_size,
1490     CMSampleBufferRef sample_buffer,
1491     ExtraSEI      *sei,
1492     uint8_t       *dst_data,
1493     size_t        dst_size)
1494 {
1495     size_t src_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1496     size_t remaining_src_size = src_size;
1497     size_t remaining_dst_size = dst_size;
1498     size_t src_offset = 0;
1499     int wrote_sei = 0;
1500     int status;
1501     uint8_t size_buf[4];
1502     uint8_t nal_type;
1503     CMBlockBufferRef block = CMSampleBufferGetDataBuffer(sample_buffer);
1504
1505     if (length_code_size > 4) {
1506         return AVERROR_INVALIDDATA;
1507     }
1508
1509     while (remaining_src_size > 0) {
1510         size_t curr_src_len;
1511         size_t curr_dst_len;
1512         size_t box_len = 0;
1513         size_t i;
1514
1515         uint8_t       *dst_box;
1516
1517         status = CMBlockBufferCopyDataBytes(block,
1518                                             src_offset,
1519                                             length_code_size,
1520                                             size_buf);
1521         if (status) {
1522             av_log(avctx, AV_LOG_ERROR, "Cannot copy length: %d\n", status);
1523             return AVERROR_EXTERNAL;
1524         }
1525
1526         status = CMBlockBufferCopyDataBytes(block,
1527                                             src_offset + length_code_size,
1528                                             1,
1529                                             &nal_type);
1530
1531         if (status) {
1532             av_log(avctx, AV_LOG_ERROR, "Cannot copy type: %d\n", status);
1533             return AVERROR_EXTERNAL;
1534         }
1535
1536         nal_type &= 0x1F;
1537
1538         for (i = 0; i < length_code_size; i++) {
1539             box_len <<= 8;
1540             box_len |= size_buf[i];
1541         }
1542
1543         if (sei && !wrote_sei && is_post_sei_nal_type(nal_type)) {
1544             //No SEI NAL unit - insert.
1545             int wrote_bytes;
1546
1547             memcpy(dst_data, start_code, sizeof(start_code));
1548             dst_data += sizeof(start_code);
1549             remaining_dst_size -= sizeof(start_code);
1550
1551             *dst_data = H264_NAL_SEI;
1552             dst_data++;
1553             remaining_dst_size--;
1554
1555             wrote_bytes = write_sei(sei,
1556                                     SEI_TYPE_USER_DATA_REGISTERED,
1557                                     dst_data,
1558                                     remaining_dst_size);
1559
1560             if (wrote_bytes < 0)
1561                 return wrote_bytes;
1562
1563             remaining_dst_size -= wrote_bytes;
1564             dst_data += wrote_bytes;
1565
1566             if (remaining_dst_size <= 0)
1567                 return AVERROR_BUFFER_TOO_SMALL;
1568
1569             *dst_data = 0x80;
1570
1571             dst_data++;
1572             remaining_dst_size--;
1573
1574             wrote_sei = 1;
1575         }
1576
1577         curr_src_len = box_len + length_code_size;
1578         curr_dst_len = box_len + sizeof(start_code);
1579
1580         if (remaining_src_size < curr_src_len) {
1581             return AVERROR_BUFFER_TOO_SMALL;
1582         }
1583
1584         if (remaining_dst_size < curr_dst_len) {
1585             return AVERROR_BUFFER_TOO_SMALL;
1586         }
1587
1588         dst_box = dst_data + sizeof(start_code);
1589
1590         memcpy(dst_data, start_code, sizeof(start_code));
1591         status = CMBlockBufferCopyDataBytes(block,
1592                                             src_offset + length_code_size,
1593                                             box_len,
1594                                             dst_box);
1595
1596         if (status) {
1597             av_log(avctx, AV_LOG_ERROR, "Cannot copy data: %d\n", status);
1598             return AVERROR_EXTERNAL;
1599         }
1600
1601         if (sei && !wrote_sei && nal_type == H264_NAL_SEI) {
1602             //Found SEI NAL unit - append.
1603             int wrote_bytes;
1604             int old_sei_length;
1605             int extra_bytes;
1606             uint8_t *new_sei;
1607             old_sei_length = find_sei_end(avctx, dst_box, box_len, &new_sei);
1608             if (old_sei_length < 0)
1609                 return old_sei_length;
1610
1611             wrote_bytes = write_sei(sei,
1612                                     SEI_TYPE_USER_DATA_REGISTERED,
1613                                     new_sei,
1614                                     remaining_dst_size - old_sei_length);
1615             if (wrote_bytes < 0)
1616                 return wrote_bytes;
1617
1618             if (new_sei + wrote_bytes >= dst_data + remaining_dst_size)
1619                 return AVERROR_BUFFER_TOO_SMALL;
1620
1621             new_sei[wrote_bytes++] = 0x80;
1622             extra_bytes = wrote_bytes - (dst_box + box_len - new_sei);
1623
1624             dst_data += extra_bytes;
1625             remaining_dst_size -= extra_bytes;
1626
1627             wrote_sei = 1;
1628         }
1629
1630         src_offset += curr_src_len;
1631         dst_data += curr_dst_len;
1632
1633         remaining_src_size -= curr_src_len;
1634         remaining_dst_size -= curr_dst_len;
1635     }
1636
1637     return 0;
1638 }
1639
1640 /**
1641  * Returns a sufficient number of bytes to contain the sei data.
1642  * It may be greater than the minimum required.
1643  */
1644 static int get_sei_msg_bytes(const ExtraSEI *sei, int type) {
1645     int copied_size;
1646     if (sei->size == 0)
1647         return 0;
1648
1649     copied_size = -copy_emulation_prev(sei->data,
1650                                        sei->size,
1651                                        NULL,
1652                                        0,
1653                                        0);
1654
1655     if ((sei->size % 255) == 0) //may result in an extra byte
1656         copied_size++;
1657
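    /* The SEI payload type and payload size are each coded as a run of 0xFF
     * bytes followed by one final byte, so they take type / 255 + 1 and
     * sei->size / 255 + 1 bytes on top of the escaped payload itself. */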
1658     return copied_size + sei->size / 255 + 1 + type / 255 + 1;
1659 }
1660
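/**
 * Converts an encoded CMSampleBuffer into an AVPacket: prepends parameter
 * sets on keyframes when global headers are not in use, rewrites the
 * length-prefixed NAL units as Annex B, optionally inserts an A53 closed
 * captions SEI NAL unit, and derives pts/dts from the sample buffer's
 * timestamps.
 */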
1661 static int vtenc_cm_to_avpacket(
1662     AVCodecContext    *avctx,
1663     CMSampleBufferRef sample_buffer,
1664     AVPacket          *pkt,
1665     ExtraSEI          *sei)
1666 {
1667     VTEncContext *vtctx = avctx->priv_data;
1668
1669     int     status;
1670     bool    is_key_frame;
1671     bool    add_header;
1672     size_t  length_code_size;
1673     size_t  header_size = 0;
1674     size_t  in_buf_size;
1675     size_t  out_buf_size;
1676     size_t  sei_nalu_size = 0;
1677     int64_t dts_delta;
1678     int64_t time_base_num;
1679     int nalu_count;
1680     CMTime  pts;
1681     CMTime  dts;
1682     CMVideoFormatDescriptionRef vid_fmt;
1683
1684
1685     vtenc_get_frame_info(sample_buffer, &is_key_frame);
1686     status = get_length_code_size(avctx, sample_buffer, &length_code_size);
1687     if (status) return status;
1688
1689     add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
1690
1691     if (add_header) {
1692         vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
1693         if (!vid_fmt) {
1694             av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
1695             return AVERROR_EXTERNAL;
1696         }
1697
1698         status = get_params_size(avctx, vid_fmt, &header_size);
1699         if (status) return status;
1700     }
1701
1702     status = count_nalus(length_code_size, sample_buffer, &nalu_count);
1703     if (status)
1704         return status;
1705
1706     if (sei) {
1707         size_t msg_size = get_sei_msg_bytes(sei,
1708                                             SEI_TYPE_USER_DATA_REGISTERED);
1709
1710         sei_nalu_size = sizeof(start_code) + 1 + msg_size + 1;
1711     }
1712
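    /* Each NAL unit's length prefix is replaced by a start code, so the
     * output size changes by sizeof(start_code) - length_code_size bytes
     * per NAL unit, plus room for the parameter sets and the optional SEI. */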
1713     in_buf_size = CMSampleBufferGetTotalSampleSize(sample_buffer);
1714     out_buf_size = header_size +
1715                    in_buf_size +
1716                    sei_nalu_size +
1717                    nalu_count * ((int)sizeof(start_code) - (int)length_code_size);
1718
1719     status = ff_alloc_packet2(avctx, pkt, out_buf_size, out_buf_size);
1720     if (status < 0)
1721         return status;
1722
1723     if (add_header) {
1724         status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
1725         if (status) return status;
1726     }
1727
1728     status = copy_replace_length_codes(
1729         avctx,
1730         length_code_size,
1731         sample_buffer,
1732         sei,
1733         pkt->data + header_size,
1734         pkt->size - header_size
1735     );
1736
1737     if (status) {
1738         av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d\n", status);
1739         return status;
1740     }
1741
1742     if (is_key_frame) {
1743         pkt->flags |= AV_PKT_FLAG_KEY;
1744     }
1745
1746     pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
1747     dts = CMSampleBufferGetDecodeTimeStamp      (sample_buffer);
1748
1749     if (CMTIME_IS_INVALID(dts)) {
1750         if (!vtctx->has_b_frames) {
1751             dts = pts;
1752         } else {
1753             av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
1754             return AVERROR_EXTERNAL;
1755         }
1756     }
1757
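    /* The frame was submitted with a CMTime value of pts * time_base.num
     * (see vtenc_send_frame), so dividing by time_base.num recovers the
     * original pts; dts is additionally shifted back by the B-frame delay. */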
1758     dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
1759     time_base_num = avctx->time_base.num;
1760     pkt->pts = pts.value / time_base_num;
1761     pkt->dts = dts.value / time_base_num - dts_delta;
1762     pkt->size = out_buf_size;
1763
1764     return 0;
1765 }
1766
1767 /*
1768  * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
1769  * containing all planes if so.
1770  */
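/*
 * Fills per-plane widths, heights and strides for the pixel formats this
 * encoder accepts (NV12 and YUV420P) and maps the frame's format and color
 * range to a CoreVideo pixel format constant.
 */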
1771 static int get_cv_pixel_info(
1772     AVCodecContext *avctx,
1773     const AVFrame  *frame,
1774     int            *color,
1775     int            *plane_count,
1776     size_t         *widths,
1777     size_t         *heights,
1778     size_t         *strides,
1779     size_t         *contiguous_buf_size)
1780 {
1781     VTEncContext *vtctx = avctx->priv_data;
1782     int av_format       = frame->format;
1783     int av_color_range  = av_frame_get_color_range(frame);
1784     int i;
1785     int range_guessed;
1786     int status;
1787
1788     status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
1789     if (status) {
1790         av_log(avctx,
1791             AV_LOG_ERROR,
1792             "Could not get pixel format for color format '%s' range '%s'.\n",
1793             av_get_pix_fmt_name(av_format),
1794             av_color_range > AVCOL_RANGE_UNSPECIFIED &&
1795             av_color_range < AVCOL_RANGE_NB ?
1796                av_color_range_name(av_color_range) :
1797                "Unknown");
1798
1799         return AVERROR(EINVAL);
1800     }
1801
1802     if (range_guessed) {
1803         if (!vtctx->warned_color_range) {
1804             vtctx->warned_color_range = true;
1805             av_log(avctx,
1806                    AV_LOG_WARNING,
1807                    "Color range not set for %s. Using MPEG range.\n",
1808                    av_get_pix_fmt_name(av_format));
1809         }
1812     }
1813
1814     switch (av_format) {
1815     case AV_PIX_FMT_NV12:
1816         *plane_count = 2;
1817
1818         widths [0] = avctx->width;
1819         heights[0] = avctx->height;
1820         strides[0] = frame ? frame->linesize[0] : avctx->width;
1821
1822         widths [1] = (avctx->width  + 1) / 2;
1823         heights[1] = (avctx->height + 1) / 2;
1824         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
1825         break;
1826
1827     case AV_PIX_FMT_YUV420P:
1828         *plane_count = 3;
1829
1830         widths [0] = avctx->width;
1831         heights[0] = avctx->height;
1832         strides[0] = frame ? frame->linesize[0] : avctx->width;
1833
1834         widths [1] = (avctx->width  + 1) / 2;
1835         heights[1] = (avctx->height + 1) / 2;
1836         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;
1837
1838         widths [2] = (avctx->width  + 1) / 2;
1839         heights[2] = (avctx->height + 1) / 2;
1840         strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
1841         break;
1842
1843     default:
1844         av_log(
1845                avctx,
1846                AV_LOG_ERROR,
1847                "Could not get frame format info for color %d range %d.\n",
1848                av_format,
1849                av_color_range);
1850
1851         return AVERROR(EINVAL);
1852     }
1853
1854     *contiguous_buf_size = 0;
1855     for (i = 0; i < *plane_count; i++) {
1856         if (i < *plane_count - 1 &&
1857             frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
1858             *contiguous_buf_size = 0;
1859             break;
1860         }
1861
1862         *contiguous_buf_size += strides[i] * heights[i];
1863     }
1864
1865     return 0;
1866 }
1867
1868 #if !TARGET_OS_IPHONE
1869 //Not used on iOS - frame is always copied.
1870 static void free_avframe(
1871     void       *release_ctx,
1872     const void *data,
1873     size_t      size,
1874     size_t      plane_count,
1875     const void *plane_addresses[])
1876 {
1877     AVFrame *frame = release_ctx;
1878     av_frame_free(&frame);
1879 }
1880 #else
1881 //Not used on OSX - frame is never copied.
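/* Copies the AVFrame's planes into a CVPixelBuffer drawn from the session's
 * pixel buffer pool, locking the buffer's base address and copying row by
 * row when the source and destination strides differ. */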
1882 static int copy_avframe_to_pixel_buffer(AVCodecContext   *avctx,
1883                                         const AVFrame    *frame,
1884                                         CVPixelBufferRef cv_img,
1885                                         const size_t     *plane_strides,
1886                                         const size_t     *plane_rows)
1887 {
1888     int i, j;
1889     size_t plane_count;
1890     int status;
1891     int rows;
1892     int src_stride;
1893     int dst_stride;
1894     uint8_t *src_addr;
1895     uint8_t *dst_addr;
1896     size_t copy_bytes;
1897
1898     status = CVPixelBufferLockBaseAddress(cv_img, 0);
1899     if (status) {
1900         av_log(avctx,
1901                AV_LOG_ERROR,
1902                "Error: Could not lock base address of CVPixelBuffer: %d.\n",
1903                status);
1904
1905         return AVERROR_EXTERNAL;
1906     }
1907
1908     if (CVPixelBufferIsPlanar(cv_img)) {
1909         plane_count = CVPixelBufferGetPlaneCount(cv_img);
1910         for (i = 0; frame->data[i]; i++) {
1911             if (i == plane_count) {
1912                 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1913                 av_log(avctx,
1914                     AV_LOG_ERROR,
1915                     "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
1916                 );
1917
1918                 return AVERROR_EXTERNAL;
1919             }
1920
1921             dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
1922             src_addr = (uint8_t*)frame->data[i];
1923             dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
1924             src_stride = plane_strides[i];
1925             rows = plane_rows[i];
1926
1927             if (dst_stride == src_stride) {
1928                 memcpy(dst_addr, src_addr, src_stride * rows);
1929             } else {
1930                 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1931
1932                 for (j = 0; j < rows; j++) {
1933                     memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1934                 }
1935             }
1936         }
1937     } else {
1938         if (frame->data[1]) {
1939             CVPixelBufferUnlockBaseAddress(cv_img, 0);
1940             av_log(avctx,
1941                 AV_LOG_ERROR,
1942                 "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
1943             );
1944
1945             return AVERROR_EXTERNAL;
1946         }
1947
1948         dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
1949         src_addr = (uint8_t*)frame->data[0];
1950         dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
1951         src_stride = plane_strides[0];
1952         rows = plane_rows[0];
1953
1954         if (dst_stride == src_stride) {
1955             memcpy(dst_addr, src_addr, src_stride * rows);
1956         } else {
1957             copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1958
1959             for (j = 0; j < rows; j++) {
1960                 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1961             }
1962         }
1963     }
1964
1965     status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
1966     if (status) {
1967         av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
1968         return AVERROR_EXTERNAL;
1969     }
1970
1971     return 0;
1972 }
1973 #endif //!TARGET_OS_IPHONE
1974
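/**
 * Obtains a CVPixelBufferRef for the given frame. AV_PIX_FMT_VIDEOTOOLBOX
 * frames are passed through and retained; on iOS the frame data is copied
 * into a buffer from the compression session's pool; on OSX the frame's
 * planes are wrapped without copying and released later via free_avframe.
 */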
1975 static int create_cv_pixel_buffer(AVCodecContext   *avctx,
1976                                   const AVFrame    *frame,
1977                                   CVPixelBufferRef *cv_img)
1978 {
1979     int plane_count;
1980     int color;
1981     size_t widths [AV_NUM_DATA_POINTERS];
1982     size_t heights[AV_NUM_DATA_POINTERS];
1983     size_t strides[AV_NUM_DATA_POINTERS];
1984     int status;
1985     size_t contiguous_buf_size;
1986 #if TARGET_OS_IPHONE
1987     CVPixelBufferPoolRef pix_buf_pool;
1988     VTEncContext *vtctx = avctx->priv_data;
1989 #else
1990     CFMutableDictionaryRef pix_buf_attachments = CFDictionaryCreateMutable(
1991                                                    kCFAllocatorDefault,
1992                                                    10,
1993                                                    &kCFCopyStringDictionaryKeyCallBacks,
1994                                                    &kCFTypeDictionaryValueCallBacks);
1995
1996     if (!pix_buf_attachments) return AVERROR(ENOMEM);
1997 #endif
1998
1999     if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
2000         av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
2001
2002         *cv_img = (CVPixelBufferRef)frame->data[3];
2003         av_assert0(*cv_img);
2004
2005         CFRetain(*cv_img);
2006         return 0;
2007     }
2008
2009     memset(widths,  0, sizeof(widths));
2010     memset(heights, 0, sizeof(heights));
2011     memset(strides, 0, sizeof(strides));
2012
2013     status = get_cv_pixel_info(
2014         avctx,
2015         frame,
2016         &color,
2017         &plane_count,
2018         widths,
2019         heights,
2020         strides,
2021         &contiguous_buf_size
2022     );
2023
2024     if (status) {
2025         av_log(
2026             avctx,
2027             AV_LOG_ERROR,
2028             "Error: Cannot convert format %d color_range %d: %d\n",
2029             frame->format,
2030             av_frame_get_color_range(frame),
2031             status
2032         );
2033
2034         return AVERROR_EXTERNAL;
2035     }
2036
2037 #if TARGET_OS_IPHONE
2038     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
2039     if (!pix_buf_pool) {
2040         av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
2041         return AVERROR_EXTERNAL;
2042     }
2043
2044     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
2045                                                 pix_buf_pool,
2046                                                 cv_img);
2047
2048
2049     if (status) {
2050         av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
2051         return AVERROR_EXTERNAL;
2052     }
2053
2054     status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
2055     if (status) {
2056         CFRelease(*cv_img);
2057         *cv_img = NULL;
2058         return status;
2059     }
2060 #else
2061     AVFrame *enc_frame = av_frame_alloc();
2062     if (!enc_frame) return AVERROR(ENOMEM);
2063
2064     status = av_frame_ref(enc_frame, frame);
2065     if (status) {
2066         av_frame_free(&enc_frame);
2067         return status;
2068     }
2069
2070     status = CVPixelBufferCreateWithPlanarBytes(
2071         kCFAllocatorDefault,
2072         enc_frame->width,
2073         enc_frame->height,
2074         color,
2075         NULL,
2076         contiguous_buf_size,
2077         plane_count,
2078         (void **)enc_frame->data,
2079         widths,
2080         heights,
2081         strides,
2082         free_avframe,
2083         enc_frame,
2084         NULL,
2085         cv_img
2086     );
2087
2088     if (status) {
2089         av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
2090         CFRelease(pix_buf_attachments);
2091         return AVERROR_EXTERNAL;
2092     }
2093     add_color_attr(avctx, pix_buf_attachments);
2094     CVBufferSetAttachments(*cv_img, pix_buf_attachments, kCVAttachmentMode_ShouldPropagate);
2095     CFRelease(pix_buf_attachments);
2096 #endif
2097
2098     return 0;
2099 }
2100
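/**
 * Builds the per-frame encode options dictionary. Currently this only
 * forces a keyframe when the incoming frame is an I-frame; otherwise
 * *dict_out is set to NULL. The caller releases the dictionary.
 */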
2101 static int create_encoder_dict_h264(const AVFrame *frame,
2102                                     CFDictionaryRef *dict_out)
2103 {
2104     CFDictionaryRef dict = NULL;
2105     if (frame->pict_type == AV_PICTURE_TYPE_I) {
2106         const void *keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame };
2107         const void *vals[] = { kCFBooleanTrue };
2108
2109         dict = CFDictionaryCreate(NULL, keys, vals, 1, NULL, NULL);
2110         if (!dict) return AVERROR(ENOMEM);
2111     }
2112
2113     *dict_out = dict;
2114     return 0;
2115 }
2116
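/**
 * Submits one frame to the compression session. When A53 closed caption
 * side data is present and enabled, it is wrapped in an ExtraSEI struct and
 * passed as the sourceFrameRefcon, so it travels with the frame to the
 * output callback (not shown here), which is expected to queue it alongside
 * the encoded sample.
 */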
2117 static int vtenc_send_frame(AVCodecContext *avctx,
2118                             VTEncContext   *vtctx,
2119                             const AVFrame  *frame)
2120 {
2121     CMTime time;
2122     CFDictionaryRef frame_dict;
2123     CVPixelBufferRef cv_img = NULL;
2124     AVFrameSideData *side_data = NULL;
2125     ExtraSEI *sei = NULL;
2126     int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
2127
2128     if (status) return status;
2129
2130     status = create_encoder_dict_h264(frame, &frame_dict);
2131     if (status) {
2132         CFRelease(cv_img);
2133         return status;
2134     }
2135
2136     side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
2137     if (vtctx->a53_cc && side_data && side_data->size) {
2138         sei = av_mallocz(sizeof(*sei));
2139         if (!sei) {
2140             av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2141         } else {
2142             int ret = ff_alloc_a53_sei(frame, 0, &sei->data, &sei->size);
2143             if (ret < 0) {
2144                 av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
2145                 av_free(sei);
2146                 sei = NULL;
2147             }
2148         }
2149     }
2150
2151     time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
2152     status = VTCompressionSessionEncodeFrame(
2153         vtctx->session,
2154         cv_img,
2155         time,
2156         kCMTimeInvalid,
2157         frame_dict,
2158         sei,
2159         NULL
2160     );
2161
2162     if (frame_dict) CFRelease(frame_dict);
2163     CFRelease(cv_img);
2164
2165     if (status) {
2166         av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
2167         return AVERROR_EXTERNAL;
2168     }
2169
2170     return 0;
2171 }
2172
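/**
 * encode2 callback: sends the frame to the session (or flushes the session
 * when frame is NULL), then, once the B-frame dts delta is known or we are
 * draining, pops a finished sample from the queue and converts it into an
 * AVPacket together with any queued SEI data.
 */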
2173 static int vtenc_frame(
2174     AVCodecContext *avctx,
2175     AVPacket       *pkt,
2176     const AVFrame  *frame,
2177     int            *got_packet)
2178 {
2179     VTEncContext *vtctx = avctx->priv_data;
2180     bool get_frame;
2181     int status;
2182     CMSampleBufferRef buf = NULL;
2183     ExtraSEI *sei = NULL;
2184
2185     if (frame) {
2186         status = vtenc_send_frame(avctx, vtctx, frame);
2187
2188         if (status) {
2189             status = AVERROR_EXTERNAL;
2190             goto end_nopkt;
2191         }
2192
2193         if (vtctx->frame_ct_in == 0) {
2194             vtctx->first_pts = frame->pts;
2195         } else if (vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
2196             vtctx->dts_delta = frame->pts - vtctx->first_pts;
2197         }
2198
2199         vtctx->frame_ct_in++;
2200     } else if (!vtctx->flushing) {
2201         vtctx->flushing = true;
2202
2203         status = VTCompressionSessionCompleteFrames(vtctx->session,
2204                                                     kCMTimeIndefinite);
2205
2206         if (status) {
2207             av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
2208             status = AVERROR_EXTERNAL;
2209             goto end_nopkt;
2210         }
2211     }
2212
2213     *got_packet = 0;
2214     get_frame = vtctx->dts_delta >= 0 || !frame;
2215     if (!get_frame) {
2216         status = 0;
2217         goto end_nopkt;
2218     }
2219
2220     status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
2221     if (status) goto end_nopkt;
2222     if (!buf)   goto end_nopkt;
2223
2224     status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei);
2225     if (sei) {
2226         av_free(sei->data);
2227         av_free(sei);
2228     }
2229     CFRelease(buf);
2230     if (status) goto end_nopkt;
2231
2232     *got_packet = 1;
2233     return 0;
2234
2235 end_nopkt:
2236     av_packet_unref(pkt);
2237     return status;
2238 }
2239
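/**
 * Creates a temporary compression session, encodes a single dummy frame and
 * flushes it so that the parameter sets become available; the output path
 * is expected to populate avctx->extradata as a side effect (asserted at
 * the end). The temporary session is torn down before returning.
 */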
2240 static int vtenc_populate_extradata(AVCodecContext   *avctx,
2241                                     CMVideoCodecType codec_type,
2242                                     CFStringRef      profile_level,
2243                                     CFNumberRef      gamma_level,
2244                                     CFDictionaryRef  enc_info,
2245                                     CFDictionaryRef  pixel_buffer_info)
2246 {
2247     VTEncContext *vtctx = avctx->priv_data;
2248     AVFrame *frame = av_frame_alloc();
2249     int y_size = avctx->width * avctx->height;
2250     int chroma_size = ((avctx->width + 1) / 2) * ((avctx->height + 1) / 2); //round up for odd dimensions
2251     CMSampleBufferRef buf = NULL;
2252     int status;
2253
2254     if (!frame)
2255         return AVERROR(ENOMEM);
2256
2257     frame->buf[0] = av_buffer_alloc(y_size + 2 * chroma_size);
2258
2259     if (!frame->buf[0]) {
2260         status = AVERROR(ENOMEM);
2261         goto pe_cleanup;
2262     }
2263
2264     status = vtenc_create_encoder(avctx,
2265                                   codec_type,
2266                                   profile_level,
2267                                   gamma_level,
2268                                   enc_info,
2269                                   pixel_buffer_info,
2270                                   &vtctx->session);
2271     if (status)
2272         goto pe_cleanup;
2273
2274     frame->data[0] = frame->buf[0]->data;
2275     memset(frame->data[0],   0,      y_size);
2276
2277     frame->data[1] = frame->buf[0]->data + y_size;
2278     memset(frame->data[1], 128, chroma_size);
2279
2280
2281     if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2282         frame->data[2] = frame->buf[0]->data + y_size + chroma_size;
2283         memset(frame->data[2], 128, chroma_size);
2284     }
2285
2286     frame->linesize[0] = avctx->width;
2287
2288     if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
2289         frame->linesize[1] =
2290         frame->linesize[2] = (avctx->width + 1) / 2;
2291     } else {
2292         frame->linesize[1] = (avctx->width + 1) / 2;
2293     }
2294
2295     frame->format          = avctx->pix_fmt;
2296     frame->width           = avctx->width;
2297     frame->height          = avctx->height;
2298     av_frame_set_colorspace(frame, avctx->colorspace);
2299     av_frame_set_color_range(frame, avctx->color_range);
2300     frame->color_trc       = avctx->color_trc;
2301     frame->color_primaries = avctx->color_primaries;
2302
2303     frame->pts = 0;
2304     status = vtenc_send_frame(avctx, vtctx, frame);
2305     if (status) {
2306         av_log(avctx, AV_LOG_ERROR, "Error sending frame: %d\n", status);
2307         goto pe_cleanup;
2308     }
2309
2310     //Populates extradata - output frames are flushed and param sets are available.
2311     status = VTCompressionSessionCompleteFrames(vtctx->session,
2312                                                 kCMTimeIndefinite);
2313
2314     if (status)
2315         goto pe_cleanup;
2316
2317     status = vtenc_q_pop(vtctx, 0, &buf, NULL);
2318     if (status) {
2319         av_log(avctx, AV_LOG_ERROR, "Error popping encoded frame: %d\n", status);
2320         goto pe_cleanup;
2321     }
2322
2323     CFRelease(buf);
2324
2327 pe_cleanup:
2328     if (vtctx->session)
2329         CFRelease(vtctx->session);
2330
2331     vtctx->session = NULL;
2332     vtctx->frame_ct_out = 0;
2333
2334     av_frame_unref(frame);
2335     av_frame_free(&frame);
2336
2337     av_assert0(status != 0 || (avctx->extradata && avctx->extradata_size > 0));
2338
2339     return status;
2340 }
2341
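/**
 * Flushes any outstanding frames, drains the frame queue and releases the
 * session together with the cached color metadata references.
 */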
2342 static av_cold int vtenc_close(AVCodecContext *avctx)
2343 {
2344     VTEncContext *vtctx = avctx->priv_data;
2345
2346     if (!vtctx->session) return 0;
2347
2348     VTCompressionSessionCompleteFrames(vtctx->session,
2349                                        kCMTimeIndefinite);
2350     clear_frame_queue(vtctx);
2351     pthread_cond_destroy(&vtctx->cv_sample_sent);
2352     pthread_mutex_destroy(&vtctx->lock);
2353     CFRelease(vtctx->session);
2354     vtctx->session = NULL;
2355
2356     if (vtctx->color_primaries) {
2357         CFRelease(vtctx->color_primaries);
2358         vtctx->color_primaries = NULL;
2359     }
2360
2361     if (vtctx->transfer_function) {
2362         CFRelease(vtctx->transfer_function);
2363         vtctx->transfer_function = NULL;
2364     }
2365
2366     if (vtctx->ycbcr_matrix) {
2367         CFRelease(vtctx->ycbcr_matrix);
2368         vtctx->ycbcr_matrix = NULL;
2369     }
2370
2371     return 0;
2372 }
2373
2374 static const enum AVPixelFormat pix_fmts[] = {
2375     AV_PIX_FMT_VIDEOTOOLBOX,
2376     AV_PIX_FMT_NV12,
2377     AV_PIX_FMT_YUV420P,
2378     AV_PIX_FMT_NONE
2379 };
2380
2381 #define OFFSET(x) offsetof(VTEncContext, x)
2382 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
2383 static const AVOption options[] = {
2384     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
2385     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
2386     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
2387     { "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },
2388
2389     { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
2390     { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
2391     { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
2392     { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
2393     { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
2394     { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
2395     { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
2396     { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
2397     { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
2398     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
2399     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
2400
2401     { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
2402         { .i64 = 0 }, 0, 1, VE },
2403
2404     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
2405     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2406     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
2407     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2408     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
2409
2410     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
2411         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2412
2413     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
2414         OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2415     { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
2416         OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
2417
2418     { "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },
2419
2420     { NULL },
2421 };
2422
2423 static const AVClass h264_videotoolbox_class = {
2424     .class_name = "h264_videotoolbox",
2425     .item_name  = av_default_item_name,
2426     .option     = options,
2427     .version    = LIBAVUTIL_VERSION_INT,
2428 };
2429
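/*
 * Example invocation (hypothetical file names; assumes FFmpeg was
 * configured with VideoToolbox support):
 *
 *   ffmpeg -i input.mov -c:v h264_videotoolbox -b:v 2M -profile:v high \
 *          -allow_sw 1 output.mp4
 */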
2430 AVCodec ff_h264_videotoolbox_encoder = {
2431     .name             = "h264_videotoolbox",
2432     .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
2433     .type             = AVMEDIA_TYPE_VIDEO,
2434     .id               = AV_CODEC_ID_H264,
2435     .priv_data_size   = sizeof(VTEncContext),
2436     .pix_fmts         = pix_fmts,
2437     .init             = vtenc_init,
2438     .encode2          = vtenc_frame,
2439     .close            = vtenc_close,
2440     .capabilities     = AV_CODEC_CAP_DELAY,
2441     .priv_class       = &h264_videotoolbox_class,
2442     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
2443                         FF_CODEC_CAP_INIT_CLEANUP,
2444 };