2 * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include <VideoToolbox/VideoToolbox.h>
22 #include <CoreVideo/CoreVideo.h>
23 #include <CoreMedia/CoreMedia.h>
24 #include <TargetConditionals.h>
25 #include <Availability.h>
27 #include "libavutil/opt.h"
28 #include "libavutil/avassert.h"
29 #include "libavutil/atomic.h"
30 #include "libavutil/avstring.h"
31 #include "libavcodec/avcodec.h"
32 #include "libavutil/pixdesc.h"
/* H.264 profiles selectable via the "profile" private option (see options[]). */
36 typedef enum VT_H264Profile {
/* Entropy-coding modes for the "coder" private option (CAVLC / CABAC). */
44 typedef enum VTH264Entropy{
/* 4-byte H.264 Annex B start code; AVCC length prefixes are replaced with this. */
50 static const uint8_t start_code[] = { 0, 0, 0, 1 };
/* Node of the singly linked output queue; owns one encoded CMSampleBuffer. */
52 typedef struct BufNode {
53     CMSampleBufferRef cm_buffer;
/*
 * Private codec context for the VideoToolbox H.264 encoder.
 * Holds the compression session plus the lock/condvar-protected queue of
 * encoded samples produced asynchronously by vtenc_output_callback.
 */
58 typedef struct VTEncContext {
60     VTCompressionSessionRef session;
63     pthread_cond_t cv_sample_sent; /* signaled when the output callback queues a sample */
80     int64_t frames_before; /* backing store for the "frames_before" option */
87     bool warned_color_range; /* warn only once about a guessed color range */
91  * NULL-safe release of *refPtr, and sets value to NULL.
/* CFRelease itself is not NULL-safe, hence this wrapper. */
93 static void vt_release_num(CFNumberRef* refPtr){
/*
 * Records err as the pending asynchronous encoder error and drains the
 * output queue, releasing every queued sample buffer. Runs under vtctx->lock.
 */
102 static void set_async_error(VTEncContext *vtctx, int err)
106     pthread_mutex_lock(&vtctx->lock);
108     vtctx->async_error = err;
110     info = vtctx->q_head;
/* Detach the whole queue before walking it so the list is never seen half-freed. */
111     vtctx->q_head = vtctx->q_tail = NULL;
114         BufNode *next = info->next;
115         CFRelease(info->cm_buffer);
120     pthread_mutex_unlock(&vtctx->lock);
/*
 * Pops the next encoded sample from the queue into *buf.
 * If wait is true, blocks on cv_sample_sent until a sample arrives or an
 * async error is recorded. Returns the pending async error, if any.
 * Caller takes ownership of *buf (must CFRelease it).
 */
123 static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf)
127     pthread_mutex_lock(&vtctx->lock);
/* A previously recorded asynchronous error takes priority over queued data. */
129     if (vtctx->async_error) {
130         pthread_mutex_unlock(&vtctx->lock);
131         return vtctx->async_error;
/* Flush complete: every submitted frame has been returned. */
134     if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
137         pthread_mutex_unlock(&vtctx->lock);
141     while (!vtctx->q_head && !vtctx->async_error && wait) {
142         pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
145     if (!vtctx->q_head) {
146         pthread_mutex_unlock(&vtctx->lock);
151     info = vtctx->q_head;
152     vtctx->q_head = vtctx->q_head->next;
153     if (!vtctx->q_head) {
154         vtctx->q_tail = NULL;
157     pthread_mutex_unlock(&vtctx->lock);
159     *buf = info->cm_buffer;
162     vtctx->frame_ct_out++;
/*
 * Appends an encoded sample buffer to the output queue and wakes any
 * consumer blocked in vtenc_q_pop. On allocation failure the error is
 * recorded via set_async_error (this function cannot return one).
 */
167 static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer)
169     BufNode *info = av_malloc(sizeof(BufNode));
171         set_async_error(vtctx, AVERROR(ENOMEM));
176     info->cm_buffer = buffer;
179     pthread_mutex_lock(&vtctx->lock);
180     pthread_cond_signal(&vtctx->cv_sample_sent);
182     if (!vtctx->q_head) {
183         vtctx->q_head = info;
185         vtctx->q_tail->next = info;
188     vtctx->q_tail = info;
190     pthread_mutex_unlock(&vtctx->lock);
/* Maps an AVCodecID to the corresponding CoreMedia codec type. */
193 static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
196     case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
/* AVBuffer free callback: releases the CMBlockBuffer backing a zero-copy packet. */
201 static void vtenc_free_block(void *opaque, uint8_t *data)
203     CMBlockBufferRef block = opaque;
208  * Get the parameter sets from a CMSampleBufferRef.
209  * @param dst If *dst isn't NULL, the parameters are copied into existing
210  *            memory. *dst_size must be set accordingly when *dst != NULL.
211  *            If *dst is NULL, it will be allocated.
212  * In all cases, *dst_size is set to the number of bytes used starting
/* Computes the byte size of all SPS/PPS parameter sets plus a start code each. */
215 static int get_params_size(
216     AVCodecContext *avctx,
217     CMVideoFormatDescriptionRef vid_fmt,
220     size_t total_size = 0;
/* Some OS versions report a bogus parameter-set count; is_count_bad works around that. */
222     int is_count_bad = 0;
225     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
237     for (i = 0; i < ps_count || is_count_bad; i++) {
240         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
248          * When ps_count is invalid, status != 0 ends the loop normally
249          * unless we didn't get any parameter sets.
251         if (i > 0 && is_count_bad) status = 0;
256         total_size += ps_size + sizeof(start_code);
260         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
261         return AVERROR_EXTERNAL;
/*
 * Copies each SPS/PPS parameter set into dst, prefixing every set with an
 * Annex B start code. dst must be at least get_params_size() bytes.
 */
268 static int copy_param_sets(
269     AVCodecContext *avctx,
270     CMVideoFormatDescriptionRef vid_fmt,
/* Same bogus-count workaround as in get_params_size. */
275     int is_count_bad = 0;
280     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
293     for (i = 0; i < ps_count || is_count_bad; i++) {
298         status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
305         if (i > 0 && is_count_bad) status = 0;
/* Bounds check before each write: start code plus the parameter set itself. */
310         next_offset = offset + sizeof(start_code) + ps_size;
311         if (dst_size < next_offset) {
312             av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
313             return AVERROR_BUFFER_TOO_SMALL;
316         memcpy(dst + offset, start_code, sizeof(start_code));
317         offset += sizeof(start_code);
319         memcpy(dst + offset, ps, ps_size);
320         offset = next_offset;
324         av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
325         return AVERROR_EXTERNAL;
/*
 * Fills avctx->extradata with the SPS/PPS from the first encoded sample.
 * Used when AV_CODEC_FLAG_GLOBAL_HEADER is set. Returns 0 or an AVERROR.
 */
331 static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
333     CMVideoFormatDescriptionRef vid_fmt;
337     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
339         av_log(avctx, AV_LOG_ERROR, "No video format.\n");
340         return AVERROR_EXTERNAL;
343     status = get_params_size(avctx, vid_fmt, &total_size);
345         av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
349     avctx->extradata = av_malloc(total_size);
350     if (!avctx->extradata) {
351         return AVERROR(ENOMEM);
353     avctx->extradata_size = total_size;
355     status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
358         av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
/*
 * VTCompressionSession output callback (runs on a VideoToolbox thread).
 * Retains the sample into the output queue, or records an async error.
 */
365 static void vtenc_output_callback(
367     void *sourceFrameCtx,
369     VTEncodeInfoFlags flags,
370     CMSampleBufferRef sample_buffer)
372     AVCodecContext *avctx = ctx;
373     VTEncContext *vtctx = avctx->priv_data;
/* Once an async error is pending, drop further output. */
375     if (vtctx->async_error) {
376         if(sample_buffer) CFRelease(sample_buffer);
380     if (status || !sample_buffer) {
381         av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
382         set_async_error(vtctx, AVERROR_EXTERNAL);
/* First output with GLOBAL_HEADER: extract SPS/PPS into extradata. */
386     if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
387         int set_status = set_extradata(avctx, sample_buffer);
389             set_async_error(vtctx, set_status);
394     vtenc_q_push(vtctx, sample_buffer);
/*
 * Queries the byte width of the NAL-unit length prefixes (AVCC "nal length
 * size") used in this sample buffer. Needed to convert to Annex B.
 */
397 static int get_length_code_size(
398     AVCodecContext *avctx,
399     CMSampleBufferRef sample_buffer,
402     CMVideoFormatDescriptionRef vid_fmt;
406     vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
408         av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
409         return AVERROR_EXTERNAL;
412     status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
419         av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
420         return AVERROR_EXTERNAL;
428  * Returns true on success.
430  * If profile_level_val is NULL and this method returns true, don't specify the
431  * profile/level to the encoder.
/* Maps the (profile, level) options to a VideoToolbox profile-level constant. */
433 static bool get_vt_profile_level(AVCodecContext *avctx,
434                                  CFStringRef *profile_level_val)
436     VTEncContext *vtctx = avctx->priv_data;
437     int64_t profile = vtctx->profile;
439     if (profile == H264_PROF_AUTO && vtctx->level) {
440         //Need to pick a profile if level is not auto-selected.
441         profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
444     *profile_level_val = NULL;
450     case H264_PROF_BASELINE:
/* level option values are level*10 (e.g. 31 == level 3.1); 0 means auto. */
451         switch (vtctx->level) {
452             case  0: *profile_level_val = kVTProfileLevel_H264_Baseline_AutoLevel; break;
453             case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
454             case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
455             case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
456             case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
457             case 40: *profile_level_val = kVTProfileLevel_H264_Baseline_4_0;       break;
458             case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
459             case 42: *profile_level_val = kVTProfileLevel_H264_Baseline_4_2;       break;
460             case 50: *profile_level_val = kVTProfileLevel_H264_Baseline_5_0;       break;
461             case 51: *profile_level_val = kVTProfileLevel_H264_Baseline_5_1;       break;
462             case 52: *profile_level_val = kVTProfileLevel_H264_Baseline_5_2;       break;
467         switch (vtctx->level) {
468             case  0: *profile_level_val = kVTProfileLevel_H264_Main_AutoLevel; break;
469             case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
470             case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
471             case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
472             case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
473             case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
474             case 42: *profile_level_val = kVTProfileLevel_H264_Main_4_2;       break;
475             case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
476             case 51: *profile_level_val = kVTProfileLevel_H264_Main_5_1;       break;
477             case 52: *profile_level_val = kVTProfileLevel_H264_Main_5_2;       break;
482         switch (vtctx->level) {
483             case  0: *profile_level_val = kVTProfileLevel_H264_High_AutoLevel; break;
484             case 30: *profile_level_val = kVTProfileLevel_H264_High_3_0;       break;
485             case 31: *profile_level_val = kVTProfileLevel_H264_High_3_1;       break;
486             case 32: *profile_level_val = kVTProfileLevel_H264_High_3_2;       break;
487             case 40: *profile_level_val = kVTProfileLevel_H264_High_4_0;       break;
488             case 41: *profile_level_val = kVTProfileLevel_H264_High_4_1;       break;
489             case 42: *profile_level_val = kVTProfileLevel_H264_High_4_2;       break;
490             case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
491             case 51: *profile_level_val = kVTProfileLevel_H264_High_5_1;       break;
492             case 52: *profile_level_val = kVTProfileLevel_H264_High_5_2;       break;
497     if (!*profile_level_val) {
498         av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
/*
 * Maps an AVPixelFormat + color range to a CoreVideo pixel format constant.
 * *range_guessed is set when the range was unspecified and MPEG is assumed.
 * Returns 0 on success, AVERROR(EINVAL) for unsupported formats.
 */
505 static int get_cv_pixel_format(AVCodecContext* avctx,
506                                enum AVPixelFormat fmt,
507                                enum AVColorRange range,
508                                int* av_pixel_format,
511     if (range_guessed) *range_guessed = range != AVCOL_RANGE_MPEG &&
512                                         range != AVCOL_RANGE_JPEG;
514     //MPEG range is used when no range is set
515     if (fmt == AV_PIX_FMT_NV12) {
516         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
517                                         kCVPixelFormatType_420YpCbCr8BiPlanarFullRange :
518                                         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
519     } else if (fmt == AV_PIX_FMT_YUV420P) {
520         *av_pixel_format = range == AVCOL_RANGE_JPEG ?
521                                         kCVPixelFormatType_420YpCbCr8PlanarFullRange :
522                                         kCVPixelFormatType_420YpCbCr8Planar;
524         return AVERROR(EINVAL);
530 static int create_cv_pixel_buffer_info(AVCodecContext* avctx,
531 CFMutableDictionaryRef* dict)
533 CFNumberRef cv_color_format_num = NULL;
534 CFNumberRef width_num = NULL;
535 CFNumberRef height_num = NULL;
536 CFMutableDictionaryRef pixel_buffer_info = NULL;
538 int status = get_cv_pixel_format(avctx,
543 if (status) return status;
545 pixel_buffer_info = CFDictionaryCreateMutable(
548 &kCFCopyStringDictionaryKeyCallBacks,
549 &kCFTypeDictionaryValueCallBacks);
551 if (!pixel_buffer_info) goto pbinfo_nomem;
553 cv_color_format_num = CFNumberCreate(kCFAllocatorDefault,
556 if (!cv_color_format_num) goto pbinfo_nomem;
558 CFDictionarySetValue(pixel_buffer_info,
559 kCVPixelBufferPixelFormatTypeKey,
560 cv_color_format_num);
561 vt_release_num(&cv_color_format_num);
563 width_num = CFNumberCreate(kCFAllocatorDefault,
566 if (!width_num) return AVERROR(ENOMEM);
568 CFDictionarySetValue(pixel_buffer_info,
569 kCVPixelBufferWidthKey,
571 vt_release_num(&width_num);
573 height_num = CFNumberCreate(kCFAllocatorDefault,
576 if (!height_num) goto pbinfo_nomem;
578 CFDictionarySetValue(pixel_buffer_info,
579 kCVPixelBufferHeightKey,
581 vt_release_num(&height_num);
583 *dict = pixel_buffer_info;
587 vt_release_num(&cv_color_format_num);
588 vt_release_num(&width_num);
589 vt_release_num(&height_num);
590 if (pixel_buffer_info) CFRelease(pixel_buffer_info);
592 return AVERROR(ENOMEM);
/*
 * Encoder init: validates options, creates the VTCompressionSession,
 * applies session properties (bitrate, profile/level, GOP, entropy mode,
 * frame reordering, realtime hints) and prepares the session for encoding.
 * NOTE(review): enc_info does not appear to be released on the error paths
 * visible here — confirm against the full file.
 */
595 static av_cold int vtenc_init(AVCodecContext *avctx)
597     CFMutableDictionaryRef enc_info;
598     CFMutableDictionaryRef pixel_buffer_info;
599     CMVideoCodecType codec_type;
600     VTEncContext *vtctx = avctx->priv_data;
601     CFStringRef profile_level;
602     SInt32 bit_rate = avctx->bit_rate;
603     CFNumberRef bit_rate_num;
604     CFBooleanRef has_b_frames_cfbool;
607     codec_type = get_cm_codec_type(avctx->codec_id);
609         av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
610         return AVERROR(EINVAL);
/* Baseline profile supports neither B-frames nor CABAC; downgrade with a warning. */
613     vtctx->has_b_frames = avctx->max_b_frames > 0;
614     if(vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE){
615         av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
616         vtctx->has_b_frames = false;
619     if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
620         av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
621         vtctx->entropy = VT_ENTROPY_NOT_SET;
624     if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
626     vtctx->session = NULL;
628     enc_info = CFDictionaryCreateMutable(
631         &kCFCopyStringDictionaryKeyCallBacks,
632         &kCFTypeDictionaryValueCallBacks
635     if (!enc_info) return AVERROR(ENOMEM);
/* Hardware-acceleration keys exist on macOS only. */
637 #if !TARGET_OS_IPHONE
638     if (!vtctx->allow_sw) {
639         CFDictionarySetValue(enc_info, kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder, kCFBooleanTrue);
641         CFDictionarySetValue(enc_info, kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, kCFBooleanTrue);
/* Pixel buffer attributes are only needed when we feed raw AVFrames. */
645     if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
646         status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
652         pixel_buffer_info = NULL;
655     status = VTCompressionSessionCreate(
663         vtenc_output_callback,
668     if (pixel_buffer_info) CFRelease(pixel_buffer_info);
671     if (status || !vtctx->session) {
672         av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
674 #if !TARGET_OS_IPHONE
675         if (!vtctx->allow_sw) {
676             av_log(avctx, AV_LOG_ERROR, "Try -allow_sw 1. The hardware encoder may be busy, or not supported.\n");
680         return AVERROR_EXTERNAL;
683     bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
686     if (!bit_rate_num) return AVERROR(ENOMEM);
688     status = VTSessionSetProperty(vtctx->session,
689                                   kVTCompressionPropertyKey_AverageBitRate,
691     CFRelease(bit_rate_num);
694         av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
695         return AVERROR_EXTERNAL;
699     status = VTSessionSetProperty(vtctx->session,
700                                   kVTCompressionPropertyKey_ProfileLevel,
703         av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
704         return AVERROR_EXTERNAL;
708     if (avctx->gop_size > 0) {
709         CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
713             return AVERROR(ENOMEM);
716         status = VTSessionSetProperty(vtctx->session,
717                                       kVTCompressionPropertyKey_MaxKeyFrameInterval,
722             av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
723             return AVERROR_EXTERNAL;
/* frames_before / frames_after are best-effort: unsupported is only a warning. */
727     if (vtctx->frames_before) {
728         status = VTSessionSetProperty(vtctx->session,
729                                       kVTCompressionPropertyKey_MoreFramesBeforeStart,
732         if (status == kVTPropertyNotSupportedErr) {
733             av_log(avctx, AV_LOG_WARNING, "frames_before property is not supported on this device. Ignoring.\n");
735             av_log(avctx, AV_LOG_ERROR, "Error setting frames_before property: %d\n", status);
739     if (vtctx->frames_after) {
740         status = VTSessionSetProperty(vtctx->session,
741                                       kVTCompressionPropertyKey_MoreFramesAfterEnd,
744         if (status == kVTPropertyNotSupportedErr) {
745             av_log(avctx, AV_LOG_WARNING, "frames_after property is not supported on this device. Ignoring.\n");
747             av_log(avctx, AV_LOG_ERROR, "Error setting frames_after property: %d\n", status);
751     if (!vtctx->has_b_frames) {
752         status = VTSessionSetProperty(vtctx->session,
753                                       kVTCompressionPropertyKey_AllowFrameReordering,
757             av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
758             return AVERROR_EXTERNAL;
762     if (vtctx->entropy != VT_ENTROPY_NOT_SET) {
763         CFStringRef entropy = vtctx->entropy == VT_CABAC ?
764                                 kVTH264EntropyMode_CABAC:
765                                 kVTH264EntropyMode_CAVLC;
767         status = VTSessionSetProperty(vtctx->session,
768                                       kVTCompressionPropertyKey_H264EntropyMode,
772             av_log(avctx, AV_LOG_ERROR, "Error setting entropy property: %d\n", status);
773             return AVERROR_EXTERNAL;
777     if (vtctx->realtime) {
778         status = VTSessionSetProperty(vtctx->session,
779                                       kVTCompressionPropertyKey_RealTime,
783             av_log(avctx, AV_LOG_ERROR, "Error setting realtime property: %d\n", status);
787     status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
789         av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
790         return AVERROR_EXTERNAL;
793     pthread_mutex_init(&vtctx->lock, NULL);
794     pthread_cond_init(&vtctx->cv_sample_sent, NULL);
/* dts_delta < 0 means "not known yet"; measured from the 2nd frame when B-frames exist. */
795     vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;
797     status = VTSessionCopyProperty(vtctx->session,
798                                    kVTCompressionPropertyKey_AllowFrameReordering,
800                                    &has_b_frames_cfbool);
803         //Some devices don't output B-frames for main profile, even if requested.
804         vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
805         CFRelease(has_b_frames_cfbool);
807     avctx->has_b_frames = vtctx->has_b_frames;
/*
 * Determines whether an encoded sample is a keyframe by inspecting the
 * kCMSampleAttachmentKey_NotSync attachment; absence of attachments
 * conservatively means keyframe.
 */
812 static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
814     CFArrayRef attachments;
815     CFDictionaryRef attachment;
816     CFBooleanRef not_sync;
819     attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
820     len = !attachments ? 0 : CFArrayGetCount(attachments);
823         *is_key_frame = true;
827     attachment = CFArrayGetValueAtIndex(attachments, 0);
829     if (CFDictionaryGetValueIfPresent(attachment,
830                                       kCMSampleAttachmentKey_NotSync,
831                                       (const void **)&not_sync))
/* NotSync present and true means non-keyframe; invert for is_key_frame. */
833         *is_key_frame = !CFBooleanGetValue(not_sync);
835         *is_key_frame = true;
840 * Replaces length codes with H.264 Annex B start codes.
841 * length_code_size must equal sizeof(start_code).
842 * On failure, the contents of data may have been modified.
844 * @param length_code_size Byte length of each length code
845 * @param data Call with NAL units prefixed with length codes.
846 * On success, the length codes are replace with
848 * @param size Length of data, excluding any padding.
849 * @return 0 on success
850 * AVERROR_BUFFER_TOO_SMALL if length code size is smaller
851 * than a start code or if a length_code in data specifies
852 * data beyond the end of its buffer.
854 static int replace_length_codes(size_t length_code_size,
858 size_t remaining_size = size;
860 if (length_code_size != sizeof(start_code)) {
861 av_log(NULL, AV_LOG_ERROR, "Start code size and length code size not equal.\n");
862 return AVERROR_BUFFER_TOO_SMALL;
865 while (remaining_size > 0) {
869 for (i = 0; i < length_code_size; i++) {
874 if (remaining_size < box_len + sizeof(start_code)) {
875 av_log(NULL, AV_LOG_ERROR, "Length is out of range.\n");
876 AVERROR_BUFFER_TOO_SMALL;
879 memcpy(data, start_code, sizeof(start_code));
880 data += box_len + sizeof(start_code);
881 remaining_size -= box_len + sizeof(start_code);
888 * Copies NAL units and replaces length codes with
889 * H.264 Annex B start codes. On failure, the contents of
890 * dst_data may have been modified.
892 * @param length_code_size Byte length of each length code
893 * @param src_data NAL units prefixed with length codes.
894 * @param src_size Length of buffer, excluding any padding.
895 * @param dst_data Must be zeroed before calling this function.
896 * Contains the copied NAL units prefixed with
897 * start codes when the function returns
899 * @param dst_size Length of dst_data
900 * @return 0 on success
901 * AVERROR_INVALIDDATA if length_code_size is invalid
902 * AVERROR_BUFFER_TOO_SMALL if dst_data is too small
903 * or if a length_code in src_data specifies data beyond
904 * the end of its buffer.
906 static int copy_replace_length_codes(
907 size_t length_code_size,
908 const uint8_t *src_data,
913 size_t remaining_src_size = src_size;
914 size_t remaining_dst_size = dst_size;
916 if (length_code_size > 4) {
917 return AVERROR_INVALIDDATA;
920 while (remaining_src_size > 0) {
927 const uint8_t *src_box;
929 for (i = 0; i < length_code_size; i++) {
931 box_len |= src_data[i];
934 curr_src_len = box_len + length_code_size;
935 curr_dst_len = box_len + sizeof(start_code);
937 if (remaining_src_size < curr_src_len) {
938 return AVERROR_BUFFER_TOO_SMALL;
941 if (remaining_dst_size < curr_dst_len) {
942 return AVERROR_BUFFER_TOO_SMALL;
945 dst_box = dst_data + sizeof(start_code);
946 src_box = src_data + length_code_size;
948 memcpy(dst_data, start_code, sizeof(start_code));
949 memcpy(dst_box, src_box, box_len);
951 src_data += curr_src_len;
952 dst_data += curr_dst_len;
954 remaining_src_size -= curr_src_len;
955 remaining_dst_size -= curr_dst_len;
/*
 * Converts one encoded CMSampleBuffer into an AVPacket: optionally prepends
 * SPS/PPS (non-global-header keyframes), converts AVCC length prefixes to
 * Annex B start codes (zero-copy when possible), and fills pts/dts/flags.
 */
961 static int vtenc_cm_to_avpacket(
962     AVCodecContext *avctx,
963     CMSampleBufferRef sample_buffer,
966     VTEncContext *vtctx = avctx->priv_data;
972     size_t  length_code_size;
973     size_t  header_size = 0;
976     int64_t  time_base_num;
980     CMBlockBufferRef block;
981     CMVideoFormatDescriptionRef vid_fmt;
984     vtenc_get_frame_info(sample_buffer, &is_key_frame);
985     status = get_length_code_size(avctx, sample_buffer, &length_code_size);
986     if (status) return status;
/* Inline SPS/PPS only when extradata is not used for them. */
988     add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
991         vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
993             av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
996         int status = get_params_size(avctx, vid_fmt, &header_size);
997         if (status) return status;
1000     block = CMSampleBufferGetDataBuffer(sample_buffer);
1002         av_log(avctx, AV_LOG_ERROR, "Could not get block buffer from sample buffer.\n");
1003         return AVERROR_EXTERNAL;
1007     status = CMBlockBufferGetDataPointer(block, 0, &in_buf_size, NULL, &buf_data);
1009         av_log(avctx, AV_LOG_ERROR, "Error: cannot get data pointer: %d\n", status);
1010         return AVERROR_EXTERNAL;
1013     size_t out_buf_size = header_size + in_buf_size;
/* Zero-copy path: reuse the CMBlockBuffer memory when no header must be
 * prepended and the length code is already start-code sized. */
1014     bool can_reuse_cmbuffer = !add_header &&
1016                               length_code_size == sizeof(start_code);
1018     av_init_packet(pkt);
1020     if (can_reuse_cmbuffer) {
1021         AVBufferRef* buf_ref = av_buffer_create(
1029         if (!buf_ref) return AVERROR(ENOMEM);
1034         pkt->data = buf_data;
1035         pkt->size = in_buf_size;
1037         status = replace_length_codes(length_code_size, pkt->data, pkt->size);
1039             av_log(avctx, AV_LOG_ERROR, "Error replacing length codes: %d\n", status);
1044         status = av_new_packet(pkt, out_buf_size);
1045         if(status) return status;
1048         if (pkt->size < out_buf_size) {
1049             av_log(avctx, AV_LOG_ERROR, "Error: packet's buffer is too small.\n");
1050             return AVERROR_BUFFER_TOO_SMALL;
1054             status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
1055             if(status) return status;
1058         status = copy_replace_length_codes(
1062             pkt->data + header_size,
1063             pkt->size - header_size
/* NOTE(review): this log message lacks a trailing '\n', unlike the others. */
1067             av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d", status);
1073         pkt->flags |= AV_PKT_FLAG_KEY;
1076     pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
1077     dts = CMSampleBufferGetDecodeTimeStamp    (sample_buffer);
/* Without B-frames an invalid DTS is tolerable (DTS == PTS); otherwise it is an error. */
1079     if (CMTIME_IS_INVALID(dts)) {
1080         if (!vtctx->has_b_frames) {
1083             av_log(avctx, AV_LOG_ERROR, "DTS is invalid.\n");
1084             return AVERROR_EXTERNAL;
/* dts_delta shifts DTS so it never exceeds PTS when frames are reordered. */
1088     dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
1089     time_base_num = avctx->time_base.num;
1090     pkt->pts = pts.value / time_base_num;
1091     pkt->dts = dts.value / time_base_num - dts_delta;
1097 * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
1098 * containing all planes if so.
/*
 * Fills per-plane width/height/stride arrays for the frame's pixel format
 * and computes contiguous_buf_size (0 when the planes are not contiguous
 * in memory — see the comment above this function).
 */
1100 static int get_cv_pixel_info(
1101     AVCodecContext *avctx,
1102     const AVFrame  *frame,
1108     size_t *contiguous_buf_size)
1110     VTEncContext *vtctx = avctx->priv_data;
1111     int av_format       = frame->format;
1112     int av_color_range  = av_frame_get_color_range(frame);
1117     status = get_cv_pixel_format(avctx, av_format, av_color_range, color, &range_guessed);
1121                "Could not get pixel format for color format '%s' range '%s'.\n",
1122                av_get_pix_fmt_name(av_format),
1123                av_color_range > AVCOL_RANGE_UNSPECIFIED &&
1124                av_color_range < AVCOL_RANGE_NB ?
1125                    av_color_range_name(av_color_range) :
1128         return AVERROR(EINVAL);
/* Warn only once that MPEG range is being assumed. */
1131     if (range_guessed) {
1132         if (!vtctx->warned_color_range) {
1133             vtctx->warned_color_range = true;
1136                    "Color range not set for %s. Using MPEG range.\n",
1137                    av_get_pix_fmt_name(av_format));
1140         av_log(avctx, AV_LOG_WARNING, "");
1143     switch (av_format) {
1144     case AV_PIX_FMT_NV12:
/* NV12: full-size luma plane plus one half-size interleaved chroma plane. */
1147         widths [0] = avctx->width;
1148         heights[0] = avctx->height;
1149         strides[0] = frame ? frame->linesize[0] : avctx->width;
1151         widths [1] = (avctx->width  + 1) / 2;
1152         heights[1] = (avctx->height + 1) / 2;
1153         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
1156     case AV_PIX_FMT_YUV420P:
/* YUV420P: luma plane plus two half-size chroma planes. */
1159         widths [0] = avctx->width;
1160         heights[0] = avctx->height;
1161         strides[0] = frame ? frame->linesize[0] : avctx->width;
1163         widths [1] = (avctx->width  + 1) / 2;
1164         heights[1] = (avctx->height + 1) / 2;
1165         strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;
1167         widths [2] = (avctx->width  + 1) / 2;
1168         heights[2] = (avctx->height + 1) / 2;
1169         strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
1176                "Could not get frame format info for color %d range %d.\n",
1180         return AVERROR(EINVAL);
/* Contiguity check: each plane must end exactly where the next begins. */
1183     *contiguous_buf_size = 0;
1184     for (i = 0; i < *plane_count; i++) {
1185         if (i < *plane_count - 1 &&
1186             frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
1187             *contiguous_buf_size = 0;
1191         *contiguous_buf_size += strides[i] * heights[i];
1197 #if !TARGET_OS_IPHONE
1198 //Not used on iOS - frame is always copied.
/* CVPixelBufferCreateWithPlanarBytes release callback: drops the AVFrame
 * reference that kept the wrapped plane memory alive. */
1199 static void free_avframe(
1204     const void *plane_addresses[])
1206     AVFrame *frame = release_ctx;
1207     av_frame_free(&frame);
1210 //Not used on OSX - frame is never copied.
/*
 * Copies AVFrame plane data into a (locked) CVPixelBuffer, handling both
 * planar and non-planar destination buffers and differing strides.
 * Returns 0 on success, AVERROR_EXTERNAL on plane-count/lock mismatches.
 */
1211 static int copy_avframe_to_pixel_buffer(AVCodecContext   *avctx,
1212                                         const AVFrame    *frame,
1213                                         CVPixelBufferRef cv_img,
1214                                         const size_t     *plane_strides,
1215                                         const size_t     *plane_rows)
1227     status = CVPixelBufferLockBaseAddress(cv_img, 0);
1232                "Error: Could not lock base address of CVPixelBuffer: %d.\n",
1237     if (CVPixelBufferIsPlanar(cv_img)) {
1238         plane_count = CVPixelBufferGetPlaneCount(cv_img);
1239         for (i = 0; frame->data[i]; i++) {
/* AVFrame and CVPixelBuffer must agree on the number of planes. */
1240             if (i == plane_count) {
1241                 CVPixelBufferUnlockBaseAddress(cv_img, 0);
1244                        "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
1247                 return AVERROR_EXTERNAL;
1250             dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
1251             src_addr = (uint8_t*)frame->data[i];
1252             dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
1253             src_stride = plane_strides[i];
1254             rows = plane_rows[i];
/* Fast path when strides match; otherwise copy row by row. */
1256             if (dst_stride == src_stride) {
1257                 memcpy(dst_addr, src_addr, src_stride * rows);
1259                 copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1261                 for (j = 0; j < rows; j++) {
1262                     memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
/* Non-planar destination: the source must have exactly one plane. */
1267         if (frame->data[1]) {
1268             CVPixelBufferUnlockBaseAddress(cv_img, 0);
1271                    "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
1274             return AVERROR_EXTERNAL;
1277         dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
1278         src_addr = (uint8_t*)frame->data[0];
1279         dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
1280         src_stride = plane_strides[0];
1281         rows = plane_rows[0];
1283         if (dst_stride == src_stride) {
1284             memcpy(dst_addr, src_addr, src_stride * rows);
1286             copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
1288             for (j = 0; j < rows; j++) {
1289                 memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
1294     status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
1296         av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
1297         return AVERROR_EXTERNAL;
1302 #endif //!TARGET_OS_IPHONE
/*
 * Produces the CVPixelBuffer to feed to the encoder for one frame:
 * - AV_PIX_FMT_VIDEOTOOLBOX frames already carry one in data[3];
 * - on iOS the frame is copied into a pool-allocated buffer;
 * - on macOS the AVFrame planes are wrapped zero-copy (free_avframe releases
 *   the reference when VideoToolbox is done).
 */
1304 static int create_cv_pixel_buffer(AVCodecContext   *avctx,
1305                                   const AVFrame    *frame,
1306                                   CVPixelBufferRef *cv_img)
1310     size_t widths [AV_NUM_DATA_POINTERS];
1311     size_t heights[AV_NUM_DATA_POINTERS];
1312     size_t strides[AV_NUM_DATA_POINTERS];
1314     size_t contiguous_buf_size;
1315     CVPixelBufferPoolRef pix_buf_pool;
1316     VTEncContext* vtctx = avctx->priv_data;
1319     if (avctx->pix_fmt == AV_PIX_FMT_VIDEOTOOLBOX) {
1320         av_assert0(frame->format == AV_PIX_FMT_VIDEOTOOLBOX);
1322         *cv_img = (CVPixelBufferRef)frame->data[3];
1323         av_assert0(*cv_img);
1329     memset(widths,  0, sizeof(widths));
1330     memset(heights, 0, sizeof(heights));
1331     memset(strides, 0, sizeof(strides));
1333     status = get_cv_pixel_info(
1341         &contiguous_buf_size
1348                "Error: Cannot convert format %d color_range %d: %d\n",
1350                av_frame_get_color_range(frame),
1354         return AVERROR_EXTERNAL;
1357 #if TARGET_OS_IPHONE
/* iOS: allocate from the session's pixel buffer pool, then copy the frame in. */
1358     pix_buf_pool = VTCompressionSessionGetPixelBufferPool(vtctx->session);
1359     if (!pix_buf_pool) {
1360         av_log(avctx, AV_LOG_ERROR, "Could not get pixel buffer pool.\n");
1361         return AVERROR_EXTERNAL;
1364     status = CVPixelBufferPoolCreatePixelBuffer(NULL,
1370         av_log(avctx, AV_LOG_ERROR, "Could not create pixel buffer from pool: %d.\n", status);
1371         return AVERROR_EXTERNAL;
1374     status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
/* macOS: keep a new AVFrame reference alive for the zero-copy wrap below. */
1381     AVFrame *enc_frame = av_frame_alloc();
1382     if (!enc_frame) return AVERROR(ENOMEM);
1384     status = av_frame_ref(enc_frame, frame);
1386         av_frame_free(&enc_frame);
1390     status = CVPixelBufferCreateWithPlanarBytes(
1391         kCFAllocatorDefault,
1396         contiguous_buf_size,
1398         (void **)enc_frame->data,
1409         av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
1410         return AVERROR_EXTERNAL;
/*
 * Submits one raw frame to the compression session. Timestamps are rescaled
 * into the codec time base via CMTimeMake. Returns 0 or an AVERROR.
 */
1417 static int vtenc_send_frame(AVCodecContext *avctx,
1418                             VTEncContext   *vtctx,
1419                             const AVFrame  *frame)
1422     CVPixelBufferRef cv_img = NULL;
1423     int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
1425     if (status) return status;
1427     time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
1428     status = VTCompressionSessionEncodeFrame(
1441         av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
1442         return AVERROR_EXTERNAL;
/*
 * encode2 entry point: sends the frame (or starts flushing on frame==NULL),
 * tracks the pts delta needed for B-frame dts computation, then pops an
 * encoded sample from the queue and converts it to an AVPacket.
 */
1448 static av_cold int vtenc_frame(
1449     AVCodecContext *avctx,
1451     const AVFrame  *frame,
1454     VTEncContext *vtctx = avctx->priv_data;
1457     CMSampleBufferRef buf = NULL;
1460         status = vtenc_send_frame(avctx, vtctx, frame);
1463             status = AVERROR_EXTERNAL;
/* dts_delta = pts(2nd frame) - pts(1st frame); only relevant with B-frames. */
1467         if (vtctx->frame_ct_in == 0) {
1468             vtctx->first_pts = frame->pts;
1469         } else if(vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
1470             vtctx->dts_delta = frame->pts - vtctx->first_pts;
1473         vtctx->frame_ct_in++;
1474     } else if(!vtctx->flushing) {
/* frame == NULL: flush once; tell VideoToolbox to emit everything pending. */
1475         vtctx->flushing = true;
1477         status = VTCompressionSessionCompleteFrames(vtctx->session,
1481             av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
1482             status = AVERROR_EXTERNAL;
/* Don't emit packets until dts_delta is known (or we're draining). */
1488     get_frame = vtctx->dts_delta >= 0 || !frame;
1494     status = vtenc_q_pop(vtctx, !frame, &buf);
1495     if (status) goto end_nopkt;
1496     if (!buf)   goto end_nopkt;
1498     status = vtenc_cm_to_avpacket(avctx, buf, pkt);
1500     if (status) goto end_nopkt;
1506     av_packet_unref(pkt);
/* Encoder teardown: destroys the sync primitives and releases the session. */
1510 static av_cold int vtenc_close(AVCodecContext *avctx)
1512     VTEncContext *vtctx = avctx->priv_data;
1514     if(!vtctx->session) return 0;
1516     pthread_cond_destroy(&vtctx->cv_sample_sent);
1517     pthread_mutex_destroy(&vtctx->lock);
1518     CFRelease(vtctx->session);
1519     vtctx->session = NULL;
/* Input pixel formats accepted by this encoder. */
1524 static const enum AVPixelFormat pix_fmts[] = {
1525     AV_PIX_FMT_VIDEOTOOLBOX,
1531 #define OFFSET(x) offsetof(VTEncContext, x)
1532 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options; "level" values are level*10 (0 = auto), consumed by
 * get_vt_profile_level. */
1533 static const AVOption options[] = {
1534     { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
1535     { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
1536     { "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },
1537     { "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },
1539     { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
1540     { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
1541     { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
1542     { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
1543     { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
1544     { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
1545     { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
1546     { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
1547     { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
1548     { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
1549     { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
1551     { "allow_sw", "Allow software encoding", OFFSET(allow_sw), AV_OPT_TYPE_BOOL,
1552         { .i64 = 0 }, 0, 1, VE },
1554     { "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },
1555     { "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
1556     { "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },
1557     { "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
1558     { "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },
1560     { "realtime", "Hint that encoding should happen in real-time if not faster (e.g. capturing from camera).",
1561         OFFSET(realtime), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
1563     { "frames_before", "Other frames will come before the frames in this session. This helps smooth concatenation issues.",
1564         OFFSET(frames_before), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
1565     { "frames_after", "Other frames will come after the frames in this session. This helps smooth concatenation issues.",
1566         OFFSET(frames_after), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
/* AVClass wiring the private options above into the codec context. */
1571 static const AVClass h264_videotoolbox_class = {
1572     .class_name = "h264_videotoolbox",
1573     .item_name  = av_default_item_name,
1575     .version    = LIBAVUTIL_VERSION_INT,
/* Codec registration; CAP_DELAY because output arrives asynchronously. */
1578 AVCodec ff_h264_videotoolbox_encoder = {
1579     .name             = "h264_videotoolbox",
1580     .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
1581     .type             = AVMEDIA_TYPE_VIDEO,
1582     .id               = AV_CODEC_ID_H264,
1583     .priv_data_size   = sizeof(VTEncContext),
1584     .pix_fmts         = pix_fmts,
1586     .encode2          = vtenc_frame,
1587     .close            = vtenc_close,
1588     .capabilities     = AV_CODEC_CAP_DELAY,
1589     .priv_class       = &h264_videotoolbox_class,
1590     .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
1591                         FF_CODEC_CAP_INIT_CLEANUP,