]> git.sesse.net Git - ffmpeg/blob - libavcodec/videotoolbox.c
avcodec/videotoolbox: fix opaque_ref handling
[ffmpeg] / libavcodec / videotoolbox.c
1 /*
2  * Videotoolbox hardware acceleration
3  *
4  * copyright (c) 2012 Sebastien Zwickert
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 #include "config.h"
24 #include "videotoolbox.h"
25 #include "libavutil/hwcontext_videotoolbox.h"
26 #include "vt_internal.h"
27 #include "libavutil/avutil.h"
28 #include "libavutil/hwcontext.h"
29 #include "bytestream.h"
30 #include "decode.h"
31 #include "h264dec.h"
32 #include "hevcdec.h"
33 #include "mpegvideo.h"
34 #include <TargetConditionals.h>
35
/* Fallback definition of the decoder-specification key, used only when the
 * name is not already defined as a macro. */
#ifndef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
#  define kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder CFSTR("RequireHardwareAcceleratedVideoDecoder")
#endif

/* Supply the HEVC codec type ('hvc1') when HAVE_KCMVIDEOCODECTYPE_HEVC is 0. */
#if !HAVE_KCMVIDEOCODECTYPE_HEVC
enum { kCMVideoCodecType_HEVC = 'hvc1' };
#endif

/* Extra bytes added to the esds buffer allocation in
 * videotoolbox_esds_extradata_create(), on top of its computed payload size. */
#define VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING  12
45
46 static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
47 {
48     CVPixelBufferRef cv_buffer = (CVImageBufferRef)data;
49     CVPixelBufferRelease(cv_buffer);
50 }
51
52 static int videotoolbox_buffer_copy(VTContext *vtctx,
53                                     const uint8_t *buffer,
54                                     uint32_t size)
55 {
56     void *tmp;
57
58     tmp = av_fast_realloc(vtctx->bitstream,
59                          &vtctx->allocated_size,
60                          size);
61
62     if (!tmp)
63         return AVERROR(ENOMEM);
64
65     vtctx->bitstream = tmp;
66     memcpy(vtctx->bitstream, buffer, size);
67     vtctx->bitstream_size = size;
68
69     return 0;
70 }
71
72 int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
73 {
74     int ret = ff_attach_decode_data(frame);
75     if (ret < 0)
76         return ret;
77
78     frame->width  = avctx->width;
79     frame->height = avctx->height;
80     frame->format = avctx->pix_fmt;
81     frame->buf[0] = av_buffer_alloc(1);
82
83     if (!frame->buf[0])
84         return AVERROR(ENOMEM);
85
86     return 0;
87 }
88
/* Store the byte v at address p. The whole expansion is parenthesized so the
 * macro behaves as a single expression wherever it is used. */
#define AV_W8(p, v) (*(p) = (v))
90
91 CFDataRef ff_videotoolbox_avcc_extradata_create(AVCodecContext *avctx)
92 {
93     H264Context *h = avctx->priv_data;
94     CFDataRef data = NULL;
95     uint8_t *p;
96     int vt_extradata_size = 6 + 2 + h->ps.sps->data_size + 3 + h->ps.pps->data_size;
97     uint8_t *vt_extradata = av_malloc(vt_extradata_size);
98     if (!vt_extradata)
99         return NULL;
100
101     p = vt_extradata;
102
103     AV_W8(p + 0, 1); /* version */
104     AV_W8(p + 1, h->ps.sps->data[1]); /* profile */
105     AV_W8(p + 2, h->ps.sps->data[2]); /* profile compat */
106     AV_W8(p + 3, h->ps.sps->data[3]); /* level */
107     AV_W8(p + 4, 0xff); /* 6 bits reserved (111111) + 2 bits nal size length - 3 (11) */
108     AV_W8(p + 5, 0xe1); /* 3 bits reserved (111) + 5 bits number of sps (00001) */
109     AV_WB16(p + 6, h->ps.sps->data_size);
110     memcpy(p + 8, h->ps.sps->data, h->ps.sps->data_size);
111     p += 8 + h->ps.sps->data_size;
112     AV_W8(p + 0, 1); /* number of pps */
113     AV_WB16(p + 1, h->ps.pps->data_size);
114     memcpy(p + 3, h->ps.pps->data, h->ps.pps->data_size);
115
116     p += 3 + h->ps.pps->data_size;
117     av_assert0(p - vt_extradata == vt_extradata_size);
118
119     data = CFDataCreate(kCFAllocatorDefault, vt_extradata, vt_extradata_size);
120     av_free(vt_extradata);
121     return data;
122 }
123
124 CFDataRef ff_videotoolbox_hvcc_extradata_create(AVCodecContext *avctx)
125 {
126     HEVCContext *h = avctx->priv_data;
127     const HEVCVPS *vps = (const HEVCVPS *)h->ps.vps_list[0]->data;
128     const HEVCSPS *sps = (const HEVCSPS *)h->ps.sps_list[0]->data;
129     int i, num_pps = 0;
130     const HEVCPPS *pps = h->ps.pps;
131     PTLCommon ptlc = vps->ptl.general_ptl;
132     VUI vui = sps->vui;
133     uint8_t parallelismType;
134     CFDataRef data = NULL;
135     uint8_t *p;
136     int vt_extradata_size = 23 + 5 + vps->data_size + 5 + sps->data_size + 3;
137     uint8_t *vt_extradata;
138
139     for (i = 0; i < HEVC_MAX_PPS_COUNT; i++) {
140         if (h->ps.pps_list[i]) {
141             const HEVCPPS *pps = (const HEVCPPS *)h->ps.pps_list[i]->data;
142             vt_extradata_size += 2 + pps->data_size;
143             num_pps++;
144         }
145     }
146
147     vt_extradata = av_malloc(vt_extradata_size);
148     if (!vt_extradata)
149         return NULL;
150     p = vt_extradata;
151
152     /* unsigned int(8) configurationVersion = 1; */
153     AV_W8(p + 0, 1);
154
155     /*
156      * unsigned int(2) general_profile_space;
157      * unsigned int(1) general_tier_flag;
158      * unsigned int(5) general_profile_idc;
159      */
160     AV_W8(p + 1, ptlc.profile_space << 6 |
161                  ptlc.tier_flag     << 5 |
162                  ptlc.profile_idc);
163
164     /* unsigned int(32) general_profile_compatibility_flags; */
165     memcpy(p + 2, ptlc.profile_compatibility_flag, 4);
166
167     /* unsigned int(48) general_constraint_indicator_flags; */
168     AV_W8(p + 6, ptlc.progressive_source_flag    << 7 |
169                  ptlc.interlaced_source_flag     << 6 |
170                  ptlc.non_packed_constraint_flag << 5 |
171                  ptlc.frame_only_constraint_flag << 4);
172     AV_W8(p + 7, 0);
173     AV_WN32(p + 8, 0);
174
175     /* unsigned int(8) general_level_idc; */
176     AV_W8(p + 12, ptlc.level_idc);
177
178     /*
179      * bit(4) reserved = ‘1111’b;
180      * unsigned int(12) min_spatial_segmentation_idc;
181      */
182     AV_W8(p + 13, 0xf0 | (vui.min_spatial_segmentation_idc >> 4));
183     AV_W8(p + 14, vui.min_spatial_segmentation_idc & 0xff);
184
185     /*
186      * bit(6) reserved = ‘111111’b;
187      * unsigned int(2) parallelismType;
188      */
189     if (!vui.min_spatial_segmentation_idc)
190         parallelismType = 0;
191     else if (pps->entropy_coding_sync_enabled_flag && pps->tiles_enabled_flag)
192         parallelismType = 0;
193     else if (pps->entropy_coding_sync_enabled_flag)
194         parallelismType = 3;
195     else if (pps->tiles_enabled_flag)
196         parallelismType = 2;
197     else
198         parallelismType = 1;
199     AV_W8(p + 15, 0xfc | parallelismType);
200
201     /*
202      * bit(6) reserved = ‘111111’b;
203      * unsigned int(2) chromaFormat;
204      */
205     AV_W8(p + 16, sps->chroma_format_idc | 0xfc);
206
207     /*
208      * bit(5) reserved = ‘11111’b;
209      * unsigned int(3) bitDepthLumaMinus8;
210      */
211     AV_W8(p + 17, (sps->bit_depth - 8) | 0xfc);
212
213     /*
214      * bit(5) reserved = ‘11111’b;
215      * unsigned int(3) bitDepthChromaMinus8;
216      */
217     AV_W8(p + 18, (sps->bit_depth_chroma - 8) | 0xfc);
218
219     /* bit(16) avgFrameRate; */
220     AV_WB16(p + 19, 0);
221
222     /*
223      * bit(2) constantFrameRate;
224      * bit(3) numTemporalLayers;
225      * bit(1) temporalIdNested;
226      * unsigned int(2) lengthSizeMinusOne;
227      */
228     AV_W8(p + 21, 0                             << 6 |
229                   sps->max_sub_layers           << 3 |
230                   sps->temporal_id_nesting_flag << 2 |
231                   3);
232
233     /* unsigned int(8) numOfArrays; */
234     AV_W8(p + 22, 3);
235
236     p += 23;
237     /* vps */
238     /*
239      * bit(1) array_completeness;
240      * unsigned int(1) reserved = 0;
241      * unsigned int(6) NAL_unit_type;
242      */
243     AV_W8(p, 1 << 7 |
244              HEVC_NAL_VPS & 0x3f);
245     /* unsigned int(16) numNalus; */
246     AV_WB16(p + 1, 1);
247     /* unsigned int(16) nalUnitLength; */
248     AV_WB16(p + 3, vps->data_size);
249     /* bit(8*nalUnitLength) nalUnit; */
250     memcpy(p + 5, vps->data, vps->data_size);
251     p += 5 + vps->data_size;
252
253     /* sps */
254     AV_W8(p, 1 << 7 |
255              HEVC_NAL_SPS & 0x3f);
256     AV_WB16(p + 1, 1);
257     AV_WB16(p + 3, sps->data_size);
258     memcpy(p + 5, sps->data, sps->data_size);
259     p += 5 + sps->data_size;
260
261     /* pps */
262     AV_W8(p, 1 << 7 |
263              HEVC_NAL_PPS & 0x3f);
264     AV_WB16(p + 1, num_pps);
265     p += 3;
266     for (i = 0; i < HEVC_MAX_PPS_COUNT; i++) {
267         if (h->ps.pps_list[i]) {
268             const HEVCPPS *pps = (const HEVCPPS *)h->ps.pps_list[i]->data;
269             AV_WB16(p, pps->data_size);
270             memcpy(p + 2, pps->data, pps->data_size);
271             p += 2 + pps->data_size;
272         }
273     }
274
275     av_assert0(p - vt_extradata == vt_extradata_size);
276
277     data = CFDataCreate(kCFAllocatorDefault, vt_extradata, vt_extradata_size);
278     av_free(vt_extradata);
279     return data;
280 }
281
282 int ff_videotoolbox_buffer_create(VTContext *vtctx, AVFrame *frame)
283 {
284     av_buffer_unref(&frame->buf[0]);
285
286     frame->buf[0] = av_buffer_create((uint8_t*)vtctx->frame,
287                                      sizeof(vtctx->frame),
288                                      videotoolbox_buffer_release,
289                                      NULL,
290                                      AV_BUFFER_FLAG_READONLY);
291     if (!frame->buf[0]) {
292         return AVERROR(ENOMEM);
293     }
294
295     frame->data[3] = (uint8_t*)vtctx->frame;
296     vtctx->frame = NULL;
297
298     return 0;
299 }
300
301 int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
302                                      const uint8_t *buffer,
303                                      uint32_t size)
304 {
305     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
306     H264Context *h  = avctx->priv_data;
307
308     if (h->is_avc == 1) {
309         return videotoolbox_buffer_copy(vtctx, buffer, size);
310     }
311
312     return 0;
313 }
314
315 static int videotoolbox_h264_decode_params(AVCodecContext *avctx,
316                                            int type,
317                                            const uint8_t *buffer,
318                                            uint32_t size)
319 {
320     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
321
322     if (type == H264_NAL_SPS) {
323         if (!vtctx->sps || vtctx->sps_len != size || memcmp(buffer, vtctx->sps, size) != 0) {
324             vtctx->sps = av_fast_realloc(vtctx->sps, &vtctx->sps_capa, size);
325             if (vtctx->sps)
326                 memcpy(vtctx->sps, buffer, size);
327             vtctx->reconfig_needed = true;
328             vtctx->sps_len = size;
329         }
330     }
331
332     // pass-through new PPS to the decoder
333     return ff_videotoolbox_h264_decode_slice(avctx, buffer, size);
334 }
335
336 int ff_videotoolbox_h264_decode_slice(AVCodecContext *avctx,
337                                       const uint8_t *buffer,
338                                       uint32_t size)
339 {
340     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
341     H264Context *h  = avctx->priv_data;
342     void *tmp;
343
344     if (h->is_avc == 1)
345         return 0;
346
347     tmp = av_fast_realloc(vtctx->bitstream,
348                           &vtctx->allocated_size,
349                           vtctx->bitstream_size+size+4);
350     if (!tmp)
351         return AVERROR(ENOMEM);
352
353     vtctx->bitstream = tmp;
354
355     AV_WB32(vtctx->bitstream + vtctx->bitstream_size, size);
356     memcpy(vtctx->bitstream + vtctx->bitstream_size + 4, buffer, size);
357
358     vtctx->bitstream_size += size + 4;
359
360     return 0;
361 }
362
363 int ff_videotoolbox_uninit(AVCodecContext *avctx)
364 {
365     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
366     if (vtctx) {
367         av_freep(&vtctx->bitstream);
368         av_freep(&vtctx->sps);
369         if (vtctx->frame)
370             CVPixelBufferRelease(vtctx->frame);
371     }
372
373     return 0;
374 }
375
376 #if CONFIG_VIDEOTOOLBOX
377 // Return the AVVideotoolboxContext that matters currently. Where it comes from
378 // depends on the API used.
379 static AVVideotoolboxContext *videotoolbox_get_context(AVCodecContext *avctx)
380 {
381     // Somewhat tricky because the user can call av_videotoolbox_default_free()
382     // at any time, even when the codec is closed.
383     if (avctx->internal && avctx->internal->hwaccel_priv_data) {
384         VTContext *vtctx = avctx->internal->hwaccel_priv_data;
385         if (vtctx->vt_ctx)
386             return vtctx->vt_ctx;
387     }
388     return avctx->hwaccel_context;
389 }
390
391 static int videotoolbox_buffer_create(AVCodecContext *avctx, AVFrame *frame)
392 {
393     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
394     CVPixelBufferRef pixbuf = (CVPixelBufferRef)vtctx->frame;
395     OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
396     enum AVPixelFormat sw_format = av_map_videotoolbox_format_to_pixfmt(pixel_format);
397     int width = CVPixelBufferGetWidth(pixbuf);
398     int height = CVPixelBufferGetHeight(pixbuf);
399     AVHWFramesContext *cached_frames;
400     int ret;
401
402     ret = ff_videotoolbox_buffer_create(vtctx, frame);
403     if (ret < 0)
404         return ret;
405
406     // Old API code path.
407     if (!vtctx->cached_hw_frames_ctx)
408         return 0;
409
410     cached_frames = (AVHWFramesContext*)vtctx->cached_hw_frames_ctx->data;
411
412     if (cached_frames->sw_format != sw_format ||
413         cached_frames->width != width ||
414         cached_frames->height != height) {
415         AVBufferRef *hw_frames_ctx = av_hwframe_ctx_alloc(cached_frames->device_ref);
416         AVHWFramesContext *hw_frames;
417         if (!hw_frames_ctx)
418             return AVERROR(ENOMEM);
419
420         hw_frames = (AVHWFramesContext*)hw_frames_ctx->data;
421         hw_frames->format = cached_frames->format;
422         hw_frames->sw_format = sw_format;
423         hw_frames->width = width;
424         hw_frames->height = height;
425
426         ret = av_hwframe_ctx_init(hw_frames_ctx);
427         if (ret < 0) {
428             av_buffer_unref(&hw_frames_ctx);
429             return ret;
430         }
431
432         av_buffer_unref(&vtctx->cached_hw_frames_ctx);
433         vtctx->cached_hw_frames_ctx = hw_frames_ctx;
434     }
435
436     av_buffer_unref(&frame->hw_frames_ctx);
437     frame->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
438     if (!frame->hw_frames_ctx)
439         return AVERROR(ENOMEM);
440
441     return 0;
442 }
443
444 static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
445 {
446     int i;
447     uint8_t b;
448
449     for (i = 3; i >= 0; i--) {
450         b = (length >> (i * 7)) & 0x7F;
451         if (i != 0)
452             b |= 0x80;
453
454         bytestream2_put_byteu(pb, b);
455     }
456 }
457
458 static CFDataRef videotoolbox_esds_extradata_create(AVCodecContext *avctx)
459 {
460     CFDataRef data;
461     uint8_t *rw_extradata;
462     PutByteContext pb;
463     int full_size = 3 + 5 + 13 + 5 + avctx->extradata_size + 3;
464     // ES_DescrTag data + DecoderConfigDescrTag + data + DecSpecificInfoTag + size + SLConfigDescriptor
465     int config_size = 13 + 5 + avctx->extradata_size;
466     int s;
467
468     if (!(rw_extradata = av_mallocz(full_size + VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING)))
469         return NULL;
470
471     bytestream2_init_writer(&pb, rw_extradata, full_size + VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING);
472     bytestream2_put_byteu(&pb, 0);        // version
473     bytestream2_put_ne24(&pb, 0);         // flags
474
475     // elementary stream descriptor
476     bytestream2_put_byteu(&pb, 0x03);     // ES_DescrTag
477     videotoolbox_write_mp4_descr_length(&pb, full_size);
478     bytestream2_put_ne16(&pb, 0);         // esid
479     bytestream2_put_byteu(&pb, 0);        // stream priority (0-32)
480
481     // decoder configuration descriptor
482     bytestream2_put_byteu(&pb, 0x04);     // DecoderConfigDescrTag
483     videotoolbox_write_mp4_descr_length(&pb, config_size);
484     bytestream2_put_byteu(&pb, 32);       // object type indication. 32 = AV_CODEC_ID_MPEG4
485     bytestream2_put_byteu(&pb, 0x11);     // stream type
486     bytestream2_put_ne24(&pb, 0);         // buffer size
487     bytestream2_put_ne32(&pb, 0);         // max bitrate
488     bytestream2_put_ne32(&pb, 0);         // avg bitrate
489
490     // decoder specific descriptor
491     bytestream2_put_byteu(&pb, 0x05);     ///< DecSpecificInfoTag
492     videotoolbox_write_mp4_descr_length(&pb, avctx->extradata_size);
493
494     bytestream2_put_buffer(&pb, avctx->extradata, avctx->extradata_size);
495
496     // SLConfigDescriptor
497     bytestream2_put_byteu(&pb, 0x06);     // SLConfigDescrTag
498     bytestream2_put_byteu(&pb, 0x01);     // length
499     bytestream2_put_byteu(&pb, 0x02);     //
500
501     s = bytestream2_size_p(&pb);
502
503     data = CFDataCreate(kCFAllocatorDefault, rw_extradata, s);
504
505     av_freep(&rw_extradata);
506     return data;
507 }
508
509 static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRef fmt_desc,
510                                                            void *buffer,
511                                                            int size)
512 {
513     OSStatus status;
514     CMBlockBufferRef  block_buf;
515     CMSampleBufferRef sample_buf;
516
517     block_buf  = NULL;
518     sample_buf = NULL;
519
520     status = CMBlockBufferCreateWithMemoryBlock(kCFAllocatorDefault,// structureAllocator
521                                                 buffer,             // memoryBlock
522                                                 size,               // blockLength
523                                                 kCFAllocatorNull,   // blockAllocator
524                                                 NULL,               // customBlockSource
525                                                 0,                  // offsetToData
526                                                 size,               // dataLength
527                                                 0,                  // flags
528                                                 &block_buf);
529
530     if (!status) {
531         status = CMSampleBufferCreate(kCFAllocatorDefault,  // allocator
532                                       block_buf,            // dataBuffer
533                                       TRUE,                 // dataReady
534                                       0,                    // makeDataReadyCallback
535                                       0,                    // makeDataReadyRefcon
536                                       fmt_desc,             // formatDescription
537                                       1,                    // numSamples
538                                       0,                    // numSampleTimingEntries
539                                       NULL,                 // sampleTimingArray
540                                       0,                    // numSampleSizeEntries
541                                       NULL,                 // sampleSizeArray
542                                       &sample_buf);
543     }
544
545     if (block_buf)
546         CFRelease(block_buf);
547
548     return sample_buf;
549 }
550
551 static void videotoolbox_decoder_callback(void *opaque,
552                                           void *sourceFrameRefCon,
553                                           OSStatus status,
554                                           VTDecodeInfoFlags flags,
555                                           CVImageBufferRef image_buffer,
556                                           CMTime pts,
557                                           CMTime duration)
558 {
559     AVCodecContext *avctx = opaque;
560     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
561
562     if (vtctx->frame) {
563         CVPixelBufferRelease(vtctx->frame);
564         vtctx->frame = NULL;
565     }
566
567     if (!image_buffer) {
568         av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
569         return;
570     }
571
572     vtctx->frame = CVPixelBufferRetain(image_buffer);
573 }
574
575 static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
576 {
577     OSStatus status;
578     CMSampleBufferRef sample_buf;
579     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
580     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
581
582     sample_buf = videotoolbox_sample_buffer_create(videotoolbox->cm_fmt_desc,
583                                                    vtctx->bitstream,
584                                                    vtctx->bitstream_size);
585
586     if (!sample_buf)
587         return -1;
588
589     status = VTDecompressionSessionDecodeFrame(videotoolbox->session,
590                                                sample_buf,
591                                                0,       // decodeFlags
592                                                NULL,    // sourceFrameRefCon
593                                                0);      // infoFlagsOut
594     if (status == noErr)
595         status = VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
596
597     CFRelease(sample_buf);
598
599     return status;
600 }
601
602 static CMVideoFormatDescriptionRef videotoolbox_format_desc_create(CMVideoCodecType codec_type,
603                                                                    CFDictionaryRef decoder_spec,
604                                                                    int width,
605                                                                    int height)
606 {
607     CMFormatDescriptionRef cm_fmt_desc;
608     OSStatus status;
609
610     status = CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
611                                             codec_type,
612                                             width,
613                                             height,
614                                             decoder_spec, // Dictionary of extension
615                                             &cm_fmt_desc);
616
617     if (status)
618         return NULL;
619
620     return cm_fmt_desc;
621 }
622
623 static CFDictionaryRef videotoolbox_buffer_attributes_create(int width,
624                                                              int height,
625                                                              OSType pix_fmt)
626 {
627     CFMutableDictionaryRef buffer_attributes;
628     CFMutableDictionaryRef io_surface_properties;
629     CFNumberRef cv_pix_fmt;
630     CFNumberRef w;
631     CFNumberRef h;
632
633     w = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &width);
634     h = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &height);
635     cv_pix_fmt = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);
636
637     buffer_attributes = CFDictionaryCreateMutable(kCFAllocatorDefault,
638                                                   4,
639                                                   &kCFTypeDictionaryKeyCallBacks,
640                                                   &kCFTypeDictionaryValueCallBacks);
641     io_surface_properties = CFDictionaryCreateMutable(kCFAllocatorDefault,
642                                                       0,
643                                                       &kCFTypeDictionaryKeyCallBacks,
644                                                       &kCFTypeDictionaryValueCallBacks);
645
646     if (pix_fmt)
647         CFDictionarySetValue(buffer_attributes, kCVPixelBufferPixelFormatTypeKey, cv_pix_fmt);
648     CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfacePropertiesKey, io_surface_properties);
649     CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
650     CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
651 #if TARGET_OS_IPHONE
652     CFDictionarySetValue(buffer_attributes, kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
653 #else
654     CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey, kCFBooleanTrue);
655 #endif
656
657     CFRelease(io_surface_properties);
658     CFRelease(cv_pix_fmt);
659     CFRelease(w);
660     CFRelease(h);
661
662     return buffer_attributes;
663 }
664
665 static CFDictionaryRef videotoolbox_decoder_config_create(CMVideoCodecType codec_type,
666                                                           AVCodecContext *avctx)
667 {
668     CFMutableDictionaryRef config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
669                                                                    0,
670                                                                    &kCFTypeDictionaryKeyCallBacks,
671                                                                    &kCFTypeDictionaryValueCallBacks);
672
673     CFDictionarySetValue(config_info,
674                          kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder,
675                          kCFBooleanTrue);
676
677     CFMutableDictionaryRef avc_info;
678     CFDataRef data = NULL;
679
680     avc_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
681                                          1,
682                                          &kCFTypeDictionaryKeyCallBacks,
683                                          &kCFTypeDictionaryValueCallBacks);
684
685     switch (codec_type) {
686     case kCMVideoCodecType_MPEG4Video :
687         if (avctx->extradata_size)
688             data = videotoolbox_esds_extradata_create(avctx);
689         if (data)
690             CFDictionarySetValue(avc_info, CFSTR("esds"), data);
691         break;
692     case kCMVideoCodecType_H264 :
693         data = ff_videotoolbox_avcc_extradata_create(avctx);
694         if (data)
695             CFDictionarySetValue(avc_info, CFSTR("avcC"), data);
696         break;
697     case kCMVideoCodecType_HEVC :
698         data = ff_videotoolbox_hvcc_extradata_create(avctx);
699         if (data)
700             CFDictionarySetValue(avc_info, CFSTR("hvcC"), data);
701         break;
702     default:
703         break;
704     }
705
706     CFDictionarySetValue(config_info,
707             kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms,
708             avc_info);
709
710     if (data)
711         CFRelease(data);
712
713     CFRelease(avc_info);
714     return config_info;
715 }
716
717 static int videotoolbox_start(AVCodecContext *avctx)
718 {
719     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
720     OSStatus status;
721     VTDecompressionOutputCallbackRecord decoder_cb;
722     CFDictionaryRef decoder_spec;
723     CFDictionaryRef buf_attr;
724
725     if (!videotoolbox) {
726         av_log(avctx, AV_LOG_ERROR, "hwaccel context is not set\n");
727         return -1;
728     }
729
730     switch( avctx->codec_id ) {
731     case AV_CODEC_ID_H263 :
732         videotoolbox->cm_codec_type = kCMVideoCodecType_H263;
733         break;
734     case AV_CODEC_ID_H264 :
735         videotoolbox->cm_codec_type = kCMVideoCodecType_H264;
736         break;
737     case AV_CODEC_ID_HEVC :
738         videotoolbox->cm_codec_type = kCMVideoCodecType_HEVC;
739         break;
740     case AV_CODEC_ID_MPEG1VIDEO :
741         videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG1Video;
742         break;
743     case AV_CODEC_ID_MPEG2VIDEO :
744         videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG2Video;
745         break;
746     case AV_CODEC_ID_MPEG4 :
747         videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG4Video;
748         break;
749     default :
750         break;
751     }
752
753     decoder_spec = videotoolbox_decoder_config_create(videotoolbox->cm_codec_type, avctx);
754
755     if (!decoder_spec) {
756         av_log(avctx, AV_LOG_ERROR, "decoder specification creation failed\n");
757         return -1;
758     }
759
760     videotoolbox->cm_fmt_desc = videotoolbox_format_desc_create(videotoolbox->cm_codec_type,
761                                                                 decoder_spec,
762                                                                 avctx->width,
763                                                                 avctx->height);
764     if (!videotoolbox->cm_fmt_desc) {
765         if (decoder_spec)
766             CFRelease(decoder_spec);
767
768         av_log(avctx, AV_LOG_ERROR, "format description creation failed\n");
769         return -1;
770     }
771
772     buf_attr = videotoolbox_buffer_attributes_create(avctx->width,
773                                                      avctx->height,
774                                                      videotoolbox->cv_pix_fmt_type);
775
776     decoder_cb.decompressionOutputCallback = videotoolbox_decoder_callback;
777     decoder_cb.decompressionOutputRefCon   = avctx;
778
779     status = VTDecompressionSessionCreate(NULL,                      // allocator
780                                           videotoolbox->cm_fmt_desc, // videoFormatDescription
781                                           decoder_spec,              // videoDecoderSpecification
782                                           buf_attr,                  // destinationImageBufferAttributes
783                                           &decoder_cb,               // outputCallback
784                                           &videotoolbox->session);   // decompressionSessionOut
785
786     if (decoder_spec)
787         CFRelease(decoder_spec);
788     if (buf_attr)
789         CFRelease(buf_attr);
790
791     switch (status) {
792     case kVTVideoDecoderNotAvailableNowErr:
793         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox session not available.\n");
794         return AVERROR(ENOSYS);
795     case kVTVideoDecoderUnsupportedDataFormatErr:
796         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox does not support this format.\n");
797         return AVERROR(ENOSYS);
798     case kVTVideoDecoderMalfunctionErr:
799         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox malfunction.\n");
800         return AVERROR(EINVAL);
801     case kVTVideoDecoderBadDataErr:
802         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox reported invalid data.\n");
803         return AVERROR_INVALIDDATA;
804     case 0:
805         return 0;
806     default:
807         av_log(avctx, AV_LOG_VERBOSE, "Unknown VideoToolbox session creation error %u\n", (unsigned)status);
808         return AVERROR_UNKNOWN;
809     }
810 }
811
812 static void videotoolbox_stop(AVCodecContext *avctx)
813 {
814     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
815     if (!videotoolbox)
816         return;
817
818     if (videotoolbox->cm_fmt_desc) {
819         CFRelease(videotoolbox->cm_fmt_desc);
820         videotoolbox->cm_fmt_desc = NULL;
821     }
822
823     if (videotoolbox->session) {
824         VTDecompressionSessionInvalidate(videotoolbox->session);
825         CFRelease(videotoolbox->session);
826         videotoolbox->session = NULL;
827     }
828 }
829
830 static const char *videotoolbox_error_string(OSStatus status)
831 {
832     switch (status) {
833         case kVTVideoDecoderBadDataErr:
834             return "bad data";
835         case kVTVideoDecoderMalfunctionErr:
836             return "decoder malfunction";
837         case kVTInvalidSessionErr:
838             return "invalid session";
839     }
840     return "unknown";
841 }
842
843 static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
844 {
845     OSStatus status;
846     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
847     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
848
849     if (vtctx->reconfig_needed == true) {
850         vtctx->reconfig_needed = false;
851         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox decoder needs reconfig, restarting..\n");
852         videotoolbox_stop(avctx);
853         if (videotoolbox_start(avctx) != 0) {
854             return AVERROR_EXTERNAL;
855         }
856     }
857
858     if (!videotoolbox->session || !vtctx->bitstream || !vtctx->bitstream_size)
859         return AVERROR_INVALIDDATA;
860
861     status = videotoolbox_session_decode_frame(avctx);
862     if (status != noErr) {
863         if (status == kVTVideoDecoderMalfunctionErr || status == kVTInvalidSessionErr)
864             vtctx->reconfig_needed = true;
865         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%s, %d)\n", videotoolbox_error_string(status), (int)status);
866         return AVERROR_UNKNOWN;
867     }
868
869     if (!vtctx->frame) {
870         vtctx->reconfig_needed = true;
871         return AVERROR_UNKNOWN;
872     }
873
874     return videotoolbox_buffer_create(avctx, frame);
875 }
876
877 static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
878 {
879     H264Context *h = avctx->priv_data;
880     AVFrame *frame = h->cur_pic_ptr->f;
881     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
882     int ret = videotoolbox_common_end_frame(avctx, frame);
883     vtctx->bitstream_size = 0;
884     return ret;
885 }
886
887 static int videotoolbox_hevc_decode_params(AVCodecContext *avctx,
888                                            int type,
889                                            const uint8_t *buffer,
890                                            uint32_t size)
891 {
892     return ff_videotoolbox_h264_decode_slice(avctx, buffer, size);
893 }
894
895 static int videotoolbox_hevc_end_frame(AVCodecContext *avctx)
896 {
897     HEVCContext *h = avctx->priv_data;
898     AVFrame *frame = h->ref->frame;
899     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
900     int ret;
901
902     ret = videotoolbox_common_end_frame(avctx, frame);
903     vtctx->bitstream_size = 0;
904     return ret;
905 }
906
907 static int videotoolbox_mpeg_start_frame(AVCodecContext *avctx,
908                                          const uint8_t *buffer,
909                                          uint32_t size)
910 {
911     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
912
913     return videotoolbox_buffer_copy(vtctx, buffer, size);
914 }
915
916 static int videotoolbox_mpeg_decode_slice(AVCodecContext *avctx,
917                                           const uint8_t *buffer,
918                                           uint32_t size)
919 {
920     return 0;
921 }
922
923 static int videotoolbox_mpeg_end_frame(AVCodecContext *avctx)
924 {
925     MpegEncContext *s = avctx->priv_data;
926     AVFrame *frame = s->current_picture_ptr->f;
927
928     return videotoolbox_common_end_frame(avctx, frame);
929 }
930
931 static int videotoolbox_uninit(AVCodecContext *avctx)
932 {
933     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
934     if (!vtctx)
935         return 0;
936
937     ff_videotoolbox_uninit(avctx);
938
939     if (vtctx->vt_ctx)
940         videotoolbox_stop(avctx);
941
942     av_buffer_unref(&vtctx->cached_hw_frames_ctx);
943     av_freep(&vtctx->vt_ctx);
944
945     return 0;
946 }
947
948 static int videotoolbox_common_init(AVCodecContext *avctx)
949 {
950     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
951     AVHWFramesContext *hw_frames;
952     int err;
953
954     // Old API - do nothing.
955     if (avctx->hwaccel_context)
956         return 0;
957
958     if (!avctx->hw_frames_ctx && !avctx->hw_device_ctx) {
959         av_log(avctx, AV_LOG_ERROR,
960                "Either hw_frames_ctx or hw_device_ctx must be set.\n");
961         return AVERROR(EINVAL);
962     }
963
964     vtctx->vt_ctx = av_videotoolbox_alloc_context();
965     if (!vtctx->vt_ctx) {
966         err = AVERROR(ENOMEM);
967         goto fail;
968     }
969
970     if (avctx->hw_frames_ctx) {
971         hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
972     } else {
973         avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
974         if (!avctx->hw_frames_ctx) {
975             err = AVERROR(ENOMEM);
976             goto fail;
977         }
978
979         hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
980         hw_frames->format = AV_PIX_FMT_VIDEOTOOLBOX;
981         hw_frames->sw_format = AV_PIX_FMT_NV12; // same as av_videotoolbox_alloc_context()
982         hw_frames->width = avctx->width;
983         hw_frames->height = avctx->height;
984
985         err = av_hwframe_ctx_init(avctx->hw_frames_ctx);
986         if (err < 0) {
987             av_buffer_unref(&avctx->hw_frames_ctx);
988             goto fail;
989         }
990     }
991
992     vtctx->cached_hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
993     if (!vtctx->cached_hw_frames_ctx) {
994         err = AVERROR(ENOMEM);
995         goto fail;
996     }
997
998     vtctx->vt_ctx->cv_pix_fmt_type =
999         av_map_videotoolbox_format_from_pixfmt(hw_frames->sw_format);
1000     if (!vtctx->vt_ctx->cv_pix_fmt_type) {
1001         av_log(avctx, AV_LOG_ERROR, "Unknown sw_format.\n");
1002         err = AVERROR(EINVAL);
1003         goto fail;
1004     }
1005
1006     err = videotoolbox_start(avctx);
1007     if (err < 0)
1008         goto fail;
1009
1010     return 0;
1011
1012 fail:
1013     videotoolbox_uninit(avctx);
1014     return err;
1015 }
1016
1017 static int videotoolbox_frame_params(AVCodecContext *avctx,
1018                                      AVBufferRef *hw_frames_ctx)
1019 {
1020     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
1021
1022     frames_ctx->format            = AV_PIX_FMT_VIDEOTOOLBOX;
1023     frames_ctx->width             = avctx->coded_width;
1024     frames_ctx->height            = avctx->coded_height;
1025     frames_ctx->sw_format         = AV_PIX_FMT_NV12;
1026
1027     return 0;
1028 }
1029
1030 AVHWAccel ff_h263_videotoolbox_hwaccel = {
1031     .name           = "h263_videotoolbox",
1032     .type           = AVMEDIA_TYPE_VIDEO,
1033     .id             = AV_CODEC_ID_H263,
1034     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1035     .alloc_frame    = ff_videotoolbox_alloc_frame,
1036     .start_frame    = videotoolbox_mpeg_start_frame,
1037     .decode_slice   = videotoolbox_mpeg_decode_slice,
1038     .end_frame      = videotoolbox_mpeg_end_frame,
1039     .frame_params   = videotoolbox_frame_params,
1040     .init           = videotoolbox_common_init,
1041     .uninit         = videotoolbox_uninit,
1042     .priv_data_size = sizeof(VTContext),
1043 };
1044
1045 AVHWAccel ff_hevc_videotoolbox_hwaccel = {
1046     .name           = "hevc_videotoolbox",
1047     .type           = AVMEDIA_TYPE_VIDEO,
1048     .id             = AV_CODEC_ID_HEVC,
1049     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1050     .alloc_frame    = ff_videotoolbox_alloc_frame,
1051     .start_frame    = ff_videotoolbox_h264_start_frame,
1052     .decode_slice   = ff_videotoolbox_h264_decode_slice,
1053     .decode_params  = videotoolbox_hevc_decode_params,
1054     .end_frame      = videotoolbox_hevc_end_frame,
1055     .frame_params   = videotoolbox_frame_params,
1056     .init           = videotoolbox_common_init,
1057     .uninit         = ff_videotoolbox_uninit,
1058     .priv_data_size = sizeof(VTContext),
1059 };
1060
1061 AVHWAccel ff_h264_videotoolbox_hwaccel = {
1062     .name           = "h264_videotoolbox",
1063     .type           = AVMEDIA_TYPE_VIDEO,
1064     .id             = AV_CODEC_ID_H264,
1065     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1066     .alloc_frame    = ff_videotoolbox_alloc_frame,
1067     .start_frame    = ff_videotoolbox_h264_start_frame,
1068     .decode_slice   = ff_videotoolbox_h264_decode_slice,
1069     .decode_params  = videotoolbox_h264_decode_params,
1070     .end_frame      = videotoolbox_h264_end_frame,
1071     .frame_params   = videotoolbox_frame_params,
1072     .init           = videotoolbox_common_init,
1073     .uninit         = videotoolbox_uninit,
1074     .priv_data_size = sizeof(VTContext),
1075 };
1076
1077 AVHWAccel ff_mpeg1_videotoolbox_hwaccel = {
1078     .name           = "mpeg1_videotoolbox",
1079     .type           = AVMEDIA_TYPE_VIDEO,
1080     .id             = AV_CODEC_ID_MPEG1VIDEO,
1081     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1082     .alloc_frame    = ff_videotoolbox_alloc_frame,
1083     .start_frame    = videotoolbox_mpeg_start_frame,
1084     .decode_slice   = videotoolbox_mpeg_decode_slice,
1085     .end_frame      = videotoolbox_mpeg_end_frame,
1086     .frame_params   = videotoolbox_frame_params,
1087     .init           = videotoolbox_common_init,
1088     .uninit         = videotoolbox_uninit,
1089     .priv_data_size = sizeof(VTContext),
1090 };
1091
1092 AVHWAccel ff_mpeg2_videotoolbox_hwaccel = {
1093     .name           = "mpeg2_videotoolbox",
1094     .type           = AVMEDIA_TYPE_VIDEO,
1095     .id             = AV_CODEC_ID_MPEG2VIDEO,
1096     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1097     .alloc_frame    = ff_videotoolbox_alloc_frame,
1098     .start_frame    = videotoolbox_mpeg_start_frame,
1099     .decode_slice   = videotoolbox_mpeg_decode_slice,
1100     .end_frame      = videotoolbox_mpeg_end_frame,
1101     .frame_params   = videotoolbox_frame_params,
1102     .init           = videotoolbox_common_init,
1103     .uninit         = videotoolbox_uninit,
1104     .priv_data_size = sizeof(VTContext),
1105 };
1106
1107 AVHWAccel ff_mpeg4_videotoolbox_hwaccel = {
1108     .name           = "mpeg4_videotoolbox",
1109     .type           = AVMEDIA_TYPE_VIDEO,
1110     .id             = AV_CODEC_ID_MPEG4,
1111     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1112     .alloc_frame    = ff_videotoolbox_alloc_frame,
1113     .start_frame    = videotoolbox_mpeg_start_frame,
1114     .decode_slice   = videotoolbox_mpeg_decode_slice,
1115     .end_frame      = videotoolbox_mpeg_end_frame,
1116     .frame_params   = videotoolbox_frame_params,
1117     .init           = videotoolbox_common_init,
1118     .uninit         = videotoolbox_uninit,
1119     .priv_data_size = sizeof(VTContext),
1120 };
1121
1122 AVVideotoolboxContext *av_videotoolbox_alloc_context(void)
1123 {
1124     AVVideotoolboxContext *ret = av_mallocz(sizeof(*ret));
1125
1126     if (ret) {
1127         ret->output_callback = videotoolbox_decoder_callback;
1128         ret->cv_pix_fmt_type = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
1129     }
1130
1131     return ret;
1132 }
1133
1134 int av_videotoolbox_default_init(AVCodecContext *avctx)
1135 {
1136     return av_videotoolbox_default_init2(avctx, NULL);
1137 }
1138
1139 int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *vtctx)
1140 {
1141     avctx->hwaccel_context = vtctx ?: av_videotoolbox_alloc_context();
1142     if (!avctx->hwaccel_context)
1143         return AVERROR(ENOMEM);
1144     return videotoolbox_start(avctx);
1145 }
1146
1147 void av_videotoolbox_default_free(AVCodecContext *avctx)
1148 {
1149
1150     videotoolbox_stop(avctx);
1151     av_freep(&avctx->hwaccel_context);
1152 }
1153 #endif /* CONFIG_VIDEOTOOLBOX */