]> git.sesse.net Git - ffmpeg/blob - libavcodec/videotoolbox.c
c24f5aa3f399c5d2cf0f8b33d0ef472ba680d1b4
[ffmpeg] / libavcodec / videotoolbox.c
1 /*
2  * Videotoolbox hardware acceleration
3  *
4  * copyright (c) 2012 Sebastien Zwickert
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22
23 #include "config.h"
24 #include "videotoolbox.h"
25 #include "libavutil/hwcontext_videotoolbox.h"
26 #include "vt_internal.h"
27 #include "libavutil/avutil.h"
28 #include "libavutil/hwcontext.h"
29 #include "bytestream.h"
30 #include "h264dec.h"
31 #include "hevcdec.h"
32 #include "mpegvideo.h"
33 #include <TargetConditionals.h>
34
35 #ifndef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
36 #  define kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder CFSTR("RequireHardwareAcceleratedVideoDecoder")
37 #endif
38
39 #if !HAVE_KCMVIDEOCODECTYPE_HEVC
40 enum { kCMVideoCodecType_HEVC = 'hvc1' };
41 #endif
42
43 #define VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING  12
44
45 static void videotoolbox_stop(AVCodecContext *avctx);
46 static int videotoolbox_start(AVCodecContext *avctx);
47
48 static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
49 {
50     CVPixelBufferRef cv_buffer = (CVImageBufferRef)data;
51     CVPixelBufferRelease(cv_buffer);
52 }
53
54 static int videotoolbox_buffer_copy(VTContext *vtctx,
55                                     const uint8_t *buffer,
56                                     uint32_t size)
57 {
58     void *tmp;
59
60     tmp = av_fast_realloc(vtctx->bitstream,
61                          &vtctx->allocated_size,
62                          size);
63
64     if (!tmp)
65         return AVERROR(ENOMEM);
66
67     vtctx->bitstream = tmp;
68     memcpy(vtctx->bitstream, buffer, size);
69     vtctx->bitstream_size = size;
70
71     return 0;
72 }
73
74 int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
75 {
76     frame->width  = avctx->width;
77     frame->height = avctx->height;
78     frame->format = avctx->pix_fmt;
79     frame->buf[0] = av_buffer_alloc(1);
80
81     if (!frame->buf[0])
82         return AVERROR(ENOMEM);
83
84     return 0;
85 }
86
/*
 * Store the single byte v at address p.
 * The whole expansion is parenthesized so the macro stays one expression
 * even when it appears inside a larger expression.
 */
#define AV_W8(p, v) (*(p) = (v))
88
89 CFDataRef ff_videotoolbox_avcc_extradata_create(AVCodecContext *avctx)
90 {
91     H264Context *h     = avctx->priv_data;
92     CFDataRef data = NULL;
93     uint8_t *p;
94     int vt_extradata_size = 6 + 2 + h->ps.sps->data_size + 3 + h->ps.pps->data_size;
95     uint8_t *vt_extradata = av_malloc(vt_extradata_size);
96     if (!vt_extradata)
97         return NULL;
98
99     p = vt_extradata;
100
101     AV_W8(p + 0, 1); /* version */
102     AV_W8(p + 1, h->ps.sps->data[1]); /* profile */
103     AV_W8(p + 2, h->ps.sps->data[2]); /* profile compat */
104     AV_W8(p + 3, h->ps.sps->data[3]); /* level */
105     AV_W8(p + 4, 0xff); /* 6 bits reserved (111111) + 2 bits nal size length - 3 (11) */
106     AV_W8(p + 5, 0xe1); /* 3 bits reserved (111) + 5 bits number of sps (00001) */
107     AV_WB16(p + 6, h->ps.sps->data_size);
108     memcpy(p + 8, h->ps.sps->data, h->ps.sps->data_size);
109     p += 8 + h->ps.sps->data_size;
110     AV_W8(p + 0, 1); /* number of pps */
111     AV_WB16(p + 1, h->ps.pps->data_size);
112     memcpy(p + 3, h->ps.pps->data, h->ps.pps->data_size);
113
114     p += 3 + h->ps.pps->data_size;
115     av_assert0(p - vt_extradata == vt_extradata_size);
116
117     data = CFDataCreate(kCFAllocatorDefault, vt_extradata, vt_extradata_size);
118     av_free(vt_extradata);
119     return data;
120 }
121
122 CFDataRef ff_videotoolbox_hvcc_extradata_create(AVCodecContext *avctx)
123 {
124     HEVCContext *h = avctx->priv_data;
125     const HEVCVPS *vps = (const HEVCVPS *)h->ps.vps_list[0]->data;
126     const HEVCSPS *sps = (const HEVCSPS *)h->ps.sps_list[0]->data;
127     int i, num_pps = 0;
128     const HEVCPPS *pps = h->ps.pps;
129     PTLCommon ptlc = vps->ptl.general_ptl;
130     VUI vui = sps->vui;
131     uint8_t parallelismType;
132     CFDataRef data = NULL;
133     uint8_t *p;
134     int vt_extradata_size = 23 + 5 + vps->data_size + 5 + sps->data_size + 3;
135     uint8_t *vt_extradata;
136
137     for (i = 0; i < HEVC_MAX_PPS_COUNT; i++) {
138         if (h->ps.pps_list[i]) {
139             const HEVCPPS *pps = (const HEVCPPS *)h->ps.pps_list[i]->data;
140             vt_extradata_size += 2 + pps->data_size;
141             num_pps++;
142         }
143     }
144
145     vt_extradata = av_malloc(vt_extradata_size);
146     if (!vt_extradata)
147         return NULL;
148     p = vt_extradata;
149
150     /* unsigned int(8) configurationVersion = 1; */
151     AV_W8(p + 0, 1);
152
153     /*
154      * unsigned int(2) general_profile_space;
155      * unsigned int(1) general_tier_flag;
156      * unsigned int(5) general_profile_idc;
157      */
158     AV_W8(p + 1, ptlc.profile_space << 6 |
159                  ptlc.tier_flag     << 5 |
160                  ptlc.profile_idc);
161
162     /* unsigned int(32) general_profile_compatibility_flags; */
163     memcpy(p + 2, ptlc.profile_compatibility_flag, 4);
164
165     /* unsigned int(48) general_constraint_indicator_flags; */
166     AV_W8(p + 6, ptlc.progressive_source_flag    << 7 |
167                  ptlc.interlaced_source_flag     << 6 |
168                  ptlc.non_packed_constraint_flag << 5 |
169                  ptlc.frame_only_constraint_flag << 4);
170     AV_W8(p + 7, 0);
171     AV_WN32(p + 8, 0);
172
173     /* unsigned int(8) general_level_idc; */
174     AV_W8(p + 12, ptlc.level_idc);
175
176     /*
177      * bit(4) reserved = ‘1111’b;
178      * unsigned int(12) min_spatial_segmentation_idc;
179      */
180     AV_W8(p + 13, 0xf0 | (vui.min_spatial_segmentation_idc >> 4));
181     AV_W8(p + 14, vui.min_spatial_segmentation_idc & 0xff);
182
183     /*
184      * bit(6) reserved = ‘111111’b;
185      * unsigned int(2) parallelismType;
186      */
187     if (!vui.min_spatial_segmentation_idc)
188         parallelismType = 0;
189     else if (pps->entropy_coding_sync_enabled_flag && pps->tiles_enabled_flag)
190         parallelismType = 0;
191     else if (pps->entropy_coding_sync_enabled_flag)
192         parallelismType = 3;
193     else if (pps->tiles_enabled_flag)
194         parallelismType = 2;
195     else
196         parallelismType = 1;
197     AV_W8(p + 15, 0xfc | parallelismType);
198
199     /*
200      * bit(6) reserved = ‘111111’b;
201      * unsigned int(2) chromaFormat;
202      */
203     AV_W8(p + 16, sps->chroma_format_idc | 0xfc);
204
205     /*
206      * bit(5) reserved = ‘11111’b;
207      * unsigned int(3) bitDepthLumaMinus8;
208      */
209     AV_W8(p + 17, (sps->bit_depth - 8) | 0xfc);
210
211     /*
212      * bit(5) reserved = ‘11111’b;
213      * unsigned int(3) bitDepthChromaMinus8;
214      */
215     AV_W8(p + 18, (sps->bit_depth_chroma - 8) | 0xfc);
216
217     /* bit(16) avgFrameRate; */
218     AV_WB16(p + 19, 0);
219
220     /*
221      * bit(2) constantFrameRate;
222      * bit(3) numTemporalLayers;
223      * bit(1) temporalIdNested;
224      * unsigned int(2) lengthSizeMinusOne;
225      */
226     AV_W8(p + 21, 0                             << 6 |
227                   sps->max_sub_layers           << 3 |
228                   sps->temporal_id_nesting_flag << 2 |
229                   3);
230
231     /* unsigned int(8) numOfArrays; */
232     AV_W8(p + 22, 3);
233
234     p += 23;
235     /* vps */
236     /*
237      * bit(1) array_completeness;
238      * unsigned int(1) reserved = 0;
239      * unsigned int(6) NAL_unit_type;
240      */
241     AV_W8(p, 1 << 7 |
242              HEVC_NAL_VPS & 0x3f);
243     /* unsigned int(16) numNalus; */
244     AV_WB16(p + 1, 1);
245     /* unsigned int(16) nalUnitLength; */
246     AV_WB16(p + 3, vps->data_size);
247     /* bit(8*nalUnitLength) nalUnit; */
248     memcpy(p + 5, vps->data, vps->data_size);
249     p += 5 + vps->data_size;
250
251     /* sps */
252     AV_W8(p, 1 << 7 |
253              HEVC_NAL_SPS & 0x3f);
254     AV_WB16(p + 1, 1);
255     AV_WB16(p + 3, sps->data_size);
256     memcpy(p + 5, sps->data, sps->data_size);
257     p += 5 + sps->data_size;
258
259     /* pps */
260     AV_W8(p, 1 << 7 |
261              HEVC_NAL_PPS & 0x3f);
262     AV_WB16(p + 1, num_pps);
263     p += 3;
264     for (i = 0; i < HEVC_MAX_PPS_COUNT; i++) {
265         if (h->ps.pps_list[i]) {
266             const HEVCPPS *pps = (const HEVCPPS *)h->ps.pps_list[i]->data;
267             AV_WB16(p, pps->data_size);
268             memcpy(p + 2, pps->data, pps->data_size);
269             p += 2 + pps->data_size;
270         }
271     }
272
273     av_assert0(p - vt_extradata == vt_extradata_size);
274
275     data = CFDataCreate(kCFAllocatorDefault, vt_extradata, vt_extradata_size);
276     av_free(vt_extradata);
277     return data;
278 }
279
280 int ff_videotoolbox_buffer_create(VTContext *vtctx, AVFrame *frame)
281 {
282     av_buffer_unref(&frame->buf[0]);
283
284     frame->buf[0] = av_buffer_create((uint8_t*)vtctx->frame,
285                                      sizeof(vtctx->frame),
286                                      videotoolbox_buffer_release,
287                                      NULL,
288                                      AV_BUFFER_FLAG_READONLY);
289     if (!frame->buf[0]) {
290         return AVERROR(ENOMEM);
291     }
292
293     frame->data[3] = (uint8_t*)vtctx->frame;
294     vtctx->frame = NULL;
295
296     return 0;
297 }
298
299 int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
300                                      const uint8_t *buffer,
301                                      uint32_t size)
302 {
303     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
304     H264Context *h  = avctx->priv_data;
305
306     if (h->is_avc == 1) {
307         return videotoolbox_buffer_copy(vtctx, buffer, size);
308     }
309
310     return 0;
311 }
312
313 static int videotoolbox_h264_decode_params(AVCodecContext *avctx,
314                                            int type,
315                                            const uint8_t *buffer,
316                                            uint32_t size)
317 {
318     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
319
320     if (type == H264_NAL_SPS) {
321         if (!vtctx->sps || vtctx->sps_len != size || memcmp(buffer, vtctx->sps, size) != 0) {
322             vtctx->sps = av_fast_realloc(vtctx->sps, &vtctx->sps_capa, size);
323             if (vtctx->sps)
324                 memcpy(vtctx->sps, buffer, size);
325             vtctx->reconfig_needed = true;
326             vtctx->sps_len = size;
327         }
328     }
329
330     // pass-through new PPS to the decoder
331     return ff_videotoolbox_h264_decode_slice(avctx, buffer, size);
332 }
333
334 int ff_videotoolbox_h264_decode_slice(AVCodecContext *avctx,
335                                       const uint8_t *buffer,
336                                       uint32_t size)
337 {
338     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
339     H264Context *h  = avctx->priv_data;
340     void *tmp;
341
342     if (h->is_avc == 1)
343         return 0;
344
345     tmp = av_fast_realloc(vtctx->bitstream,
346                           &vtctx->allocated_size,
347                           vtctx->bitstream_size+size+4);
348     if (!tmp)
349         return AVERROR(ENOMEM);
350
351     vtctx->bitstream = tmp;
352
353     AV_WB32(vtctx->bitstream + vtctx->bitstream_size, size);
354     memcpy(vtctx->bitstream + vtctx->bitstream_size + 4, buffer, size);
355
356     vtctx->bitstream_size += size + 4;
357
358     return 0;
359 }
360
361 int ff_videotoolbox_uninit(AVCodecContext *avctx)
362 {
363     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
364     if (vtctx) {
365         av_freep(&vtctx->bitstream);
366         av_freep(&vtctx->sps);
367         if (vtctx->frame)
368             CVPixelBufferRelease(vtctx->frame);
369     }
370
371     return 0;
372 }
373
374 #if CONFIG_VIDEOTOOLBOX
375 // Return the AVVideotoolboxContext that matters currently. Where it comes from
376 // depends on the API used.
377 static AVVideotoolboxContext *videotoolbox_get_context(AVCodecContext *avctx)
378 {
379     // Somewhat tricky because the user can call av_videotoolbox_default_free()
380     // at any time, even when the codec is closed.
381     if (avctx->internal && avctx->internal->hwaccel_priv_data) {
382         VTContext *vtctx = avctx->internal->hwaccel_priv_data;
383         if (vtctx->vt_ctx)
384             return vtctx->vt_ctx;
385     }
386     return avctx->hwaccel_context;
387 }
388
389 static int videotoolbox_buffer_create(AVCodecContext *avctx, AVFrame *frame)
390 {
391     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
392     CVPixelBufferRef pixbuf = (CVPixelBufferRef)vtctx->frame;
393     OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
394     enum AVPixelFormat sw_format = av_map_videotoolbox_format_to_pixfmt(pixel_format);
395     int width = CVPixelBufferGetWidth(pixbuf);
396     int height = CVPixelBufferGetHeight(pixbuf);
397     AVHWFramesContext *cached_frames;
398     int ret;
399
400     ret = ff_videotoolbox_buffer_create(vtctx, frame);
401     if (ret < 0)
402         return ret;
403
404     // Old API code path.
405     if (!vtctx->cached_hw_frames_ctx)
406         return 0;
407
408     cached_frames = (AVHWFramesContext*)vtctx->cached_hw_frames_ctx->data;
409
410     if (cached_frames->sw_format != sw_format ||
411         cached_frames->width != width ||
412         cached_frames->height != height) {
413         AVBufferRef *hw_frames_ctx = av_hwframe_ctx_alloc(cached_frames->device_ref);
414         AVHWFramesContext *hw_frames;
415         if (!hw_frames_ctx)
416             return AVERROR(ENOMEM);
417
418         hw_frames = (AVHWFramesContext*)hw_frames_ctx->data;
419         hw_frames->format = cached_frames->format;
420         hw_frames->sw_format = sw_format;
421         hw_frames->width = width;
422         hw_frames->height = height;
423
424         ret = av_hwframe_ctx_init(hw_frames_ctx);
425         if (ret < 0) {
426             av_buffer_unref(&hw_frames_ctx);
427             return ret;
428         }
429
430         av_buffer_unref(&vtctx->cached_hw_frames_ctx);
431         vtctx->cached_hw_frames_ctx = hw_frames_ctx;
432     }
433
434     av_buffer_unref(&frame->hw_frames_ctx);
435     frame->hw_frames_ctx = av_buffer_ref(vtctx->cached_hw_frames_ctx);
436     if (!frame->hw_frames_ctx)
437         return AVERROR(ENOMEM);
438
439     return 0;
440 }
441
442 static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
443 {
444     int i;
445     uint8_t b;
446
447     for (i = 3; i >= 0; i--) {
448         b = (length >> (i * 7)) & 0x7F;
449         if (i != 0)
450             b |= 0x80;
451
452         bytestream2_put_byteu(pb, b);
453     }
454 }
455
456 static CFDataRef videotoolbox_esds_extradata_create(AVCodecContext *avctx)
457 {
458     CFDataRef data;
459     uint8_t *rw_extradata;
460     PutByteContext pb;
461     int full_size = 3 + 5 + 13 + 5 + avctx->extradata_size + 3;
462     // ES_DescrTag data + DecoderConfigDescrTag + data + DecSpecificInfoTag + size + SLConfigDescriptor
463     int config_size = 13 + 5 + avctx->extradata_size;
464     int s;
465
466     if (!(rw_extradata = av_mallocz(full_size + VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING)))
467         return NULL;
468
469     bytestream2_init_writer(&pb, rw_extradata, full_size + VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING);
470     bytestream2_put_byteu(&pb, 0);        // version
471     bytestream2_put_ne24(&pb, 0);         // flags
472
473     // elementary stream descriptor
474     bytestream2_put_byteu(&pb, 0x03);     // ES_DescrTag
475     videotoolbox_write_mp4_descr_length(&pb, full_size);
476     bytestream2_put_ne16(&pb, 0);         // esid
477     bytestream2_put_byteu(&pb, 0);        // stream priority (0-32)
478
479     // decoder configuration descriptor
480     bytestream2_put_byteu(&pb, 0x04);     // DecoderConfigDescrTag
481     videotoolbox_write_mp4_descr_length(&pb, config_size);
482     bytestream2_put_byteu(&pb, 32);       // object type indication. 32 = AV_CODEC_ID_MPEG4
483     bytestream2_put_byteu(&pb, 0x11);     // stream type
484     bytestream2_put_ne24(&pb, 0);         // buffer size
485     bytestream2_put_ne32(&pb, 0);         // max bitrate
486     bytestream2_put_ne32(&pb, 0);         // avg bitrate
487
488     // decoder specific descriptor
489     bytestream2_put_byteu(&pb, 0x05);     ///< DecSpecificInfoTag
490     videotoolbox_write_mp4_descr_length(&pb, avctx->extradata_size);
491
492     bytestream2_put_buffer(&pb, avctx->extradata, avctx->extradata_size);
493
494     // SLConfigDescriptor
495     bytestream2_put_byteu(&pb, 0x06);     // SLConfigDescrTag
496     bytestream2_put_byteu(&pb, 0x01);     // length
497     bytestream2_put_byteu(&pb, 0x02);     //
498
499     s = bytestream2_size_p(&pb);
500
501     data = CFDataCreate(kCFAllocatorDefault, rw_extradata, s);
502
503     av_freep(&rw_extradata);
504     return data;
505 }
506
507 static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRef fmt_desc,
508                                                            void *buffer,
509                                                            int size)
510 {
511     OSStatus status;
512     CMBlockBufferRef  block_buf;
513     CMSampleBufferRef sample_buf;
514
515     block_buf  = NULL;
516     sample_buf = NULL;
517
518     status = CMBlockBufferCreateWithMemoryBlock(kCFAllocatorDefault,// structureAllocator
519                                                 buffer,             // memoryBlock
520                                                 size,               // blockLength
521                                                 kCFAllocatorNull,   // blockAllocator
522                                                 NULL,               // customBlockSource
523                                                 0,                  // offsetToData
524                                                 size,               // dataLength
525                                                 0,                  // flags
526                                                 &block_buf);
527
528     if (!status) {
529         status = CMSampleBufferCreate(kCFAllocatorDefault,  // allocator
530                                       block_buf,            // dataBuffer
531                                       TRUE,                 // dataReady
532                                       0,                    // makeDataReadyCallback
533                                       0,                    // makeDataReadyRefcon
534                                       fmt_desc,             // formatDescription
535                                       1,                    // numSamples
536                                       0,                    // numSampleTimingEntries
537                                       NULL,                 // sampleTimingArray
538                                       0,                    // numSampleSizeEntries
539                                       NULL,                 // sampleSizeArray
540                                       &sample_buf);
541     }
542
543     if (block_buf)
544         CFRelease(block_buf);
545
546     return sample_buf;
547 }
548
549 static void videotoolbox_decoder_callback(void *opaque,
550                                           void *sourceFrameRefCon,
551                                           OSStatus status,
552                                           VTDecodeInfoFlags flags,
553                                           CVImageBufferRef image_buffer,
554                                           CMTime pts,
555                                           CMTime duration)
556 {
557     AVCodecContext *avctx = opaque;
558     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
559
560     if (vtctx->frame) {
561         CVPixelBufferRelease(vtctx->frame);
562         vtctx->frame = NULL;
563     }
564
565     if (!image_buffer) {
566         av_log(NULL, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
567         return;
568     }
569
570     vtctx->frame = CVPixelBufferRetain(image_buffer);
571 }
572
573 static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
574 {
575     OSStatus status;
576     CMSampleBufferRef sample_buf;
577     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
578     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
579
580     sample_buf = videotoolbox_sample_buffer_create(videotoolbox->cm_fmt_desc,
581                                                    vtctx->bitstream,
582                                                    vtctx->bitstream_size);
583
584     if (!sample_buf)
585         return -1;
586
587     status = VTDecompressionSessionDecodeFrame(videotoolbox->session,
588                                                sample_buf,
589                                                0,       // decodeFlags
590                                                NULL,    // sourceFrameRefCon
591                                                0);      // infoFlagsOut
592     if (status == noErr)
593         status = VTDecompressionSessionWaitForAsynchronousFrames(videotoolbox->session);
594
595     CFRelease(sample_buf);
596
597     return status;
598 }
599
600 static const char *videotoolbox_error_string(OSStatus status)
601 {
602     switch (status) {
603         case kVTVideoDecoderBadDataErr:
604             return "bad data";
605         case kVTVideoDecoderMalfunctionErr:
606             return "decoder malfunction";
607         case kVTInvalidSessionErr:
608             return "invalid session";
609     }
610     return "unknown";
611 }
612
613 static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
614 {
615     OSStatus status;
616     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
617     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
618
619     if (vtctx->reconfig_needed == true) {
620         vtctx->reconfig_needed = false;
621         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox decoder needs reconfig, restarting..\n");
622         videotoolbox_stop(avctx);
623         if (videotoolbox_start(avctx) != 0) {
624             return AVERROR_EXTERNAL;
625         }
626     }
627
628     if (!videotoolbox->session || !vtctx->bitstream || !vtctx->bitstream_size)
629         return AVERROR_INVALIDDATA;
630
631     status = videotoolbox_session_decode_frame(avctx);
632     if (status != noErr) {
633         if (status == kVTVideoDecoderMalfunctionErr || status == kVTInvalidSessionErr)
634             vtctx->reconfig_needed = true;
635         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%s, %d)\n", videotoolbox_error_string(status), (int)status);
636         return AVERROR_UNKNOWN;
637     }
638
639     if (!vtctx->frame) {
640         vtctx->reconfig_needed = true;
641         return AVERROR_UNKNOWN;
642     }
643
644     return videotoolbox_buffer_create(avctx, frame);
645 }
646
647 static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
648 {
649     H264Context *h = avctx->priv_data;
650     AVFrame *frame = h->cur_pic_ptr->f;
651     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
652     int ret = videotoolbox_common_end_frame(avctx, frame);
653     vtctx->bitstream_size = 0;
654     return ret;
655 }
656
657 static int videotoolbox_hevc_end_frame(AVCodecContext *avctx)
658 {
659     HEVCContext *h = avctx->priv_data;
660     AVFrame *frame = h->ref->frame;
661     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
662     int ret;
663
664     ret = videotoolbox_common_end_frame(avctx, frame);
665     vtctx->bitstream_size = 0;
666     return ret;
667 }
668
669 static int videotoolbox_mpeg_start_frame(AVCodecContext *avctx,
670                                          const uint8_t *buffer,
671                                          uint32_t size)
672 {
673     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
674
675     return videotoolbox_buffer_copy(vtctx, buffer, size);
676 }
677
678 static int videotoolbox_mpeg_decode_slice(AVCodecContext *avctx,
679                                           const uint8_t *buffer,
680                                           uint32_t size)
681 {
682     return 0;
683 }
684
685 static int videotoolbox_mpeg_end_frame(AVCodecContext *avctx)
686 {
687     MpegEncContext *s = avctx->priv_data;
688     AVFrame *frame = s->current_picture_ptr->f;
689
690     return videotoolbox_common_end_frame(avctx, frame);
691 }
692
693 static CFDictionaryRef videotoolbox_decoder_config_create(CMVideoCodecType codec_type,
694                                                           AVCodecContext *avctx)
695 {
696     CFMutableDictionaryRef config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
697                                                                    0,
698                                                                    &kCFTypeDictionaryKeyCallBacks,
699                                                                    &kCFTypeDictionaryValueCallBacks);
700
701     CFDictionarySetValue(config_info,
702                          kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder,
703                          kCFBooleanTrue);
704
705     if (avctx->extradata_size) {
706         CFMutableDictionaryRef avc_info;
707         CFDataRef data = NULL;
708
709         avc_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
710                                              1,
711                                              &kCFTypeDictionaryKeyCallBacks,
712                                              &kCFTypeDictionaryValueCallBacks);
713
714         switch (codec_type) {
715         case kCMVideoCodecType_MPEG4Video :
716             data = videotoolbox_esds_extradata_create(avctx);
717             if (data)
718                 CFDictionarySetValue(avc_info, CFSTR("esds"), data);
719             break;
720         case kCMVideoCodecType_H264 :
721             data = ff_videotoolbox_avcc_extradata_create(avctx);
722             if (data)
723                 CFDictionarySetValue(avc_info, CFSTR("avcC"), data);
724             break;
725         case kCMVideoCodecType_HEVC :
726             data = ff_videotoolbox_hvcc_extradata_create(avctx);
727             if (data)
728                 CFDictionarySetValue(avc_info, CFSTR("hvcC"), data);
729             break;
730         default:
731             break;
732         }
733
734         CFDictionarySetValue(config_info,
735                 kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms,
736                 avc_info);
737
738         if (data)
739             CFRelease(data);
740
741         CFRelease(avc_info);
742     }
743     return config_info;
744 }
745
746 static CFDictionaryRef videotoolbox_buffer_attributes_create(int width,
747                                                              int height,
748                                                              OSType pix_fmt)
749 {
750     CFMutableDictionaryRef buffer_attributes;
751     CFMutableDictionaryRef io_surface_properties;
752     CFNumberRef cv_pix_fmt;
753     CFNumberRef w;
754     CFNumberRef h;
755
756     w = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &width);
757     h = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &height);
758     cv_pix_fmt = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);
759
760     buffer_attributes = CFDictionaryCreateMutable(kCFAllocatorDefault,
761                                                   4,
762                                                   &kCFTypeDictionaryKeyCallBacks,
763                                                   &kCFTypeDictionaryValueCallBacks);
764     io_surface_properties = CFDictionaryCreateMutable(kCFAllocatorDefault,
765                                                       0,
766                                                       &kCFTypeDictionaryKeyCallBacks,
767                                                       &kCFTypeDictionaryValueCallBacks);
768
769     if (pix_fmt)
770         CFDictionarySetValue(buffer_attributes, kCVPixelBufferPixelFormatTypeKey, cv_pix_fmt);
771     CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfacePropertiesKey, io_surface_properties);
772     CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
773     CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
774 #if TARGET_OS_IPHONE
775     CFDictionarySetValue(buffer_attributes, kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
776 #else
777     CFDictionarySetValue(buffer_attributes, kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey, kCFBooleanTrue);
778 #endif
779
780     CFRelease(io_surface_properties);
781     CFRelease(cv_pix_fmt);
782     CFRelease(w);
783     CFRelease(h);
784
785     return buffer_attributes;
786 }
787
788 static CMVideoFormatDescriptionRef videotoolbox_format_desc_create(CMVideoCodecType codec_type,
789                                                                    CFDictionaryRef decoder_spec,
790                                                                    int width,
791                                                                    int height)
792 {
793     CMFormatDescriptionRef cm_fmt_desc;
794     OSStatus status;
795
796     status = CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
797                                             codec_type,
798                                             width,
799                                             height,
800                                             decoder_spec, // Dictionary of extension
801                                             &cm_fmt_desc);
802
803     if (status)
804         return NULL;
805
806     return cm_fmt_desc;
807 }
808
809 static int videotoolbox_start(AVCodecContext *avctx)
810 {
811     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
812     OSStatus status;
813     VTDecompressionOutputCallbackRecord decoder_cb;
814     CFDictionaryRef decoder_spec;
815     CFDictionaryRef buf_attr;
816
817     if (!videotoolbox) {
818         av_log(avctx, AV_LOG_ERROR, "hwaccel context is not set\n");
819         return -1;
820     }
821
822     switch( avctx->codec_id ) {
823     case AV_CODEC_ID_H263 :
824         videotoolbox->cm_codec_type = kCMVideoCodecType_H263;
825         break;
826     case AV_CODEC_ID_H264 :
827         videotoolbox->cm_codec_type = kCMVideoCodecType_H264;
828         break;
829     case AV_CODEC_ID_HEVC :
830         videotoolbox->cm_codec_type = kCMVideoCodecType_HEVC;
831         break;
832     case AV_CODEC_ID_MPEG1VIDEO :
833         videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG1Video;
834         break;
835     case AV_CODEC_ID_MPEG2VIDEO :
836         videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG2Video;
837         break;
838     case AV_CODEC_ID_MPEG4 :
839         videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG4Video;
840         break;
841     default :
842         break;
843     }
844
845     decoder_spec = videotoolbox_decoder_config_create(videotoolbox->cm_codec_type, avctx);
846
847     if (!decoder_spec) {
848         av_log(avctx, AV_LOG_ERROR, "decoder specification creation failed\n");
849         return -1;
850     }
851
852     videotoolbox->cm_fmt_desc = videotoolbox_format_desc_create(videotoolbox->cm_codec_type,
853                                                                 decoder_spec,
854                                                                 avctx->width,
855                                                                 avctx->height);
856     if (!videotoolbox->cm_fmt_desc) {
857         if (decoder_spec)
858             CFRelease(decoder_spec);
859
860         av_log(avctx, AV_LOG_ERROR, "format description creation failed\n");
861         return -1;
862     }
863
864     buf_attr = videotoolbox_buffer_attributes_create(avctx->width,
865                                                      avctx->height,
866                                                      videotoolbox->cv_pix_fmt_type);
867
868     decoder_cb.decompressionOutputCallback = videotoolbox_decoder_callback;
869     decoder_cb.decompressionOutputRefCon   = avctx;
870
871     status = VTDecompressionSessionCreate(NULL,                      // allocator
872                                           videotoolbox->cm_fmt_desc, // videoFormatDescription
873                                           decoder_spec,              // videoDecoderSpecification
874                                           buf_attr,                  // destinationImageBufferAttributes
875                                           &decoder_cb,               // outputCallback
876                                           &videotoolbox->session);   // decompressionSessionOut
877
878     if (decoder_spec)
879         CFRelease(decoder_spec);
880     if (buf_attr)
881         CFRelease(buf_attr);
882
883     switch (status) {
884     case kVTVideoDecoderNotAvailableNowErr:
885         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox session not available.\n");
886         return AVERROR(ENOSYS);
887     case kVTVideoDecoderUnsupportedDataFormatErr:
888         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox does not support this format.\n");
889         return AVERROR(ENOSYS);
890     case kVTVideoDecoderMalfunctionErr:
891         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox malfunction.\n");
892         return AVERROR(EINVAL);
893     case kVTVideoDecoderBadDataErr :
894         av_log(avctx, AV_LOG_VERBOSE, "VideoToolbox reported invalid data.\n");
895         return AVERROR_INVALIDDATA;
896     case 0:
897         return 0;
898     default:
899         av_log(avctx, AV_LOG_VERBOSE, "Unknown VideoToolbox session creation error %u\n", (unsigned)status);
900         return AVERROR_UNKNOWN;
901     }
902 }
903
904 static void videotoolbox_stop(AVCodecContext *avctx)
905 {
906     AVVideotoolboxContext *videotoolbox = videotoolbox_get_context(avctx);
907     if (!videotoolbox)
908         return;
909
910     if (videotoolbox->cm_fmt_desc) {
911         CFRelease(videotoolbox->cm_fmt_desc);
912         videotoolbox->cm_fmt_desc = NULL;
913     }
914
915     if (videotoolbox->session) {
916         VTDecompressionSessionInvalidate(videotoolbox->session);
917         CFRelease(videotoolbox->session);
918         videotoolbox->session = NULL;
919     }
920 }
921
922 static int videotoolbox_uninit(AVCodecContext *avctx)
923 {
924     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
925     if (!vtctx)
926         return 0;
927
928     ff_videotoolbox_uninit(avctx);
929
930     if (vtctx->vt_ctx)
931         videotoolbox_stop(avctx);
932
933     av_buffer_unref(&vtctx->cached_hw_frames_ctx);
934     av_freep(&vtctx->vt_ctx);
935
936     return 0;
937 }
938
939 static int videotoolbox_common_init(AVCodecContext *avctx)
940 {
941     VTContext *vtctx = avctx->internal->hwaccel_priv_data;
942     AVHWFramesContext *hw_frames;
943     int err;
944
945     // Old API - do nothing.
946     if (avctx->hwaccel_context)
947         return 0;
948
949     if (!avctx->hw_frames_ctx && !avctx->hw_device_ctx) {
950         av_log(avctx, AV_LOG_ERROR,
951                "Either hw_frames_ctx or hw_device_ctx must be set.\n");
952         return AVERROR(EINVAL);
953     }
954
955     vtctx->vt_ctx = av_videotoolbox_alloc_context();
956     if (!vtctx->vt_ctx) {
957         err = AVERROR(ENOMEM);
958         goto fail;
959     }
960
961     if (avctx->hw_frames_ctx) {
962         hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
963     } else {
964         avctx->hw_frames_ctx = av_hwframe_ctx_alloc(avctx->hw_device_ctx);
965         if (!avctx->hw_frames_ctx) {
966             err = AVERROR(ENOMEM);
967             goto fail;
968         }
969
970         hw_frames = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
971         hw_frames->format = AV_PIX_FMT_VIDEOTOOLBOX;
972         hw_frames->sw_format = AV_PIX_FMT_NV12; // same as av_videotoolbox_alloc_context()
973         hw_frames->width = avctx->width;
974         hw_frames->height = avctx->height;
975
976         err = av_hwframe_ctx_init(avctx->hw_frames_ctx);
977         if (err < 0) {
978             av_buffer_unref(&avctx->hw_frames_ctx);
979             goto fail;
980         }
981     }
982
983     vtctx->cached_hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
984     if (!vtctx->cached_hw_frames_ctx) {
985         err = AVERROR(ENOMEM);
986         goto fail;
987     }
988
989     vtctx->vt_ctx->cv_pix_fmt_type =
990         av_map_videotoolbox_format_from_pixfmt(hw_frames->sw_format);
991     if (!vtctx->vt_ctx->cv_pix_fmt_type) {
992         av_log(avctx, AV_LOG_ERROR, "Unknown sw_format.\n");
993         err = AVERROR(EINVAL);
994         goto fail;
995     }
996
997     err = videotoolbox_start(avctx);
998     if (err < 0)
999         goto fail;
1000
1001     return 0;
1002
1003 fail:
1004     videotoolbox_uninit(avctx);
1005     return err;
1006 }
1007
1008 static int videotoolbox_frame_params(AVCodecContext *avctx,
1009                                      AVBufferRef *hw_frames_ctx)
1010 {
1011     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
1012
1013     frames_ctx->format            = AV_PIX_FMT_VIDEOTOOLBOX;
1014     frames_ctx->width             = avctx->coded_width;
1015     frames_ctx->height            = avctx->coded_height;
1016     frames_ctx->sw_format         = AV_PIX_FMT_NV12;
1017
1018     return 0;
1019 }
1020
1021 AVHWAccel ff_h263_videotoolbox_hwaccel = {
1022     .name           = "h263_videotoolbox",
1023     .type           = AVMEDIA_TYPE_VIDEO,
1024     .id             = AV_CODEC_ID_H263,
1025     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1026     .alloc_frame    = ff_videotoolbox_alloc_frame,
1027     .start_frame    = videotoolbox_mpeg_start_frame,
1028     .decode_slice   = videotoolbox_mpeg_decode_slice,
1029     .end_frame      = videotoolbox_mpeg_end_frame,
1030     .frame_params   = videotoolbox_frame_params,
1031     .init           = videotoolbox_common_init,
1032     .uninit         = videotoolbox_uninit,
1033     .priv_data_size = sizeof(VTContext),
1034 };
1035
1036 AVHWAccel ff_hevc_videotoolbox_hwaccel = {
1037     .name           = "hevc_videotoolbox",
1038     .type           = AVMEDIA_TYPE_VIDEO,
1039     .id             = AV_CODEC_ID_HEVC,
1040     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1041     .alloc_frame    = ff_videotoolbox_alloc_frame,
1042     .start_frame    = ff_videotoolbox_h264_start_frame,
1043     .decode_slice   = ff_videotoolbox_h264_decode_slice,
1044     .end_frame      = videotoolbox_hevc_end_frame,
1045     .frame_params   = videotoolbox_frame_params,
1046     .init           = videotoolbox_common_init,
1047     .uninit         = ff_videotoolbox_uninit,
1048     .priv_data_size = sizeof(VTContext),
1049 };
1050
1051 AVHWAccel ff_h264_videotoolbox_hwaccel = {
1052     .name           = "h264_videotoolbox",
1053     .type           = AVMEDIA_TYPE_VIDEO,
1054     .id             = AV_CODEC_ID_H264,
1055     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1056     .alloc_frame    = ff_videotoolbox_alloc_frame,
1057     .start_frame    = ff_videotoolbox_h264_start_frame,
1058     .decode_slice   = ff_videotoolbox_h264_decode_slice,
1059     .decode_params  = videotoolbox_h264_decode_params,
1060     .end_frame      = videotoolbox_h264_end_frame,
1061     .frame_params   = videotoolbox_frame_params,
1062     .init           = videotoolbox_common_init,
1063     .uninit         = videotoolbox_uninit,
1064     .priv_data_size = sizeof(VTContext),
1065 };
1066
1067 AVHWAccel ff_mpeg1_videotoolbox_hwaccel = {
1068     .name           = "mpeg1_videotoolbox",
1069     .type           = AVMEDIA_TYPE_VIDEO,
1070     .id             = AV_CODEC_ID_MPEG1VIDEO,
1071     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1072     .alloc_frame    = ff_videotoolbox_alloc_frame,
1073     .start_frame    = videotoolbox_mpeg_start_frame,
1074     .decode_slice   = videotoolbox_mpeg_decode_slice,
1075     .end_frame      = videotoolbox_mpeg_end_frame,
1076     .frame_params   = videotoolbox_frame_params,
1077     .init           = videotoolbox_common_init,
1078     .uninit         = videotoolbox_uninit,
1079     .priv_data_size = sizeof(VTContext),
1080 };
1081
1082 AVHWAccel ff_mpeg2_videotoolbox_hwaccel = {
1083     .name           = "mpeg2_videotoolbox",
1084     .type           = AVMEDIA_TYPE_VIDEO,
1085     .id             = AV_CODEC_ID_MPEG2VIDEO,
1086     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1087     .alloc_frame    = ff_videotoolbox_alloc_frame,
1088     .start_frame    = videotoolbox_mpeg_start_frame,
1089     .decode_slice   = videotoolbox_mpeg_decode_slice,
1090     .end_frame      = videotoolbox_mpeg_end_frame,
1091     .frame_params   = videotoolbox_frame_params,
1092     .init           = videotoolbox_common_init,
1093     .uninit         = videotoolbox_uninit,
1094     .priv_data_size = sizeof(VTContext),
1095 };
1096
1097 AVHWAccel ff_mpeg4_videotoolbox_hwaccel = {
1098     .name           = "mpeg4_videotoolbox",
1099     .type           = AVMEDIA_TYPE_VIDEO,
1100     .id             = AV_CODEC_ID_MPEG4,
1101     .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
1102     .alloc_frame    = ff_videotoolbox_alloc_frame,
1103     .start_frame    = videotoolbox_mpeg_start_frame,
1104     .decode_slice   = videotoolbox_mpeg_decode_slice,
1105     .end_frame      = videotoolbox_mpeg_end_frame,
1106     .frame_params   = videotoolbox_frame_params,
1107     .init           = videotoolbox_common_init,
1108     .uninit         = videotoolbox_uninit,
1109     .priv_data_size = sizeof(VTContext),
1110 };
1111
1112 AVVideotoolboxContext *av_videotoolbox_alloc_context(void)
1113 {
1114     AVVideotoolboxContext *ret = av_mallocz(sizeof(*ret));
1115
1116     if (ret) {
1117         ret->output_callback = videotoolbox_decoder_callback;
1118         ret->cv_pix_fmt_type = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
1119     }
1120
1121     return ret;
1122 }
1123
1124 int av_videotoolbox_default_init(AVCodecContext *avctx)
1125 {
1126     return av_videotoolbox_default_init2(avctx, NULL);
1127 }
1128
1129 int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *vtctx)
1130 {
1131     avctx->hwaccel_context = vtctx ?: av_videotoolbox_alloc_context();
1132     if (!avctx->hwaccel_context)
1133         return AVERROR(ENOMEM);
1134     return videotoolbox_start(avctx);
1135 }
1136
1137 void av_videotoolbox_default_free(AVCodecContext *avctx)
1138 {
1139
1140     videotoolbox_stop(avctx);
1141     av_freep(&avctx->hwaccel_context);
1142 }
1143 #endif /* CONFIG_VIDEOTOOLBOX */