2 * This file is part of FFmpeg.
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
22 #define _WIN32_WINNT 0x0602
26 #include "libavutil/imgutils.h"
27 #include "libavutil/opt.h"
28 #include "libavutil/time.h"
30 // Include after mf_utils.h due to Windows include mess.
31 #include "mpeg4audio.h"
// Private encoder state; accessed throughout this file via avctx->priv_data.
33 typedef struct MFContext {
// Media kind flags; mf_init() sets is_video to the negation of is_audio.
35 int is_video, is_audio;
// Event source of the MFT, obtained in mf_unlock_async() and polled by
// mf_wait_events(). Other code tests it to decide whether async mode is on.
38 IMFMediaEventGenerator *async_events;
// Stream identifiers queried in mf_init() (0 when the MFT reports E_NOTIMPL).
39 DWORD in_stream_id, out_stream_id;
// Stream info structures filled in by mf_setup_context().
40 MFT_INPUT_STREAM_INFO in_info;
41 MFT_OUTPUT_STREAM_INFO out_info;
// Non-zero when the output stream flags say the MFT provides (or can
// provide) its own output samples.
42 int out_stream_provides_samples;
43 int draining, draining_done;
// State flags examined by mf_wait_events() for asynchronous MFTs.
45 int async_need_input, async_have_output, async_marker;
// Offset subtracted from output pts/dts in mf_sample_to_avpacket();
// AV_NOPTS_VALUE until it has been measured.
46 int64_t reorder_delay;
// Forward declarations: both functions are called from mf_receive_sample(),
// which appears before their definitions.
55 static int mf_choose_output_type(AVCodecContext *avctx);
56 static int mf_setup_context(AVCodecContext *avctx);
// Timebase used for MediaFoundation sample times: 1/10000000 s (100 ns units).
58 #define MF_TIMEBASE (AVRational){1, 10000000}
59 // Sentinel value only used by us.
60 #define MF_INVALID_TIME AV_NOPTS_VALUE
// Fetch events from the MFT's event generator until at least one of the
// state flags (need input, have output, draining done, marker) is set.
// Returns AVERROR_EXTERNAL when an event cannot be retrieved.
62 static int mf_wait_events(AVCodecContext *avctx)
64 MFContext *c = avctx->priv_data;
// If a flag is already pending, the loop body is never entered.
69 while (!(c->async_need_input || c->async_have_output || c->draining_done || c->async_marker)) {
70 IMFMediaEvent *ev = NULL;
71 MediaEventType ev_id = 0;
// Flags argument is 0 (presumably a blocking wait - confirm against the
// IMFMediaEventGenerator::GetEvent documentation).
72 HRESULT hr = IMFMediaEventGenerator_GetEvent(c->async_events, 0, &ev);
74 av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
76 return AVERROR_EXTERNAL;
78 IMFMediaEvent_GetType(ev, &ev_id);
// Map the event type onto the corresponding context flag.
80 case ff_METransformNeedInput:
82 c->async_need_input = 1;
84 case ff_METransformHaveOutput:
85 c->async_have_output = 1;
87 case ff_METransformDrainComplete:
90 case ff_METransformMarker:
// The event object is released after it has been examined.
95 IMFMediaEvent_Release(ev);
// Select the timebase used for timestamp conversion: pkt_timebase when it is
// valid (num and den both positive), otherwise time_base when that is valid.
// NOTE(review): the fallback for the case where neither is valid is not
// visible in this excerpt.
101 static AVRational mf_get_tb(AVCodecContext *avctx)
103 if (avctx->pkt_timebase.num > 0 && avctx->pkt_timebase.den > 0)
104 return avctx->pkt_timebase;
105 if (avctx->time_base.num > 0 && avctx->time_base.den > 0)
106 return avctx->time_base;
// Convert a libav timestamp (in the timebase chosen by mf_get_tb()) to
// MF_TIMEBASE units. AV_NOPTS_VALUE maps to MF_INVALID_TIME.
110 static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
112 if (av_pts == AV_NOPTS_VALUE)
113 return MF_INVALID_TIME;
114 return av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
// Stamp `sample` with `av_pts` converted to MF time. Nothing is set when the
// converted value is MF_INVALID_TIME (i.e. av_pts was AV_NOPTS_VALUE).
117 static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
119 LONGLONG stime = mf_to_mf_time(avctx, av_pts);
120 if (stime != MF_INVALID_TIME)
121 IMFSample_SetSampleTime(sample, stime);
// Convert an MF timestamp (MF_TIMEBASE units) to the timebase chosen by
// mf_get_tb(). No sentinel handling is done here.
124 static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
126 return av_rescale_q(stime, MF_TIMEBASE, mf_get_tb(avctx));
// Read the sample time of `sample` and convert it to the libav timebase.
// AV_NOPTS_VALUE is returned on one path (presumably when GetSampleTime
// fails - the controlling check is not visible in this excerpt).
129 static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
132 HRESULT hr = IMFSample_GetSampleTime(sample, &pts);
134 return AV_NOPTS_VALUE;
135 return mf_from_mf_time(avctx, pts);
// Audio: derive codec extradata and a usable output buffer size from the
// negotiated output media type `type`.
// Returns AVERROR(ENOMEM) when the extradata allocation fails and
// AVERROR_EXTERNAL on a blob retrieval path.
138 static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
140 MFContext *c = avctx->priv_data;
// MP3 and AC3 are excluded from the MF_MT_USER_DATA extradata lookup.
144 if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
145 hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
146 if (!FAILED(hr) && sz > 0) {
// Zeroed allocation including the padding libavcodec expects after extradata.
147 avctx->extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
148 if (!avctx->extradata)
149 return AVERROR(ENOMEM);
150 avctx->extradata_size = sz;
151 hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, avctx->extradata, sz, NULL);
153 return AVERROR_EXTERNAL;
155 if (avctx->codec_id == AV_CODEC_ID_AAC && avctx->extradata_size >= 12) {
156 // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
157 avctx->extradata_size = avctx->extradata_size - 12;
// memmove because source and destination overlap within the same buffer.
158 memmove(avctx->extradata, avctx->extradata + 12, avctx->extradata_size);
163 // I don't know where it's documented that we need this. It happens with the
164 // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
165 // (Certainly any lossy codec will have frames much smaller than 1 second.)
// Only applies when the MFT reported no buffer size and we must allocate the
// output samples ourselves: one second worth of bytes is used instead.
166 if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
167 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
169 av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
170 "assuming %d bytes instead.\n", (int)sz);
171 c->out_info.cbSize = sz;
// Video: take the MF_MT_MPEG_SEQUENCE_HEADER blob of the output media type,
// when present and non-empty, as the codec extradata.
// Returns AVERROR(ENOMEM) / AVERROR_EXTERNAL on the failure paths shown.
178 static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
183 hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &sz);
184 if (!FAILED(hr) && sz > 0) {
// The blob is read into a temporary buffer first; avctx->extradata is only
// replaced after the read (see the assignments below).
185 uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
187 return AVERROR(ENOMEM);
188 hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, extradata, sz, NULL);
191 return AVERROR_EXTERNAL;
// Drop any previous extradata before installing the new buffer.
193 av_freep(&avctx->extradata);
194 avctx->extradata = extradata;
195 avctx->extradata_size = sz;
// Query the MFT's current output media type, log it, and hand it to the
// video or audio specific handler. The media type is released before
// returning. Returns AVERROR_EXTERNAL if the type cannot be queried.
201 static int mf_output_type_get(AVCodecContext *avctx)
203 MFContext *c = avctx->priv_data;
208 hr = IMFTransform_GetOutputCurrentType(c->mft, c->out_stream_id, &type);
210 av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
211 return AVERROR_EXTERNAL;
214 av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
215 ff_media_type_dump(avctx, type);
// Dispatch on media kind.
219 ret = mf_encv_output_type_get(avctx, type);
220 } else if (c->is_audio) {
221 ret = mf_enca_output_type_get(avctx, type);
225 av_log(avctx, AV_LOG_ERROR, "output type not supported\n");
227 IMFMediaType_Release(type);
// Copy the payload and timing information of an MF output sample into
// `avpkt`.
// The packet is allocated with the sample's total length, filled from a
// contiguous view of the sample's buffers, and stamped with pts/dts and the
// key-frame flag. Returns AVERROR_EXTERNAL when an MF call fails, or the
// error from av_new_packet().
231 static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
233 MFContext *c = avctx->priv_data;
237 IMFMediaBuffer *buffer;
242 hr = IMFSample_GetTotalLength(sample, &len);
244 return AVERROR_EXTERNAL;
246 if ((ret = av_new_packet(avpkt, len)) < 0)
// Fixed: store the HRESULT. Previously the result was discarded, so the
// error return that follows could only see the stale (successful) value
// from IMFSample_GetTotalLength() and a failed conversion went unnoticed.
249 hr = IMFSample_ConvertToContiguousBuffer(sample, &buffer);
251 return AVERROR_EXTERNAL;
253 hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
// Release the buffer reference on the failure return that follows.
255 IMFMediaBuffer_Release(buffer);
256 return AVERROR_EXTERNAL;
259 memcpy(avpkt->data, data, len);
261 IMFMediaBuffer_Unlock(buffer);
262 IMFMediaBuffer_Release(buffer);
// Default: dts equals pts; dts may be overridden below.
264 avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);
// Audio packets are always flagged as key frames; video packets only when
// the sample carries a non-zero CleanPoint attribute.
266 hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
267 if (c->is_audio || (!FAILED(hr) && t32 != 0))
268 avpkt->flags |= AV_PKT_FLAG_KEY;
270 hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
272 avpkt->dts = mf_from_mf_time(avctx, t);
273 // At least on Qualcomm's HEVC encoder on SD 835, the output dts
274 // starts from the input pts of the first frame, while the output pts
275 // is shifted forward. Therefore, shift the output values back so that
276 // the output pts matches the input.
// The delay is measured once, then applied to every subsequent packet.
277 if (c->reorder_delay == AV_NOPTS_VALUE)
278 c->reorder_delay = avpkt->pts - avpkt->dts;
279 avpkt->dts -= c->reorder_delay;
280 avpkt->pts -= c->reorder_delay;
// Audio: wrap the samples of `frame` in a newly created MF memory sample.
// Only frame->data[0] is passed on, i.e. a single (interleaved) data plane
// is used.
286 static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
288 MFContext *c = avctx->priv_data;
// Bytes per sample frame (all channels) and total payload length.
293 bps = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels;
294 len = frame->nb_samples * bps;
296 sample = ff_create_memory_sample(frame->data[0], len, c->in_info.cbAlignment);
// NOTE(review): nb_samples is passed through the pts timebase conversion
// here; this is only an exact duration if the timebase is 1/sample_rate -
// confirm.
298 IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->nb_samples));
// Video: create an MF memory sample sized for one picture and copy the
// frame's planes into it as a tightly packed buffer (alignment 1).
302 static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
304 MFContext *c = avctx->priv_data;
306 IMFMediaBuffer *buffer;
312 size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
// NULL source: the sample is created empty and filled below.
316 sample = ff_create_memory_sample(NULL, size, c->in_info.cbAlignment);
320 hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
// A sample release on an early-exit path (condition not visible here).
322 IMFSample_Release(sample);
326 hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
// Buffer and sample are both released on this early-exit path.
328 IMFMediaBuffer_Release(buffer);
329 IMFSample_Release(sample);
333 ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
334 avctx->pix_fmt, avctx->width, avctx->height, 1);
// Record the amount of valid data, then unlock and drop our buffer reference.
335 IMFMediaBuffer_SetCurrentLength(buffer, size);
336 IMFMediaBuffer_Unlock(buffer);
337 IMFMediaBuffer_Release(buffer);
339 IMFSample_Release(sample);
343 IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->pkt_duration));
// Convert `frame` to an MF sample using the audio or video specific helper
// and stamp it with the frame's pts.
348 static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
350 MFContext *c = avctx->priv_data;
354 sample = mf_a_avframe_to_sample(avctx, frame);
356 sample = mf_v_avframe_to_sample(avctx, frame);
360 mf_sample_set_pts(avctx, sample, frame->pts);
// Feed one sample to the MFT, or issue the drain command on the
// `!c->draining` branch (presumably reached when `sample` is NULL - the
// controlling condition is not visible in this excerpt).
// Returns AVERROR(EAGAIN) when the MFT is not accepting input and
// AVERROR_EXTERNAL when ProcessInput fails.
365 static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
367 MFContext *c = avctx->priv_data;
// Async MFTs: input may only be submitted after a NeedInput event.
372 if (c->async_events) {
373 if ((ret = mf_wait_events(avctx)) < 0)
375 if (!c->async_need_input)
376 return AVERROR(EAGAIN);
379 IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
381 hr = IMFTransform_ProcessInput(c->mft, c->in_stream_id, sample, 0);
382 if (hr == MF_E_NOTACCEPTING) {
383 return AVERROR(EAGAIN);
384 } else if (FAILED(hr)) {
385 av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
386 return AVERROR_EXTERNAL;
// The pending NeedInput request has been consumed.
388 c->async_need_input = 0;
389 } else if (!c->draining) {
390 hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
392 av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
393 // Some MFTs (AC3) will send a frame after each drain command (???), so
394 // this is required to make draining actually terminate.
396 c->async_need_input = 0;
// send_frame callback: convert `frame` to an MF sample, optionally request a
// key frame through ICodecAPI, submit the sample and release it afterwards.
// Returns AVERROR(ENOMEM) when the sample cannot be created.
403 static int mf_send_frame(AVCodecContext *avctx, const AVFrame *frame)
405 MFContext *c = avctx->priv_data;
407 IMFSample *sample = NULL;
409 sample = mf_avframe_to_sample(avctx, frame);
411 return AVERROR(ENOMEM);
// Force a key frame for I pictures and for the very first submitted sample.
412 if (c->is_video && c->codec_api) {
413 if (frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
414 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
417 ret = mf_send_sample(avctx, sample);
419 IMFSample_Release(sample);
// Try to pull one output sample from the MFT into *out_sample.
// On success without a sample, the result is AVERROR_EOF once draining has
// completed and AVERROR(EAGAIN) otherwise (see the final statement).
423 static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
425 MFContext *c = avctx->priv_data;
428 MFT_OUTPUT_DATA_BUFFER out_buffers;
// Async MFTs: wait for an event first; output is only requested when a
// HaveOutput event is pending and draining has not finished.
436 if (c->async_events) {
437 if ((ret = mf_wait_events(avctx)) < 0)
439 if (!c->async_have_output || c->draining_done) {
// When the MFT does not provide output samples, allocate one ourselves using
// the size and alignment from the output stream info.
445 if (!c->out_stream_provides_samples) {
446 sample = ff_create_memory_sample(NULL, c->out_info.cbSize, c->out_info.cbAlignment);
448 return AVERROR(ENOMEM);
451 out_buffers = (MFT_OUTPUT_DATA_BUFFER) {
452 .dwStreamID = c->out_stream_id,
457 hr = IMFTransform_ProcessOutput(c->mft, 0, 1, &out_buffers, &st);
// Any event collection returned alongside the output is released unused.
459 if (out_buffers.pEvents)
460 IMFCollection_Release(out_buffers.pEvents);
463 *out_sample = out_buffers.pSample;
// On the remaining paths the sample is not handed to the caller, so it is
// released here.
468 if (out_buffers.pSample)
469 IMFSample_Release(out_buffers.pSample);
471 if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
473 c->draining_done = 1;
475 } else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
// The MFT changed its output format: renegotiate the output type and
// refresh the cached stream info.
476 av_log(avctx, AV_LOG_WARNING, "stream format change\n");
477 ret = mf_choose_output_type(avctx);
478 if (ret == 0) // we don't expect renegotiating the input type
479 ret = AVERROR_EXTERNAL;
481 ret = mf_setup_context(avctx);
483 c->async_have_output = 0;
488 av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
489 ret = AVERROR_EXTERNAL;
495 c->async_have_output = 0;
497 if (ret >= 0 && !*out_sample)
498 ret = c->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);
// receive_packet callback: fetch one output sample from the MFT, convert it
// to an AVPacket and release the sample.
503 static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
508 ret = mf_receive_sample(avctx, &sample);
512 ret = mf_sample_to_avpacket(avctx, sample, avpkt);
513 IMFSample_Release(sample);
518 // Most encoders seem to enumerate supported audio formats on the output types,
519 // at least as far as channel configuration and sample rate is concerned. Pick
520 // the one which seems to match best.
// Returns a score for `type`; mf_choose_output_type() keeps the candidate
// with the highest score. The score contributions themselves are only partly
// visible in this excerpt.
521 static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
523 MFContext *c = avctx->priv_data;
// Criteria checked: matching sample rate, channel count and subtype.
529 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
530 if (!FAILED(hr) && t == avctx->sample_rate)
533 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
534 if (!FAILED(hr) && t == avctx->channels)
537 hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
539 if (IsEqualGUID(&c->main_subtype, &tg))
543 // Select the bitrate (lowest priority).
544 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &t);
// Difference between the candidate's byte rate and the requested one
// (bit_rate is in bits per second, hence the division by 8).
546 int diff = (int)t - avctx->bit_rate / 8;
548 score |= (1LL << 31) - diff; // prefer lower bitrate
550 score |= (1LL << 30) + diff; // prefer higher bitrate
554 hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &t);
555 if (!FAILED(hr) && t != 0)
// Audio hook for adjusting the chosen output media type before it is set.
// In the lines visible here it changes nothing: the bitrate overrides are
// deliberately left disabled.
561 static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
563 // (some decoders allow adjusting this freely, but it can also cause failure
564 // to set the output type - so it's commented for being too fragile)
565 //IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
566 //IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
// Score an audio input media type candidate. Returns -1 when its sample
// format cannot be mapped to an AVSampleFormat; otherwise the score depends
// on matches of sample format, sample rate and channel count (the exact
// weights are not visible in this excerpt).
571 static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
577 enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
578 if (sformat == AV_SAMPLE_FMT_NONE)
579 return -1; // can not use
581 if (sformat == avctx->sample_fmt)
584 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
585 if (!FAILED(hr) && t == avctx->sample_rate)
588 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
589 if (!FAILED(hr) && t == avctx->channels)
// Validate the chosen audio input media type against the encoder context.
// Nothing is modified; AVERROR(EINVAL) is returned when sample format,
// sample rate or channel count differ from what the caller configured.
595 static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
600 enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
601 if (sformat != avctx->sample_fmt) {
602 av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
603 return AVERROR(EINVAL);
// A failed attribute query is treated the same as a mismatch.
606 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
607 if (FAILED(hr) || t != avctx->sample_rate) {
608 av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
609 return AVERROR(EINVAL);
612 hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
613 if (FAILED(hr) || t != avctx->channels) {
614 av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
615 return AVERROR(EINVAL);
// Score a video output media type candidate; the only criterion visible here
// is whether its subtype equals the codec's main subtype.
621 static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
623 MFContext *c = avctx->priv_data;
628 hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
630 if (IsEqualGUID(&c->main_subtype, &tg))
// Video: fill in the chosen output media type (frame size, progressive
// interlace mode, frame rate, H.264 profile, average bitrate) and push the
// encoder options to the MFT through ICodecAPI.
// NOTE(review): the guard around the ICodecAPI calls (presumably a check
// that c->codec_api is non-NULL) is not visible in this excerpt.
637 static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
639 MFContext *c = avctx->priv_data;
641 ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
642 IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
644 ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, avctx->framerate.num, avctx->framerate.den);
646 // (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
647 if (avctx->codec_id == AV_CODEC_ID_H264) {
// Baseline is the default; only Main and High are mapped explicitly.
648 UINT32 profile = eAVEncH264VProfile_Base;
649 switch (avctx->profile) {
650 case FF_PROFILE_H264_MAIN:
651 profile = eAVEncH264VProfile_Main;
653 case FF_PROFILE_H264_HIGH:
654 profile = eAVEncH264VProfile_High;
657 IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, profile);
660 IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
662 // Note that some of the ICodecAPI options must be set before SetOutputType.
665 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));
// Private options use -1 for "leave the MFT default untouched".
667 if (c->opt_enc_rc >= 0)
668 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(c->opt_enc_rc));
670 if (c->opt_enc_quality >= 0)
671 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(c->opt_enc_quality));
673 // Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
674 // defaults this to 1, and that setting is buggy with many of the
675 // rate control modes. (0 or 2 b-frames works fine with most rate
676 // control modes, but 2 seems buggy with the u_vbr mode.) Setting
677 // "scenario" to "camera_record" sets it in CFR mode (where the default
678 // is VFR), which makes the encoder avoid dropping frames.
679 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
680 avctx->has_b_frames = avctx->max_b_frames > 0;
682 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));
684 if (c->opt_enc_scenario >= 0)
685 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(c->opt_enc_scenario));
// Score a video input media type candidate: -1 (unusable) unless its pixel
// format equals the one configured on the encoder context.
691 static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
693 enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
694 if (pix_fmt != avctx->pix_fmt)
695 return -1; // can not use
// Validate the chosen video input media type: AVERROR(EINVAL) when its pixel
// format differs from avctx->pix_fmt. The frame size override is left
// disabled.
700 static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
702 enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
703 if (pix_fmt != avctx->pix_fmt) {
704 av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
705 return AVERROR(EINVAL);
708 //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
// Enumerate the MFT's available output media types, keep the best scoring
// one (or create an empty media type on one path), adjust it for the current
// encoder settings and set it on the MFT.
// mf_negotiate_types() treats a return value below 1 as "output type still
// needs to be set"; negative values are errors.
713 static int mf_choose_output_type(AVCodecContext *avctx)
715 MFContext *c = avctx->priv_data;
// Best candidate found so far; a score of -1 means "none yet".
718 IMFMediaType *out_type = NULL;
719 int64_t out_type_score = -1;
720 int out_type_index = -1;
723 av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
728 hr = IMFTransform_GetOutputAvailableType(c->mft, c->out_stream_id, n, &type);
// Two HRESULTs are tested together here (no more types / enumeration not
// implemented); a third signals that the input type must be set first.
729 if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
731 if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
732 av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
737 av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
738 ret = AVERROR_EXTERNAL;
742 av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", n);
743 ff_media_type_dump(avctx, type);
746 score = mf_encv_output_score(avctx, type);
747 } else if (c->is_audio) {
748 score = mf_enca_output_score(avctx, type);
// A better candidate replaces the previous one; the extra reference keeps it
// alive after the enumeration reference is released below.
751 if (score > out_type_score) {
753 IMFMediaType_Release(out_type);
755 out_type_score = score;
757 IMFMediaType_AddRef(out_type);
760 IMFMediaType_Release(type);
764 av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", out_type_index);
// Fallback path: create an empty media type (presumably when no candidate
// was found - the controlling condition is not visible in this excerpt).
766 hr = MFCreateMediaType(&out_type);
768 ret = AVERROR(ENOMEM);
775 ret = mf_encv_output_adjust(avctx, out_type);
776 } else if (c->is_audio) {
777 ret = mf_enca_output_adjust(avctx, out_type);
781 av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
782 ff_media_type_dump(avctx, out_type);
784 hr = IMFTransform_SetOutputType(c->mft, c->out_stream_id, out_type, 0);
787 } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
788 av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
791 av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
792 ret = AVERROR_EXTERNAL;
798 IMFMediaType_Release(out_type);
// Enumerate the MFT's available input media types, keep the best scoring
// one, validate it against the encoder settings and set it on the MFT.
// mf_negotiate_types() treats a return value below 1 as "input type still
// needs to be set"; negative values are errors.
802 static int mf_choose_input_type(AVCodecContext *avctx)
804 MFContext *c = avctx->priv_data;
// Best candidate found so far; a score of -1 means "none yet".
807 IMFMediaType *in_type = NULL;
808 int64_t in_type_score = -1;
809 int in_type_index = -1;
812 av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
814 IMFMediaType *type = NULL;
817 hr = IMFTransform_GetInputAvailableType(c->mft, c->in_stream_id, n, &type);
// Two HRESULTs are tested together here (no more types / enumeration not
// implemented); a third signals that the output type must be set first.
818 if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
820 if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
821 av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
826 av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
827 ret = AVERROR_EXTERNAL;
831 av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", n);
832 ff_media_type_dump(avctx, type);
835 score = mf_encv_input_score(avctx, type);
836 } else if (c->is_audio) {
837 score = mf_enca_input_score(avctx, type);
// A better candidate replaces the previous one; the extra reference keeps it
// alive after the enumeration reference is released below.
840 if (score > in_type_score) {
842 IMFMediaType_Release(in_type);
844 in_type_score = score;
846 IMFMediaType_AddRef(in_type);
849 IMFMediaType_Release(type);
853 av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", in_type_index);
855 // Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
856 av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
863 ret = mf_encv_input_adjust(avctx, in_type);
864 } else if (c->is_audio) {
865 ret = mf_enca_input_adjust(avctx, in_type);
869 av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
870 ff_media_type_dump(avctx, in_type);
872 hr = IMFTransform_SetInputType(c->mft, c->in_stream_id, in_type, 0);
875 } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
876 av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
879 av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
880 ret = AVERROR_EXTERNAL;
886 IMFMediaType_Release(in_type);
// Negotiate input and output media types with the MFT. Input and output
// selection are attempted alternately for at most two rounds, because some
// MFTs need one side set before the other can be chosen.
// Returns AVERROR_EXTERNAL if either side is still unset afterwards.
890 static int mf_negotiate_types(AVCodecContext *avctx)
892 // This follows steps 1-5 on:
893 // https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
894 // If every MFT implementer does this correctly, this loop should at worst
896 int need_input = 1, need_output = 1;
898 for (n = 0; n < 2 && (need_input || need_output); n++) {
900 ret = mf_choose_input_type(avctx);
// A result below 1 means the type was not set in this round.
903 need_input = ret < 1;
904 ret = mf_choose_output_type(avctx);
907 need_output = ret < 1;
909 if (need_input || need_output) {
910 av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
911 need_input, need_output);
912 return AVERROR_EXTERNAL;
// Cache the MFT's input/output stream info in the context, work out whether
// the MFT supplies its own output samples, and refresh the output-type
// derived state through mf_output_type_get().
// Returns AVERROR_EXTERNAL on the stream info failure paths.
917 static int mf_setup_context(AVCodecContext *avctx)
919 MFContext *c = avctx->priv_data;
923 hr = IMFTransform_GetInputStreamInfo(c->mft, c->in_stream_id, &c->in_info);
925 return AVERROR_EXTERNAL;
926 av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
927 (int)c->in_info.cbSize, (int)c->in_info.cbAlignment);
929 hr = IMFTransform_GetOutputStreamInfo(c->mft, c->out_stream_id, &c->out_info);
931 return AVERROR_EXTERNAL;
// Either flag means we do not have to allocate output samples ourselves.
932 c->out_stream_provides_samples =
933 (c->out_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) ||
934 (c->out_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES);
935 av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
936 (int)c->out_info.cbSize, (int)c->out_info.cbAlignment,
937 c->out_stream_provides_samples ? " (provides samples)" : "");
939 if ((ret = mf_output_type_get(avctx)) < 0)
// Switch the MFT to asynchronous mode when hardware video encoding was
// requested: check MF_TRANSFORM_ASYNC, set MF_TRANSFORM_ASYNC_UNLOCK and
// obtain the IMFMediaEventGenerator interface into c->async_events.
// `res` starts out as AVERROR_EXTERNAL; the success assignment is not
// visible in this excerpt.
945 static int mf_unlock_async(AVCodecContext *avctx)
947 MFContext *c = avctx->priv_data;
949 IMFAttributes *attrs;
951 int res = AVERROR_EXTERNAL;
953 // For hw encoding we unfortunately need to use async mode, otherwise
954 // play it safe and avoid it.
955 if (!(c->is_video && c->opt_enc_hw))
958 hr = IMFTransform_GetAttributes(c->mft, &attrs);
960 av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
964 hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
966 av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
971 av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
975 hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
977 av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
981 hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
983 av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
// The attribute store reference is dropped before returning.
990 IMFAttributes_Release(attrs);
// Instantiate an encoder MFT for `codec`.
// The codec id is mapped to an MF subtype, which together with the audio or
// video major type forms the output type description used to look up the
// transform. `use_hw` is forwarded to ff_instantiate_mf(); the created
// transform is stored in *mft.
// Returns AVERROR(ENOSYS) on the subtype lookup failure path, or the negative
// error reported by ff_instantiate_mf().
994 static int mf_create(void *log, IMFTransform **mft, const AVCodec *codec, int use_hw)
996 int is_audio = codec->type == AVMEDIA_TYPE_AUDIO;
997 const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
998 MFT_REGISTER_TYPE_INFO reg = {0};
1005 return AVERROR(ENOSYS);
1007 reg.guidSubtype = *subtype;
// Major type and encoder category follow the media kind.
1010 reg.guidMajorType = MFMediaType_Audio;
1011 category = MFT_CATEGORY_AUDIO_ENCODER;
1013 reg.guidMajorType = MFMediaType_Video;
1014 category = MFT_CATEGORY_VIDEO_ENCODER;
// Fixed: the argument had been garbled into the single character U+00AE
// (an HTML-entity decoding of "&reg"); pass the address of the local `reg`.
1017 if ((ret = ff_instantiate_mf(log, category, NULL, &reg, use_hw, mft)) < 0)
// Encoder init: create the MFT, unlock async mode if needed, query optional
// ICodecAPI support and the stream IDs, negotiate media types, cache stream
// info, and notify the MFT that streaming starts. For async video encoders
// with global headers requested, extradata is additionally polled for a
// short while.
1023 static int mf_init(AVCodecContext *avctx)
1025 MFContext *c = avctx->priv_data;
1028 const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
1031 c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
1032 c->is_video = !c->is_audio;
// Not measured yet; see mf_sample_to_avpacket().
1033 c->reorder_delay = AV_NOPTS_VALUE;
1035 if (c->is_video && c->opt_enc_hw)
1039 return AVERROR(ENOSYS);
1041 c->main_subtype = *subtype;
1043 if ((ret = mf_create(avctx, &c->mft, avctx->codec, use_hw)) < 0)
1046 if ((ret = mf_unlock_async(avctx)) < 0)
1049 hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
1051 av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");
// E_NOTIMPL from GetStreamIDs is handled by using stream ID 0 for both sides.
1054 hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
1055 if (hr == E_NOTIMPL) {
1056 c->in_stream_id = c->out_stream_id = 0;
1057 } else if (FAILED(hr)) {
1058 av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
1059 return AVERROR_EXTERNAL;
1062 if ((ret = mf_negotiate_types(avctx)) < 0)
1065 if ((ret = mf_setup_context(avctx)) < 0)
1068 hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
1070 av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
1071 return AVERROR_EXTERNAL;
1074 hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
1076 av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
1077 return AVERROR_EXTERNAL;
// Only poll for extradata when the caller wants global headers, the MFT is
// async video, and no extradata was obtained during setup.
1080 if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
1081 c->is_video && !avctx->extradata) {
// `total` is compared against 70*1000 and printed as total / 1000 ms, so
// both counters are in microseconds.
1082 int sleep = 10000, total = 0;
1083 av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
1084 while (total < 70*1000) {
1085 // The Qualcomm H264 encoder on SD835 doesn't provide extradata
1086 // immediately, but it becomes available soon after init (without
1087 // any waitable event). In practice, it's available after less
1088 // than 10 ms, but wait for up to 70 ms before giving up.
1089 // Some encoders (Qualcomm's HEVC encoder on SD835, some versions
1090 // of the QSV H264 encoder at least) don't provide extradata this
1091 // way at all, not even after encoding a frame - it's only
1092 // available prepended to frames.
// The return value is deliberately not checked here; success is detected
// through avctx->extradata instead.
1095 mf_output_type_get(avctx);
1096 if (avctx->extradata)
1100 av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
1101 avctx->extradata ? "Got" : "Didn't get", total / 1000);
// Encoder close: release the ICodecAPI and event generator interfaces, free
// the MFT and drop the extradata.
1107 static int mf_close(AVCodecContext *avctx)
1109 MFContext *c = avctx->priv_data;
// NOTE(review): the guard for this release (presumably a NULL check on
// c->codec_api) is not visible in this excerpt.
1112 ICodecAPI_Release(c->codec_api);
1114 if (c->async_events)
1115 IMFMediaEventGenerator_Release(c->async_events);
1117 ff_free_mf(&c->mft);
1119 av_freep(&avctx->extradata);
1120 avctx->extradata_size = 0;
// OFFSET(x): byte offset of field x inside MFContext, for AVOption tables.
// MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, EXTRA): declares the AVClass and the
// AVCodec named "<NAME>_mf" for codec AV_CODEC_ID_<ID> of media type
// AVMEDIA_TYPE_<MEDIATYPE>, wired to the callbacks defined in this file.
// The trailing .sample_fmts lines list AV_SAMPLE_FMT_S16 as the only audio
// input format (presumably the body of the AFMTS macro used by the audio
// encoder declarations - its #define line is not visible in this excerpt).
1125 #define OFFSET(x) offsetof(MFContext, x)
1127 #define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, EXTRA) \
1128 static const AVClass ff_ ## NAME ## _mf_encoder_class = { \
1129 .class_name = #NAME "_mf", \
1130 .item_name = av_default_item_name, \
1132 .version = LIBAVUTIL_VERSION_INT, \
1134 AVCodec ff_ ## NAME ## _mf_encoder = { \
1135 .priv_class = &ff_ ## NAME ## _mf_encoder_class, \
1136 .name = #NAME "_mf", \
1137 .long_name = NULL_IF_CONFIG_SMALL(#ID " via MediaFoundation"), \
1138 .type = AVMEDIA_TYPE_ ## MEDIATYPE, \
1139 .id = AV_CODEC_ID_ ## ID, \
1140 .priv_data_size = sizeof(MFContext), \
1142 .close = mf_close, \
1143 .send_frame = mf_send_frame, \
1144 .receive_packet = mf_receive_packet, \
1146 .capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID, \
1147 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | \
1148 FF_CODEC_CAP_INIT_CLEANUP, \
1152 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, \
1153 AV_SAMPLE_FMT_NONE },
// Audio encoders: AAC, AC3 and MP3, without private options (OPTS is NULL).
1155 MF_ENCODER(AUDIO, aac, AAC, NULL, AFMTS);
1156 MF_ENCODER(AUDIO, ac3, AC3, NULL, AFMTS);
1157 MF_ENCODER(AUDIO, mp3, MP3, NULL, AFMTS);
// Private options of the video encoders. A value of -1 for rate_control,
// scenario and quality means "do not override the MFT default"
// (mf_encv_output_adjust() only applies values >= 0).
1159 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1160 static const AVOption venc_opts[] = {
1161 {"rate_control", "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "rate_control"},
1162 { "default", "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "rate_control"},
1163 { "cbr", "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, "rate_control"},
1164 { "pc_vbr", "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, "rate_control"},
1165 { "u_vbr", "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, "rate_control"},
1166 { "quality", "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, "rate_control" },
1167 // The following rate_control modes require Windows 8.
1168 { "ld_vbr", "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, "rate_control"},
1169 { "g_vbr", "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, "rate_control" },
1170 { "gld_vbr", "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, "rate_control"},
// Usage scenario, forwarded to the MFT via ff_CODECAPI_AVScenarioInfo.
1172 {"scenario", "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "scenario"},
1173 { "default", "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "scenario"},
1174 { "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, "scenario"},
1175 { "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, "scenario"},
1176 { "archive", "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, "scenario"},
1177 { "live_streaming", "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, "scenario"},
1178 { "camera_record", "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, "scenario"},
1179 { "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, "scenario"},
// Quality accepts -1..100; hw_encoding is a boolean that defaults to off.
1181 {"quality", "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
1182 {"hw_encoding", "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
// Video: pixel format list starting with NV12 and YUV420P (presumably the
// body of the VFMTS macro - its #define line and the list terminator are not
// visible in this excerpt), followed by the H.264 and HEVC encoder
// declarations, both using venc_opts.
1187 .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12, \
1188 AV_PIX_FMT_YUV420P, \
1191 MF_ENCODER(VIDEO, h264, H264, venc_opts, VFMTS);
1192 MF_ENCODER(VIDEO, hevc, HEVC, venc_opts, VFMTS);