git.sesse.net Git - ffmpeg/blob - libavcodec/mfenc.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 #define COBJMACROS
  20 #if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
  21 #undef _WIN32_WINNT
  22 #define _WIN32_WINNT 0x0602
  23 #endif
  24
  25 #include "encode.h"
  26 #include "mf_utils.h"
  27 #include "libavutil/imgutils.h"
  28 #include "libavutil/opt.h"
  29 #include "libavutil/time.h"
  30 #include "internal.h"
  31
  32 typedef struct MFContext {
  33     AVClass *av_class;
  34     AVFrame *frame;
  35     int is_video, is_audio;
  36     GUID main_subtype;
  37     IMFTransform *mft;
  38     IMFMediaEventGenerator *async_events;
  39     DWORD in_stream_id, out_stream_id;
  40     MFT_INPUT_STREAM_INFO in_info;
  41     MFT_OUTPUT_STREAM_INFO out_info;
  42     int out_stream_provides_samples;
  43     int draining, draining_done;
  44     int sample_sent;
  45     int async_need_input, async_have_output, async_marker;
  46     int64_t reorder_delay;
  47     ICodecAPI *codec_api;
  48     // set by AVOption
  49     int opt_enc_rc;
  50     int opt_enc_quality;
  51     int opt_enc_scenario;
  52     int opt_enc_hw;
  53 } MFContext;
  54
  55 static int mf_choose_output_type(AVCodecContext *avctx);
  56 static int mf_setup_context(AVCodecContext *avctx);
  57
  58 #define MF_TIMEBASE (AVRational){1, 10000000}
  59 // Sentinel value only used by us.
  60 #define MF_INVALID_TIME AV_NOPTS_VALUE
  61
  62 static int mf_wait_events(AVCodecContext *avctx)
  63 {
  64     MFContext *c = avctx->priv_data;
  65
  66     if (!c->async_events)
  67         return 0;
  68
  69     while (!(c->async_need_input || c->async_have_output || c->draining_done || c->async_marker)) {
  70         IMFMediaEvent *ev = NULL;
  71         MediaEventType ev_id = 0;
  72         HRESULT hr = IMFMediaEventGenerator_GetEvent(c->async_events, 0, &ev);
  73         if (FAILED(hr)) {
  74             av_log(avctx, AV_LOG_ERROR, "IMFMediaEventGenerator_GetEvent() failed: %s\n",
  75                    ff_hr_str(hr));
  76             return AVERROR_EXTERNAL;
  77         }
  78         IMFMediaEvent_GetType(ev, &ev_id);
  79         switch (ev_id) {
  80         case ff_METransformNeedInput:
  81             if (!c->draining)
  82                 c->async_need_input = 1;
  83             break;
  84         case ff_METransformHaveOutput:
  85             c->async_have_output = 1;
  86             break;
  87         case ff_METransformDrainComplete:
  88             c->draining_done = 1;
  89             break;
  90         case ff_METransformMarker:
  91             c->async_marker = 1;
  92             break;
  93         default: ;
  94         }
  95         IMFMediaEvent_Release(ev);
  96     }
  97
  98     return 0;
  99 }
 100
 101 static AVRational mf_get_tb(AVCodecContext *avctx)
 102 {
 103     if (avctx->pkt_timebase.num > 0 && avctx->pkt_timebase.den > 0)
 104         return avctx->pkt_timebase;
 105     if (avctx->time_base.num > 0 && avctx->time_base.den > 0)
 106         return avctx->time_base;
 107     return MF_TIMEBASE;
 108 }
 109
 110 static LONGLONG mf_to_mf_time(AVCodecContext *avctx, int64_t av_pts)
 111 {
 112     if (av_pts == AV_NOPTS_VALUE)
 113         return MF_INVALID_TIME;
 114     return av_rescale_q(av_pts, mf_get_tb(avctx), MF_TIMEBASE);
 115 }
 116
 117 static void mf_sample_set_pts(AVCodecContext *avctx, IMFSample *sample, int64_t av_pts)
 118 {
 119     LONGLONG stime = mf_to_mf_time(avctx, av_pts);
 120     if (stime != MF_INVALID_TIME)
 121         IMFSample_SetSampleTime(sample, stime);
 122 }
 123
 124 static int64_t mf_from_mf_time(AVCodecContext *avctx, LONGLONG stime)
 125 {
 126     return av_rescale_q(stime, MF_TIMEBASE, mf_get_tb(avctx));
 127 }
 128
 129 static int64_t mf_sample_get_pts(AVCodecContext *avctx, IMFSample *sample)
 130 {
 131     LONGLONG pts;
 132     HRESULT hr = IMFSample_GetSampleTime(sample, &pts);
 133     if (FAILED(hr))
 134         return AV_NOPTS_VALUE;
 135     return mf_from_mf_time(avctx, pts);
 136 }
 137
 138 static int mf_enca_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
 139 {
 140     MFContext *c = avctx->priv_data;
 141     HRESULT hr;
 142     UINT32 sz;
 143
 144     if (avctx->codec_id != AV_CODEC_ID_MP3 && avctx->codec_id != AV_CODEC_ID_AC3) {
 145         hr = IMFAttributes_GetBlobSize(type, &MF_MT_USER_DATA, &sz);
 146         if (!FAILED(hr) && sz > 0) {
 147             avctx->extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
 148             if (!avctx->extradata)
 149                 return AVERROR(ENOMEM);
 150             avctx->extradata_size = sz;
 151             hr = IMFAttributes_GetBlob(type, &MF_MT_USER_DATA, avctx->extradata, sz, NULL);
 152             if (FAILED(hr))
 153                 return AVERROR_EXTERNAL;
 154
 155             if (avctx->codec_id == AV_CODEC_ID_AAC && avctx->extradata_size >= 12) {
 156                 // Get rid of HEAACWAVEINFO (after wfx field, 12 bytes).
 157                 avctx->extradata_size = avctx->extradata_size - 12;
 158                 memmove(avctx->extradata, avctx->extradata + 12, avctx->extradata_size);
 159             }
 160         }
 161     }
 162
 163     // I don't know where it's documented that we need this. It happens with the
 164     // MS mp3 encoder MFT. The idea for the workaround is taken from NAudio.
 165     // (Certainly any lossy codec will have frames much smaller than 1 second.)
 166     if (!c->out_info.cbSize && !c->out_stream_provides_samples) {
 167         hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &sz);
 168         if (!FAILED(hr)) {
 169             av_log(avctx, AV_LOG_VERBOSE, "MFT_OUTPUT_STREAM_INFO.cbSize set to 0, "
 170                    "assuming %d bytes instead.\n", (int)sz);
 171             c->out_info.cbSize = sz;
 172         }
 173     }
 174
 175     return 0;
 176 }
 177
 178 static int mf_encv_output_type_get(AVCodecContext *avctx, IMFMediaType *type)
 179 {
 180     HRESULT hr;
 181     UINT32 sz;
 182
 183     hr = IMFAttributes_GetBlobSize(type, &MF_MT_MPEG_SEQUENCE_HEADER, &sz);
 184     if (!FAILED(hr) && sz > 0) {
 185         uint8_t *extradata = av_mallocz(sz + AV_INPUT_BUFFER_PADDING_SIZE);
 186         if (!extradata)
 187             return AVERROR(ENOMEM);
 188         hr = IMFAttributes_GetBlob(type, &MF_MT_MPEG_SEQUENCE_HEADER, extradata, sz, NULL);
 189         if (FAILED(hr)) {
 190             av_free(extradata);
 191             return AVERROR_EXTERNAL;
 192         }
 193         av_freep(&avctx->extradata);
 194         avctx->extradata = extradata;
 195         avctx->extradata_size = sz;
 196     }
 197
 198     return 0;
 199 }
 200
 201 static int mf_output_type_get(AVCodecContext *avctx)
 202 {
 203     MFContext *c = avctx->priv_data;
 204     HRESULT hr;
 205     IMFMediaType *type;
 206     int ret;
 207
 208     hr = IMFTransform_GetOutputCurrentType(c->mft, c->out_stream_id, &type);
 209     if (FAILED(hr)) {
 210         av_log(avctx, AV_LOG_ERROR, "could not get output type\n");
 211         return AVERROR_EXTERNAL;
 212     }
 213
 214     av_log(avctx, AV_LOG_VERBOSE, "final output type:\n");
 215     ff_media_type_dump(avctx, type);
 216
 217     ret = 0;
 218     if (c->is_video) {
 219         ret = mf_encv_output_type_get(avctx, type);
 220     } else if (c->is_audio) {
 221         ret = mf_enca_output_type_get(avctx, type);
 222     }
 223
 224     if (ret < 0)
 225         av_log(avctx, AV_LOG_ERROR, "output type not supported\n");
 226
 227     IMFMediaType_Release(type);
 228     return ret;
 229 }
 230
 231 static int mf_sample_to_avpacket(AVCodecContext *avctx, IMFSample *sample, AVPacket *avpkt)
 232 {
 233     MFContext *c = avctx->priv_data;
 234     HRESULT hr;
 235     int ret;
 236     DWORD len;
 237     IMFMediaBuffer *buffer;
 238     BYTE *data;
 239     UINT64 t;
 240     UINT32 t32;
 241
 242     hr = IMFSample_GetTotalLength(sample, &len);
 243     if (FAILED(hr))
 244         return AVERROR_EXTERNAL;
 245
 246     if ((ret = ff_get_encode_buffer(avctx, avpkt, len, 0)) < 0)
 247         return ret;
 248
 249     IMFSample_ConvertToContiguousBuffer(sample, &buffer);
 250     if (FAILED(hr))
 251         return AVERROR_EXTERNAL;
 252
 253     hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
 254     if (FAILED(hr)) {
 255         IMFMediaBuffer_Release(buffer);
 256         return AVERROR_EXTERNAL;
 257     }
 258
 259     memcpy(avpkt->data, data, len);
 260
 261     IMFMediaBuffer_Unlock(buffer);
 262     IMFMediaBuffer_Release(buffer);
 263
 264     avpkt->pts = avpkt->dts = mf_sample_get_pts(avctx, sample);
 265
 266     hr = IMFAttributes_GetUINT32(sample, &MFSampleExtension_CleanPoint, &t32);
 267     if (c->is_audio || (!FAILED(hr) && t32 != 0))
 268         avpkt->flags |= AV_PKT_FLAG_KEY;
 269
 270     hr = IMFAttributes_GetUINT64(sample, &MFSampleExtension_DecodeTimestamp, &t);
 271     if (!FAILED(hr)) {
 272         avpkt->dts = mf_from_mf_time(avctx, t);
 273         // At least on Qualcomm's HEVC encoder on SD 835, the output dts
 274         // starts from the input pts of the first frame, while the output pts
 275         // is shifted forward. Therefore, shift the output values back so that
 276         // the output pts matches the input.
 277         if (c->reorder_delay == AV_NOPTS_VALUE)
 278             c->reorder_delay = avpkt->pts - avpkt->dts;
 279         avpkt->dts -= c->reorder_delay;
 280         avpkt->pts -= c->reorder_delay;
 281     }
 282
 283     return 0;
 284 }
 285
 286 static IMFSample *mf_a_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
 287 {
 288     MFContext *c = avctx->priv_data;
 289     size_t len;
 290     size_t bps;
 291     IMFSample *sample;
 292
 293     bps = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->channels;
 294     len = frame->nb_samples * bps;
 295
 296     sample = ff_create_memory_sample(frame->data[0], len, c->in_info.cbAlignment);
 297     if (sample)
 298         IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->nb_samples));
 299     return sample;
 300 }
 301
 302 static IMFSample *mf_v_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
 303 {
 304     MFContext *c = avctx->priv_data;
 305     IMFSample *sample;
 306     IMFMediaBuffer *buffer;
 307     BYTE *data;
 308     HRESULT hr;
 309     int ret;
 310     int size;
 311
 312     size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
 313     if (size < 0)
 314         return NULL;
 315
 316     sample = ff_create_memory_sample(NULL, size, c->in_info.cbAlignment);
 317     if (!sample)
 318         return NULL;
 319
 320     hr = IMFSample_GetBufferByIndex(sample, 0, &buffer);
 321     if (FAILED(hr)) {
 322         IMFSample_Release(sample);
 323         return NULL;
 324     }
 325
 326     hr = IMFMediaBuffer_Lock(buffer, &data, NULL, NULL);
 327     if (FAILED(hr)) {
 328         IMFMediaBuffer_Release(buffer);
 329         IMFSample_Release(sample);
 330         return NULL;
 331     }
 332
 333     ret = av_image_copy_to_buffer((uint8_t *)data, size, (void *)frame->data, frame->linesize,
 334                                   avctx->pix_fmt, avctx->width, avctx->height, 1);
 335     IMFMediaBuffer_SetCurrentLength(buffer, size);
 336     IMFMediaBuffer_Unlock(buffer);
 337     IMFMediaBuffer_Release(buffer);
 338     if (ret < 0) {
 339         IMFSample_Release(sample);
 340         return NULL;
 341     }
 342
 343     IMFSample_SetSampleDuration(sample, mf_to_mf_time(avctx, frame->pkt_duration));
 344
 345     return sample;
 346 }
 347
 348 static IMFSample *mf_avframe_to_sample(AVCodecContext *avctx, const AVFrame *frame)
 349 {
 350     MFContext *c = avctx->priv_data;
 351     IMFSample *sample;
 352
 353     if (c->is_audio) {
 354         sample = mf_a_avframe_to_sample(avctx, frame);
 355     } else {
 356         sample = mf_v_avframe_to_sample(avctx, frame);
 357     }
 358
 359     if (sample)
 360         mf_sample_set_pts(avctx, sample, frame->pts);
 361
 362     return sample;
 363 }
 364
 365 static int mf_send_sample(AVCodecContext *avctx, IMFSample *sample)
 366 {
 367     MFContext *c = avctx->priv_data;
 368     HRESULT hr;
 369     int ret;
 370
 371     if (sample) {
 372         if (c->async_events) {
 373             if ((ret = mf_wait_events(avctx)) < 0)
 374                 return ret;
 375             if (!c->async_need_input)
 376                 return AVERROR(EAGAIN);
 377         }
 378         if (!c->sample_sent)
 379             IMFSample_SetUINT32(sample, &MFSampleExtension_Discontinuity, TRUE);
 380         c->sample_sent = 1;
 381         hr = IMFTransform_ProcessInput(c->mft, c->in_stream_id, sample, 0);
 382         if (hr == MF_E_NOTACCEPTING) {
 383             return AVERROR(EAGAIN);
 384         } else if (FAILED(hr)) {
 385             av_log(avctx, AV_LOG_ERROR, "failed processing input: %s\n", ff_hr_str(hr));
 386             return AVERROR_EXTERNAL;
 387         }
 388         c->async_need_input = 0;
 389     } else if (!c->draining) {
 390         hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_COMMAND_DRAIN, 0);
 391         if (FAILED(hr))
 392             av_log(avctx, AV_LOG_ERROR, "failed draining: %s\n", ff_hr_str(hr));
 393         // Some MFTs (AC3) will send a frame after each drain command (???), so
 394         // this is required to make draining actually terminate.
 395         c->draining = 1;
 396         c->async_need_input = 0;
 397     } else {
 398         return AVERROR_EOF;
 399     }
 400     return 0;
 401 }
 402
 403 static int mf_receive_sample(AVCodecContext *avctx, IMFSample **out_sample)
 404 {
 405     MFContext *c = avctx->priv_data;
 406     HRESULT hr;
 407     DWORD st;
 408     MFT_OUTPUT_DATA_BUFFER out_buffers;
 409     IMFSample *sample;
 410     int ret = 0;
 411
 412     while (1) {
 413         *out_sample = NULL;
 414         sample = NULL;
 415
 416         if (c->async_events) {
 417             if ((ret = mf_wait_events(avctx)) < 0)
 418                 return ret;
 419             if (!c->async_have_output || c->draining_done) {
 420                 ret = 0;
 421                 break;
 422             }
 423         }
 424
 425         if (!c->out_stream_provides_samples) {
 426             sample = ff_create_memory_sample(NULL, c->out_info.cbSize, c->out_info.cbAlignment);
 427             if (!sample)
 428                 return AVERROR(ENOMEM);
 429         }
 430
 431         out_buffers = (MFT_OUTPUT_DATA_BUFFER) {
 432             .dwStreamID = c->out_stream_id,
 433             .pSample = sample,
 434         };
 435
 436         st = 0;
 437         hr = IMFTransform_ProcessOutput(c->mft, 0, 1, &out_buffers, &st);
 438
 439         if (out_buffers.pEvents)
 440             IMFCollection_Release(out_buffers.pEvents);
 441
 442         if (!FAILED(hr)) {
 443             *out_sample = out_buffers.pSample;
 444             ret = 0;
 445             break;
 446         }
 447
 448         if (out_buffers.pSample)
 449             IMFSample_Release(out_buffers.pSample);
 450
 451         if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
 452             if (c->draining)
 453                 c->draining_done = 1;
 454             ret = 0;
 455         } else if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
 456             av_log(avctx, AV_LOG_WARNING, "stream format change\n");
 457             ret = mf_choose_output_type(avctx);
 458             if (ret == 0) // we don't expect renegotiating the input type
 459                 ret = AVERROR_EXTERNAL;
 460             if (ret > 0) {
 461                 ret = mf_setup_context(avctx);
 462                 if (ret >= 0) {
 463                     c->async_have_output = 0;
 464                     continue;
 465                 }
 466             }
 467         } else {
 468             av_log(avctx, AV_LOG_ERROR, "failed processing output: %s\n", ff_hr_str(hr));
 469             ret = AVERROR_EXTERNAL;
 470         }
 471
 472         break;
 473     }
 474
 475     c->async_have_output = 0;
 476
 477     if (ret >= 0 && !*out_sample)
 478         ret = c->draining_done ? AVERROR_EOF : AVERROR(EAGAIN);
 479
 480     return ret;
 481 }
 482
 483 static int mf_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
 484 {
 485     MFContext *c = avctx->priv_data;
 486     IMFSample *sample = NULL;
 487     int ret;
 488
 489     if (!c->frame->buf[0]) {
 490         ret = ff_encode_get_frame(avctx, c->frame);
 491         if (ret < 0 && ret != AVERROR_EOF)
 492             return ret;
 493     }
 494
 495     if (c->frame->buf[0]) {
 496         sample = mf_avframe_to_sample(avctx, c->frame);
 497         if (!sample) {
 498             av_frame_unref(c->frame);
 499             return AVERROR(ENOMEM);
 500         }
 501         if (c->is_video && c->codec_api) {
 502             if (c->frame->pict_type == AV_PICTURE_TYPE_I || !c->sample_sent)
 503                 ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncVideoForceKeyFrame, FF_VAL_VT_UI4(1));
 504         }
 505     }
 506
 507     ret = mf_send_sample(avctx, sample);
 508     if (sample)
 509         IMFSample_Release(sample);
 510     if (ret != AVERROR(EAGAIN))
 511         av_frame_unref(c->frame);
 512     if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
 513         return ret;
 514
 515     ret = mf_receive_sample(avctx, &sample);
 516     if (ret < 0)
 517         return ret;
 518
 519     ret = mf_sample_to_avpacket(avctx, sample, avpkt);
 520     IMFSample_Release(sample);
 521
 522     return ret;
 523 }
 524
 525 // Most encoders seem to enumerate supported audio formats on the output types,
 526 // at least as far as channel configuration and sample rate is concerned. Pick
 527 // the one which seems to match best.
 528 static int64_t mf_enca_output_score(AVCodecContext *avctx, IMFMediaType *type)
 529 {
 530     MFContext *c = avctx->priv_data;
 531     HRESULT hr;
 532     UINT32 t;
 533     GUID tg;
 534     int64_t score = 0;
 535
 536     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
 537     if (!FAILED(hr) && t == avctx->sample_rate)
 538         score |= 1LL << 32;
 539
 540     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
 541     if (!FAILED(hr) && t == avctx->channels)
 542         score |= 2LL << 32;
 543
 544     hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
 545     if (!FAILED(hr)) {
 546         if (IsEqualGUID(&c->main_subtype, &tg))
 547             score |= 4LL << 32;
 548     }
 549
 550     // Select the bitrate (lowest priority).
 551     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &t);
 552     if (!FAILED(hr)) {
 553         int diff = (int)t - avctx->bit_rate / 8;
 554         if (diff >= 0) {
 555             score |= (1LL << 31) - diff; // prefer lower bitrate
 556         } else {
 557             score |= (1LL << 30) + diff; // prefer higher bitrate
 558         }
 559     }
 560
 561     hr = IMFAttributes_GetUINT32(type, &MF_MT_AAC_PAYLOAD_TYPE, &t);
 562     if (!FAILED(hr) && t != 0)
 563         return -1;
 564
 565     return score;
 566 }
 567
 568 static int mf_enca_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
 569 {
 570     // (some decoders allow adjusting this freely, but it can also cause failure
 571     //  to set the output type - so it's commented for being too fragile)
 572     //IMFAttributes_SetUINT32(type, &MF_MT_AUDIO_AVG_BYTES_PER_SECOND, avctx->bit_rate / 8);
 573     //IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
 574
 575     return 0;
 576 }
 577
 578 static int64_t mf_enca_input_score(AVCodecContext *avctx, IMFMediaType *type)
 579 {
 580     HRESULT hr;
 581     UINT32 t;
 582     int64_t score = 0;
 583
 584     enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
 585     if (sformat == AV_SAMPLE_FMT_NONE)
 586         return -1; // can not use
 587
 588     if (sformat == avctx->sample_fmt)
 589         score |= 1;
 590
 591     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
 592     if (!FAILED(hr) && t == avctx->sample_rate)
 593         score |= 2;
 594
 595     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
 596     if (!FAILED(hr) && t == avctx->channels)
 597         score |= 4;
 598
 599     return score;
 600 }
 601
 602 static int mf_enca_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
 603 {
 604     HRESULT hr;
 605     UINT32 t;
 606
 607     enum AVSampleFormat sformat = ff_media_type_to_sample_fmt((IMFAttributes *)type);
 608     if (sformat != avctx->sample_fmt) {
 609         av_log(avctx, AV_LOG_ERROR, "unsupported input sample format set\n");
 610         return AVERROR(EINVAL);
 611     }
 612
 613     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_SAMPLES_PER_SECOND, &t);
 614     if (FAILED(hr) || t != avctx->sample_rate) {
 615         av_log(avctx, AV_LOG_ERROR, "unsupported input sample rate set\n");
 616         return AVERROR(EINVAL);
 617     }
 618
 619     hr = IMFAttributes_GetUINT32(type, &MF_MT_AUDIO_NUM_CHANNELS, &t);
 620     if (FAILED(hr) || t != avctx->channels) {
 621         av_log(avctx, AV_LOG_ERROR, "unsupported input channel number set\n");
 622         return AVERROR(EINVAL);
 623     }
 624
 625     return 0;
 626 }
 627
 628 static int64_t mf_encv_output_score(AVCodecContext *avctx, IMFMediaType *type)
 629 {
 630     MFContext *c = avctx->priv_data;
 631     GUID tg;
 632     HRESULT hr;
 633     int score = -1;
 634
 635     hr = IMFAttributes_GetGUID(type, &MF_MT_SUBTYPE, &tg);
 636     if (!FAILED(hr)) {
 637         if (IsEqualGUID(&c->main_subtype, &tg))
 638             score = 1;
 639     }
 640
 641     return score;
 642 }
 643
 644 static int mf_encv_output_adjust(AVCodecContext *avctx, IMFMediaType *type)
 645 {
 646     MFContext *c = avctx->priv_data;
 647     AVRational framerate;
 648
 649     ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
 650     IMFAttributes_SetUINT32(type, &MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
 651
 652     if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
 653         framerate = avctx->framerate;
 654     } else {
 655         framerate = av_inv_q(avctx->time_base);
 656         framerate.den *= avctx->ticks_per_frame;
 657     }
 658
 659     ff_MFSetAttributeRatio((IMFAttributes *)type, &MF_MT_FRAME_RATE, framerate.num, framerate.den);
 660
 661     // (MS HEVC supports eAVEncH265VProfile_Main_420_8 only.)
 662     if (avctx->codec_id == AV_CODEC_ID_H264) {
 663         UINT32 profile = ff_eAVEncH264VProfile_Base;
 664         switch (avctx->profile) {
 665         case FF_PROFILE_H264_MAIN:
 666             profile = ff_eAVEncH264VProfile_Main;
 667             break;
 668         case FF_PROFILE_H264_HIGH:
 669             profile = ff_eAVEncH264VProfile_High;
 670             break;
 671         }
 672         IMFAttributes_SetUINT32(type, &MF_MT_MPEG2_PROFILE, profile);
 673     }
 674
 675     IMFAttributes_SetUINT32(type, &MF_MT_AVG_BITRATE, avctx->bit_rate);
 676
 677     // Note that some of the ICodecAPI options must be set before SetOutputType.
 678     if (c->codec_api) {
 679         if (avctx->bit_rate)
 680             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonMeanBitRate, FF_VAL_VT_UI4(avctx->bit_rate));
 681
 682         if (c->opt_enc_rc >= 0)
 683             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonRateControlMode, FF_VAL_VT_UI4(c->opt_enc_rc));
 684
 685         if (c->opt_enc_quality >= 0)
 686             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncCommonQuality, FF_VAL_VT_UI4(c->opt_enc_quality));
 687
 688         // Always set the number of b-frames. Qualcomm's HEVC encoder on SD835
 689         // defaults this to 1, and that setting is buggy with many of the
 690         // rate control modes. (0 or 2 b-frames works fine with most rate
 691         // control modes, but 2 seems buggy with the u_vbr mode.) Setting
 692         // "scenario" to "camera_record" sets it in CFR mode (where the default
 693         // is VFR), which makes the encoder avoid dropping frames.
 694         ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncMPVDefaultBPictureCount, FF_VAL_VT_UI4(avctx->max_b_frames));
 695         avctx->has_b_frames = avctx->max_b_frames > 0;
 696
 697         ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVEncH264CABACEnable, FF_VAL_VT_BOOL(1));
 698
 699         if (c->opt_enc_scenario >= 0)
 700             ICodecAPI_SetValue(c->codec_api, &ff_CODECAPI_AVScenarioInfo, FF_VAL_VT_UI4(c->opt_enc_scenario));
 701     }
 702
 703     return 0;
 704 }
 705
 706 static int64_t mf_encv_input_score(AVCodecContext *avctx, IMFMediaType *type)
 707 {
 708     enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
 709     if (pix_fmt != avctx->pix_fmt)
 710         return -1; // can not use
 711
 712     return 0;
 713 }
 714
 715 static int mf_encv_input_adjust(AVCodecContext *avctx, IMFMediaType *type)
 716 {
 717     enum AVPixelFormat pix_fmt = ff_media_type_to_pix_fmt((IMFAttributes *)type);
 718     if (pix_fmt != avctx->pix_fmt) {
 719         av_log(avctx, AV_LOG_ERROR, "unsupported input pixel format set\n");
 720         return AVERROR(EINVAL);
 721     }
 722
 723     //ff_MFSetAttributeSize((IMFAttributes *)type, &MF_MT_FRAME_SIZE, avctx->width, avctx->height);
 724
 725     return 0;
 726 }
 727
 728 static int mf_choose_output_type(AVCodecContext *avctx)
 729 {
 730     MFContext *c = avctx->priv_data;
 731     HRESULT hr;
 732     int ret;
 733     IMFMediaType *out_type = NULL;
 734     int64_t out_type_score = -1;
 735     int out_type_index = -1;
 736     int n;
 737
 738     av_log(avctx, AV_LOG_VERBOSE, "output types:\n");
 739     for (n = 0; ; n++) {
 740         IMFMediaType *type;
 741         int64_t score = -1;
 742
 743         hr = IMFTransform_GetOutputAvailableType(c->mft, c->out_stream_id, n, &type);
 744         if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
 745             break;
 746         if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
 747             av_log(avctx, AV_LOG_VERBOSE, "(need to set input type)\n");
 748             ret = 0;
 749             goto done;
 750         }
 751         if (FAILED(hr)) {
 752             av_log(avctx, AV_LOG_ERROR, "error getting output type: %s\n", ff_hr_str(hr));
 753             ret = AVERROR_EXTERNAL;
 754             goto done;
 755         }
 756
 757         av_log(avctx, AV_LOG_VERBOSE, "output type %d:\n", n);
 758         ff_media_type_dump(avctx, type);
 759
 760         if (c->is_video) {
 761             score = mf_encv_output_score(avctx, type);
 762         } else if (c->is_audio) {
 763             score = mf_enca_output_score(avctx, type);
 764         }
 765
 766         if (score > out_type_score) {
 767             if (out_type)
 768                 IMFMediaType_Release(out_type);
 769             out_type = type;
 770             out_type_score = score;
 771             out_type_index = n;
 772             IMFMediaType_AddRef(out_type);
 773         }
 774
 775         IMFMediaType_Release(type);
 776     }
 777
 778     if (out_type) {
 779         av_log(avctx, AV_LOG_VERBOSE, "picking output type %d.\n", out_type_index);
 780     } else {
 781         hr = MFCreateMediaType(&out_type);
 782         if (FAILED(hr)) {
 783             ret = AVERROR(ENOMEM);
 784             goto done;
 785         }
 786     }
 787
 788     ret = 0;
 789     if (c->is_video) {
 790         ret = mf_encv_output_adjust(avctx, out_type);
 791     } else if (c->is_audio) {
 792         ret = mf_enca_output_adjust(avctx, out_type);
 793     }
 794
 795     if (ret >= 0) {
 796         av_log(avctx, AV_LOG_VERBOSE, "setting output type:\n");
 797         ff_media_type_dump(avctx, out_type);
 798
 799         hr = IMFTransform_SetOutputType(c->mft, c->out_stream_id, out_type, 0);
 800         if (!FAILED(hr)) {
 801             ret = 1;
 802         } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
 803             av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set input type\n");
 804             ret = 0;
 805         } else {
 806             av_log(avctx, AV_LOG_ERROR, "could not set output type (%s)\n", ff_hr_str(hr));
 807             ret = AVERROR_EXTERNAL;
 808         }
 809     }
 810
 811 done:
 812     if (out_type)
 813         IMFMediaType_Release(out_type);
 814     return ret;
 815 }
 816
 817 static int mf_choose_input_type(AVCodecContext *avctx)
 818 {
 819     MFContext *c = avctx->priv_data;
 820     HRESULT hr;
 821     int ret;
 822     IMFMediaType *in_type = NULL;
 823     int64_t in_type_score = -1;
 824     int in_type_index = -1;
 825     int n;
 826
 827     av_log(avctx, AV_LOG_VERBOSE, "input types:\n");
 828     for (n = 0; ; n++) {
 829         IMFMediaType *type = NULL;
 830         int64_t score = -1;
 831
 832         hr = IMFTransform_GetInputAvailableType(c->mft, c->in_stream_id, n, &type);
 833         if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL)
 834             break;
 835         if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
 836             av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 1)\n");
 837             ret = 0;
 838             goto done;
 839         }
 840         if (FAILED(hr)) {
 841             av_log(avctx, AV_LOG_ERROR, "error getting input type: %s\n", ff_hr_str(hr));
 842             ret = AVERROR_EXTERNAL;
 843             goto done;
 844         }
 845
 846         av_log(avctx, AV_LOG_VERBOSE, "input type %d:\n", n);
 847         ff_media_type_dump(avctx, type);
 848
 849         if (c->is_video) {
 850             score = mf_encv_input_score(avctx, type);
 851         } else if (c->is_audio) {
 852             score = mf_enca_input_score(avctx, type);
 853         }
 854
 855         if (score > in_type_score) {
 856             if (in_type)
 857                 IMFMediaType_Release(in_type);
 858             in_type = type;
 859             in_type_score = score;
 860             in_type_index = n;
 861             IMFMediaType_AddRef(in_type);
 862         }
 863
 864         IMFMediaType_Release(type);
 865     }
 866
 867     if (in_type) {
 868         av_log(avctx, AV_LOG_VERBOSE, "picking input type %d.\n", in_type_index);
 869     } else {
 870         // Some buggy MFTs (WMA encoder) fail to return MF_E_TRANSFORM_TYPE_NOT_SET.
 871         av_log(avctx, AV_LOG_VERBOSE, "(need to set output type 2)\n");
 872         ret = 0;
 873         goto done;
 874     }
 875
 876     ret = 0;
 877     if (c->is_video) {
 878         ret = mf_encv_input_adjust(avctx, in_type);
 879     } else if (c->is_audio) {
 880         ret = mf_enca_input_adjust(avctx, in_type);
 881     }
 882
 883     if (ret >= 0) {
 884         av_log(avctx, AV_LOG_VERBOSE, "setting input type:\n");
 885         ff_media_type_dump(avctx, in_type);
 886
 887         hr = IMFTransform_SetInputType(c->mft, c->in_stream_id, in_type, 0);
 888         if (!FAILED(hr)) {
 889             ret = 1;
 890         } else if (hr == MF_E_TRANSFORM_TYPE_NOT_SET) {
 891             av_log(avctx, AV_LOG_VERBOSE, "rejected - need to set output type\n");
 892             ret = 0;
 893         } else {
 894             av_log(avctx, AV_LOG_ERROR, "could not set input type (%s)\n", ff_hr_str(hr));
 895             ret = AVERROR_EXTERNAL;
 896         }
 897     }
 898
 899 done:
 900     if (in_type)
 901         IMFMediaType_Release(in_type);
 902     return ret;
 903 }
 904
 905 static int mf_negotiate_types(AVCodecContext *avctx)
 906 {
 907     // This follows steps 1-5 on:
 908     //  https://msdn.microsoft.com/en-us/library/windows/desktop/aa965264(v=vs.85).aspx
 909     // If every MFT implementer does this correctly, this loop should at worst
 910     // be repeated once.
 911     int need_input = 1, need_output = 1;
 912     int n;
 913     for (n = 0; n < 2 && (need_input || need_output); n++) {
 914         int ret;
 915         ret = mf_choose_input_type(avctx);
 916         if (ret < 0)
 917             return ret;
 918         need_input = ret < 1;
 919         ret = mf_choose_output_type(avctx);
 920         if (ret < 0)
 921             return ret;
 922         need_output = ret < 1;
 923     }
 924     if (need_input || need_output) {
 925         av_log(avctx, AV_LOG_ERROR, "format negotiation failed (%d/%d)\n",
 926                need_input, need_output);
 927         return AVERROR_EXTERNAL;
 928     }
 929     return 0;
 930 }
 931
 932 static int mf_setup_context(AVCodecContext *avctx)
 933 {
 934     MFContext *c = avctx->priv_data;
 935     HRESULT hr;
 936     int ret;
 937
 938     hr = IMFTransform_GetInputStreamInfo(c->mft, c->in_stream_id, &c->in_info);
 939     if (FAILED(hr))
 940         return AVERROR_EXTERNAL;
 941     av_log(avctx, AV_LOG_VERBOSE, "in_info: size=%d, align=%d\n",
 942            (int)c->in_info.cbSize, (int)c->in_info.cbAlignment);
 943
 944     hr = IMFTransform_GetOutputStreamInfo(c->mft, c->out_stream_id, &c->out_info);
 945     if (FAILED(hr))
 946         return AVERROR_EXTERNAL;
 947     c->out_stream_provides_samples =
 948         (c->out_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) ||
 949         (c->out_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES);
 950     av_log(avctx, AV_LOG_VERBOSE, "out_info: size=%d, align=%d%s\n",
 951            (int)c->out_info.cbSize, (int)c->out_info.cbAlignment,
 952            c->out_stream_provides_samples ? " (provides samples)" : "");
 953
 954     if ((ret = mf_output_type_get(avctx)) < 0)
 955         return ret;
 956
 957     return 0;
 958 }
 959
 960 static int mf_unlock_async(AVCodecContext *avctx)
 961 {
 962     MFContext *c = avctx->priv_data;
 963     HRESULT hr;
 964     IMFAttributes *attrs;
 965     UINT32 v;
 966     int res = AVERROR_EXTERNAL;
 967
 968     // For hw encoding we unfortunately need to use async mode, otherwise
 969     // play it safe and avoid it.
 970     if (!(c->is_video && c->opt_enc_hw))
 971         return 0;
 972
 973     hr = IMFTransform_GetAttributes(c->mft, &attrs);
 974     if (FAILED(hr)) {
 975         av_log(avctx, AV_LOG_ERROR, "error retrieving MFT attributes: %s\n", ff_hr_str(hr));
 976         goto err;
 977     }
 978
 979     hr = IMFAttributes_GetUINT32(attrs, &MF_TRANSFORM_ASYNC, &v);
 980     if (FAILED(hr)) {
 981         av_log(avctx, AV_LOG_ERROR, "error querying async: %s\n", ff_hr_str(hr));
 982         goto err;
 983     }
 984
 985     if (!v) {
 986         av_log(avctx, AV_LOG_ERROR, "hardware MFT is not async\n");
 987         goto err;
 988     }
 989
 990     hr = IMFAttributes_SetUINT32(attrs, &MF_TRANSFORM_ASYNC_UNLOCK, TRUE);
 991     if (FAILED(hr)) {
 992         av_log(avctx, AV_LOG_ERROR, "could not set async unlock: %s\n", ff_hr_str(hr));
 993         goto err;
 994     }
 995
 996     hr = IMFTransform_QueryInterface(c->mft, &IID_IMFMediaEventGenerator, (void **)&c->async_events);
 997     if (FAILED(hr)) {
 998         av_log(avctx, AV_LOG_ERROR, "could not get async interface\n");
 999         goto err;
1000     }
1001
1002     res = 0;
1003
1004 err:
1005     IMFAttributes_Release(attrs);
1006     return res;
1007 }
1008
1009 static int mf_create(void *log, IMFTransform **mft, const AVCodec *codec, int use_hw)
1010 {
1011     int is_audio = codec->type == AVMEDIA_TYPE_AUDIO;
1012     const CLSID *subtype = ff_codec_to_mf_subtype(codec->id);
1013     MFT_REGISTER_TYPE_INFO reg = {0};
1014     GUID category;
1015     int ret;
1016
1017     *mft = NULL;
1018
1019     if (!subtype)
1020         return AVERROR(ENOSYS);
1021
1022     reg.guidSubtype = *subtype;
1023
1024     if (is_audio) {
1025         reg.guidMajorType = MFMediaType_Audio;
1026         category = MFT_CATEGORY_AUDIO_ENCODER;
1027     } else {
1028         reg.guidMajorType = MFMediaType_Video;
1029         category = MFT_CATEGORY_VIDEO_ENCODER;
1030     }
1031
1032     if ((ret = ff_instantiate_mf(log, category, NULL, &reg, use_hw, mft)) < 0)
1033         return ret;
1034
1035     return 0;
1036 }
1037
1038 static int mf_init(AVCodecContext *avctx)
1039 {
1040     MFContext *c = avctx->priv_data;
1041     HRESULT hr;
1042     int ret;
1043     const CLSID *subtype = ff_codec_to_mf_subtype(avctx->codec_id);
1044     int use_hw = 0;
1045
1046     c->frame = av_frame_alloc();
1047     if (!c->frame)
1048         return AVERROR(ENOMEM);
1049
1050     c->is_audio = avctx->codec_type == AVMEDIA_TYPE_AUDIO;
1051     c->is_video = !c->is_audio;
1052     c->reorder_delay = AV_NOPTS_VALUE;
1053
1054     if (c->is_video && c->opt_enc_hw)
1055         use_hw = 1;
1056
1057     if (!subtype)
1058         return AVERROR(ENOSYS);
1059
1060     c->main_subtype = *subtype;
1061
1062     if ((ret = mf_create(avctx, &c->mft, avctx->codec, use_hw)) < 0)
1063         return ret;
1064
1065     if ((ret = mf_unlock_async(avctx)) < 0)
1066         return ret;
1067
1068     hr = IMFTransform_QueryInterface(c->mft, &IID_ICodecAPI, (void **)&c->codec_api);
1069     if (!FAILED(hr))
1070         av_log(avctx, AV_LOG_VERBOSE, "MFT supports ICodecAPI.\n");
1071
1072
1073     hr = IMFTransform_GetStreamIDs(c->mft, 1, &c->in_stream_id, 1, &c->out_stream_id);
1074     if (hr == E_NOTIMPL) {
1075         c->in_stream_id = c->out_stream_id = 0;
1076     } else if (FAILED(hr)) {
1077         av_log(avctx, AV_LOG_ERROR, "could not get stream IDs (%s)\n", ff_hr_str(hr));
1078         return AVERROR_EXTERNAL;
1079     }
1080
1081     if ((ret = mf_negotiate_types(avctx)) < 0)
1082         return ret;
1083
1084     if ((ret = mf_setup_context(avctx)) < 0)
1085         return ret;
1086
1087     hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
1088     if (FAILED(hr)) {
1089         av_log(avctx, AV_LOG_ERROR, "could not start streaming (%s)\n", ff_hr_str(hr));
1090         return AVERROR_EXTERNAL;
1091     }
1092
1093     hr = IMFTransform_ProcessMessage(c->mft, MFT_MESSAGE_NOTIFY_START_OF_STREAM, 0);
1094     if (FAILED(hr)) {
1095         av_log(avctx, AV_LOG_ERROR, "could not start stream (%s)\n", ff_hr_str(hr));
1096         return AVERROR_EXTERNAL;
1097     }
1098
1099     if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && c->async_events &&
1100         c->is_video && !avctx->extradata) {
1101         int sleep = 10000, total = 0;
1102         av_log(avctx, AV_LOG_VERBOSE, "Awaiting extradata\n");
1103         while (total < 70*1000) {
1104             // The Qualcomm H264 encoder on SD835 doesn't provide extradata
1105             // immediately, but it becomes available soon after init (without
1106             // any waitable event). In practice, it's available after less
1107             // than 10 ms, but wait for up to 70 ms before giving up.
1108             // Some encoders (Qualcomm's HEVC encoder on SD835, some versions
1109             // of the QSV H264 encoder at least) don't provide extradata this
1110             // way at all, not even after encoding a frame - it's only
1111             // available prepended to frames.
1112             av_usleep(sleep);
1113             total += sleep;
1114             mf_output_type_get(avctx);
1115             if (avctx->extradata)
1116                 break;
1117             sleep *= 2;
1118         }
1119         av_log(avctx, AV_LOG_VERBOSE, "%s extradata in %d ms\n",
1120                avctx->extradata ? "Got" : "Didn't get", total / 1000);
1121     }
1122
1123     return 0;
1124 }
1125
1126 static int mf_close(AVCodecContext *avctx)
1127 {
1128     MFContext *c = avctx->priv_data;
1129
1130     if (c->codec_api)
1131         ICodecAPI_Release(c->codec_api);
1132
1133     if (c->async_events)
1134         IMFMediaEventGenerator_Release(c->async_events);
1135
1136     ff_free_mf(&c->mft);
1137
1138     av_frame_free(&c->frame);
1139
1140     av_freep(&avctx->extradata);
1141     avctx->extradata_size = 0;
1142
1143     return 0;
1144 }
1145
// Byte offset of field x inside MFContext; the AVOption tables in this file
// use it to name the field an option is stored in.
#define OFFSET(x) offsetof(MFContext, x)
1147
/*
 * Define one MediaFoundation encoder: a private AVClass named
 * ff_<NAME>_mf_encoder_class plus the public AVCodec ff_<NAME>_mf_encoder.
 *
 *   MEDIATYPE  suffix pasted onto AVMEDIA_TYPE_ (e.g. AUDIO, VIDEO)
 *   NAME       short codec name; used in the identifiers above and,
 *              stringified with "_mf" appended, as class and codec name
 *   ID         suffix pasted onto AV_CODEC_ID_; also stringified into the
 *              long name
 *   OPTS       value for AVClass.option (an AVOption table, or NULL)
 *   EXTRA      extra designated initializers inserted verbatim into the
 *              AVCodec; must carry its own trailing comma (see AFMTS/VFMTS)
 *
 * Every encoder shares mf_init/mf_close/mf_receive_packet and the same
 * capability flags.
 */
#define MF_ENCODER(MEDIATYPE, NAME, ID, OPTS, EXTRA) \
    static const AVClass ff_ ## NAME ## _mf_encoder_class = {                  \
        .class_name = #NAME "_mf",                                             \
        .item_name  = av_default_item_name,                                    \
        .option     = OPTS,                                                    \
        .version    = LIBAVUTIL_VERSION_INT,                                   \
    };                                                                         \
    const AVCodec ff_ ## NAME ## _mf_encoder = {                               \
        .priv_class     = &ff_ ## NAME ## _mf_encoder_class,                   \
        .name           = #NAME "_mf",                                         \
        .long_name      = NULL_IF_CONFIG_SMALL(#ID " via MediaFoundation"),    \
        .type           = AVMEDIA_TYPE_ ## MEDIATYPE,                          \
        .id             = AV_CODEC_ID_ ## ID,                                  \
        .priv_data_size = sizeof(MFContext),                                   \
        .init           = mf_init,                                             \
        .close          = mf_close,                                            \
        .receive_packet = mf_receive_packet,                                   \
        EXTRA                                                                  \
        .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HYBRID |           \
                          AV_CODEC_CAP_DR1,                                    \
        .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |                       \
                          FF_CODEC_CAP_INIT_CLEANUP,                           \
    };
1171
// EXTRA initializer for audio encoders: the supported sample-format list,
// containing only AV_SAMPLE_FMT_S16 and terminated by AV_SAMPLE_FMT_NONE.
#define AFMTS \
        .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,    \
                                                         AV_SAMPLE_FMT_NONE },

// Audio encoders; none of them exposes private options (OPTS is NULL).
MF_ENCODER(AUDIO, aac,         AAC, NULL, AFMTS);
MF_ENCODER(AUDIO, ac3,         AC3, NULL, AFMTS);
MF_ENCODER(AUDIO, mp3,         MP3, NULL, AFMTS);
1179
// Flags shared by every entry below: video + encoding parameter.
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
// Private options of the video encoders. Each integer option defaults to -1,
// which in the "rate_control" and "scenario" units is the value of the named
// constant "default".
static const AVOption venc_opts[] = {
    // rate_control: stored in MFContext.opt_enc_rc; named constants follow.
    {"rate_control",  "Select rate control mode", OFFSET(opt_enc_rc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "rate_control"},
    { "default",      "Default mode", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "rate_control"},
    { "cbr",          "CBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_CBR}, 0, 0, VE, "rate_control"},
    { "pc_vbr",       "Peak constrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_PeakConstrainedVBR}, 0, 0, VE, "rate_control"},
    { "u_vbr",        "Unconstrained VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_UnconstrainedVBR}, 0, 0, VE, "rate_control"},
    { "quality",      "Quality mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_Quality}, 0, 0, VE, "rate_control" },
    // The following rate_control modes require Windows 8.
    { "ld_vbr",       "Low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_LowDelayVBR}, 0, 0, VE, "rate_control"},
    { "g_vbr",        "Global VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalVBR}, 0, 0, VE, "rate_control" },
    { "gld_vbr",      "Global low delay VBR mode", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVEncCommonRateControlMode_GlobalLowDelayVBR}, 0, 0, VE, "rate_control"},

    // scenario: stored in MFContext.opt_enc_scenario; named constants follow.
    {"scenario",          "Select usage scenario", OFFSET(opt_enc_scenario), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, VE, "scenario"},
    { "default",          "Default scenario", 0, AV_OPT_TYPE_CONST, {.i64 = -1}, 0, 0, VE, "scenario"},
    { "display_remoting", "Display remoting", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemoting}, 0, 0, VE, "scenario"},
    { "video_conference", "Video conference", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_VideoConference}, 0, 0, VE, "scenario"},
    { "archive",          "Archive", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_Archive}, 0, 0, VE, "scenario"},
    { "live_streaming",   "Live streaming", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_LiveStreaming}, 0, 0, VE, "scenario"},
    { "camera_record",    "Camera record", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_CameraRecord}, 0, 0, VE, "scenario"},
    { "display_remoting_with_feature_map", "Display remoting with feature map", 0, AV_OPT_TYPE_CONST, {.i64 = ff_eAVScenarioInfo_DisplayRemotingWithFeatureMap}, 0, 0, VE, "scenario"},

    // Stand-alone options: quality accepts -1..100, hw_encoding is a boolean
    // that defaults to off. This "quality" is a separate option from the
    // "quality" constant of the rate_control unit above.
    {"quality",       "Quality", OFFSET(opt_enc_quality), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 100, VE},
    {"hw_encoding",   "Force hardware encoding", OFFSET(opt_enc_hw), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, VE},
    {NULL}
};
1206
// EXTRA initializer for video encoders: the supported pixel-format list
// (NV12 and YUV420P), terminated by AV_PIX_FMT_NONE.
#define VFMTS \
        .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,       \
                                                        AV_PIX_FMT_YUV420P,    \
                                                        AV_PIX_FMT_NONE },

// Video encoders; both share the venc_opts option table.
MF_ENCODER(VIDEO, h264,        H264, venc_opts, VFMTS);
MF_ENCODER(VIDEO, hevc,        HEVC, venc_opts, VFMTS);