git.sesse.net Git - casparcg/blob - modules/ffmpeg/consumer/ffmpeg_consumer.cpp

   1 #include "../StdAfx.h"
   2
   3 #include "ffmpeg_consumer.h"
   4
   5 #include "../ffmpeg_error.h"
   6 #include "../producer/util/util.h"
   7 #include "../producer/filter/filter.h"
   8 #include "../producer/filter/audio_filter.h"
   9
  10 #include <common/except.h>
  11 #include <common/executor.h>
  12 #include <common/assert.h>
  13 #include <common/utf.h>
  14 #include <common/future.h>
  15 #include <common/diagnostics/graph.h>
  16 #include <common/env.h>
  17 #include <common/scope_exit.h>
  18 #include <common/ptree.h>
  19 #include <common/param.h>
  20 #include <common/semaphore.h>
  21
  22 #include <core/consumer/frame_consumer.h>
  23 #include <core/frame/frame.h>
  24 #include <core/frame/audio_channel_layout.h>
  25 #include <core/video_format.h>
  26 #include <core/monitor/monitor.h>
  27 #include <core/help/help_repository.h>
  28 #include <core/help/help_sink.h>
  29
  30 #include <boost/noncopyable.hpp>
  31 #include <boost/rational.hpp>
  32 #include <boost/format.hpp>
  33 #include <boost/algorithm/string/predicate.hpp>
  34 #include <boost/property_tree/ptree.hpp>
  35
  36 #pragma warning(push)
  37 #pragma warning(disable: 4244)
  38 #pragma warning(disable: 4245)
  39 #include <boost/crc.hpp>
  40 #pragma warning(pop)
  41
  42 #include <tbb/atomic.h>
  43 #include <tbb/concurrent_queue.h>
  44 #include <tbb/parallel_invoke.h>
  45 #include <tbb/parallel_for.h>
  46
  47 #include <numeric>
  48
  49 #pragma warning(push)
  50 #pragma warning(disable: 4244)
  51
  52 extern "C"
  53 {
  54         #define __STDC_CONSTANT_MACROS
  55         #define __STDC_LIMIT_MACROS
  56         #include <libavformat/avformat.h>
  57         #include <libavcodec/avcodec.h>
  58         #include <libavutil/avutil.h>
  59         #include <libavutil/frame.h>
  60         #include <libavutil/opt.h>
  61         #include <libavutil/imgutils.h>
  62         #include <libavutil/parseutils.h>
  63         #include <libavfilter/avfilter.h>
  64         #include <libavfilter/buffersink.h>
  65         #include <libavfilter/buffersrc.h>
  66 }
  67
  68 #pragma warning(pop)
  69
  70 namespace caspar { namespace ffmpeg {
  71
  72 void set_pixel_format(AVFilterContext* sink, AVPixelFormat pix_fmt)
  73 {
  74 #pragma warning (push)
  75 #pragma warning (disable : 4245)
  76
  77         FF(av_opt_set_int_list(
  78                 sink,
  79                 "pix_fmts",
  80                 std::vector<AVPixelFormat>({ pix_fmt, AVPixelFormat::AV_PIX_FMT_NONE }).data(),
  81                 -1,
  82                 AV_OPT_SEARCH_CHILDREN));
  83
  84 #pragma warning (pop)
  85 }
  86
  87 void adjust_video_filter(const AVCodec& codec, const core::video_format_desc& in_format, AVFilterContext* sink, std::string& filter)
  88 {
  89         switch (codec.id)
  90         {
  91         case AV_CODEC_ID_DVVIDEO:
  92                 // Crop
  93                 if (in_format.format == core::video_format::ntsc)
  94                         filter = u8(append_filter(u16(filter), L"crop=720:480:0:2"));
  95
  96                 // Pixel format selection
  97                 if (in_format.format == core::video_format::ntsc)
  98                         set_pixel_format(sink, AVPixelFormat::AV_PIX_FMT_YUV411P);
  99                 else if (in_format.format == core::video_format::pal)
 100                         set_pixel_format(sink, AVPixelFormat::AV_PIX_FMT_YUV420P);
 101                 else
 102                         set_pixel_format(sink, AVPixelFormat::AV_PIX_FMT_YUV422P);
 103
 104                 // Scale
 105                 if (in_format.height == 1080)
 106                         filter = u8(append_filter(u16(filter), in_format.duration == 1001
 107                                 ? L"scale=1280:1080"
 108                                 : L"scale=1440:1080"));
 109                 else if (in_format.height == 720)
 110                         filter = u8(append_filter(u16(filter), L"scale=960:720"));
 111
 112                 break;
 113         }
 114 }
 115
 116 void setup_codec_defaults(AVCodecContext& encoder)
 117 {
 118         static const int MEGABIT = 1000000;
 119
 120         switch (encoder.codec_id)
 121         {
 122         case AV_CODEC_ID_DNXHD:
 123                 encoder.bit_rate = 220 * MEGABIT;
 124
 125                 break;
 126         case AV_CODEC_ID_PRORES:
 127                 encoder.bit_rate = encoder.width < 1280
 128                                 ?  63 * MEGABIT
 129                                 : 220 * MEGABIT;
 130
 131                 break;
 132         case AV_CODEC_ID_H264:
 133                 av_opt_set(encoder.priv_data,   "preset",       "ultrafast",    0);
 134                 av_opt_set(encoder.priv_data,   "tune",         "fastdecode",   0);
 135                 av_opt_set(encoder.priv_data,   "crf",          "5",                    0);
 136
 137                 break;
 138         }
 139 }
 140
 141 bool is_pcm_s24le_not_supported(const AVFormatContext& container)
 142 {
 143         auto name = std::string(container.oformat->name);
 144
 145         if (name == "mp4" || name == "dv")
 146                 return true;
 147
 148         return false;
 149 }
 150
 151 template<typename Out, typename In>
 152 std::vector<Out> from_terminated_array(const In* array, In terminator)
 153 {
 154         std::vector<Out> result;
 155
 156         while (array != nullptr && *array != terminator)
 157         {
 158                 In val          = *array;
 159                 Out casted      = static_cast<Out>(val);
 160
 161                 result.push_back(casted);
 162
 163                 ++array;
 164         }
 165
 166         return result;
 167 }
 168
 169 class ffmpeg_consumer
 170 {
 171 private:
 172         const spl::shared_ptr<diagnostics::graph>       graph_;
 173         core::monitor::subject                                          subject_;
 174         std::string                                                                     path_;
 175         boost::filesystem::path                                         full_path_;
 176
 177         std::map<std::string, std::string>                      options_;
 178         bool                                                                            mono_streams_;
 179
 180         core::video_format_desc                                         in_video_format_;
 181         core::audio_channel_layout                                      in_channel_layout_                      = core::audio_channel_layout::invalid();
 182
 183         std::shared_ptr<AVFormatContext>                        oc_;
 184         tbb::atomic<bool>                                                       abort_request_;
 185
 186         std::shared_ptr<AVStream>                                       video_st_;
 187         std::vector<std::shared_ptr<AVStream>>          audio_sts_;
 188
 189         std::int64_t                                                            video_pts_                                      = 0;
 190         std::int64_t                                                            audio_pts_                                      = 0;
 191
 192         std::unique_ptr<audio_filter>                           audio_filter_;
 193
 194         // TODO: make use of already existent avfilter abstraction for video also
 195     AVFilterContext*                                                    video_graph_in_;
 196     AVFilterContext*                                                    video_graph_out_;
 197     std::shared_ptr<AVFilterGraph>                              video_graph_;
 198
 199         executor                                                                        video_encoder_executor_;
 200         executor                                                                        audio_encoder_executor_;
 201
 202         semaphore                                                                       tokens_                                         { 0 };
 203
 204         tbb::atomic<int64_t>                                            current_encoding_delay_;
 205
 206         executor                                                                        write_executor_;
 207
 208 public:
 209
 210         ffmpeg_consumer(
 211                         std::string path,
 212                         std::string options,
 213                         bool mono_streams)
 214                 : path_(path)
 215                 , full_path_(path)
 216                 , mono_streams_(mono_streams)
 217                 , audio_encoder_executor_(print() + L" audio_encoder")
 218                 , video_encoder_executor_(print() + L" video_encoder")
 219                 , write_executor_(print() + L" io")
 220         {
 221                 abort_request_ = false;
 222                 current_encoding_delay_ = 0;
 223
 224                 for(auto it =
 225                                 boost::sregex_iterator(
 226                                         options.begin(),
 227                                         options.end(),
 228                                         boost::regex("-(?<NAME>[^-\\s]+)(\\s+(?<VALUE>[^\\s]+))?"));
 229                         it != boost::sregex_iterator();
 230                         ++it)
 231                 {
 232                         options_[(*it)["NAME"].str()] = (*it)["VALUE"].matched ? (*it)["VALUE"].str() : "";
 233                 }
 234
 235         if (options_.find("threads") == options_.end())
 236             options_["threads"] = "auto";
 237
 238                 tokens_.release(
 239                         std::max(
 240                                 1,
 241                                 try_remove_arg<int>(
 242                                         options_,
 243                                         boost::regex("tokens")).get_value_or(2)));
 244         }
 245
 246         ~ffmpeg_consumer()
 247         {
 248                 if(oc_)
 249                 {
 250                         try
 251                         {
 252                                 video_encoder_executor_.begin_invoke([&] { encode_video(core::const_frame::empty(), nullptr); });
 253                                 audio_encoder_executor_.begin_invoke([&] { encode_audio(core::const_frame::empty(), nullptr); });
 254
 255                                 video_encoder_executor_.stop();
 256                                 audio_encoder_executor_.stop();
 257                                 video_encoder_executor_.join();
 258                                 audio_encoder_executor_.join();
 259
 260                                 video_graph_.reset();
 261                                 audio_filter_.reset();
 262                                 video_st_.reset();
 263                                 audio_sts_.clear();
 264
 265                                 write_packet(nullptr, nullptr);
 266
 267                                 write_executor_.stop();
 268                                 write_executor_.join();
 269
 270                                 FF(av_write_trailer(oc_.get()));
 271
 272                                 if (!(oc_->oformat->flags & AVFMT_NOFILE) && oc_->pb)
 273                                         avio_close(oc_->pb);
 274
 275                                 oc_.reset();
 276                         }
 277                         catch (...)
 278                         {
 279                                 CASPAR_LOG_CURRENT_EXCEPTION();
 280                         }
 281                 }
 282         }
 283
 284         void initialize(
 285                         const core::video_format_desc& format_desc,
 286                         const core::audio_channel_layout& channel_layout)
 287         {
 288                 try
 289                 {
 290                         static boost::regex prot_exp("^.+:.*" );
 291
 292                         if(!boost::regex_match(
 293                                         path_,
 294                                         prot_exp))
 295                         {
 296                                 if(!full_path_.is_complete())
 297                                 {
 298                                         full_path_ =
 299                                                 u8(
 300                                                         env::media_folder()) +
 301                                                         path_;
 302                                 }
 303
 304                                 if(boost::filesystem::exists(full_path_))
 305                                         boost::filesystem::remove(full_path_);
 306
 307                                 boost::filesystem::create_directories(full_path_.parent_path());
 308                         }
 309
 310                         graph_->set_color("frame-time", diagnostics::color(0.1f, 1.0f, 0.1f));
 311                         graph_->set_color("dropped-frame", diagnostics::color(0.3f, 0.6f, 0.3f));
 312                         graph_->set_text(print());
 313                         diagnostics::register_graph(graph_);
 314
 315                         const auto oformat_name =
 316                                 try_remove_arg<std::string>(
 317                                         options_,
 318                                         boost::regex("^f|format$"));
 319
 320                         AVFormatContext* oc;
 321
 322                         FF(avformat_alloc_output_context2(
 323                                 &oc,
 324                                 nullptr,
 325                                 oformat_name && !oformat_name->empty() ? oformat_name->c_str() : nullptr,
 326                                 full_path_.string().c_str()));
 327
 328                         oc_.reset(
 329                                 oc,
 330                                 avformat_free_context);
 331
 332                         CASPAR_VERIFY(oc_->oformat);
 333
 334                         oc_->interrupt_callback.callback = ffmpeg_consumer::interrupt_cb;
 335                         oc_->interrupt_callback.opaque   = this;
 336
 337                         CASPAR_VERIFY(format_desc.format != core::video_format::invalid);
 338
 339                         in_video_format_ = format_desc;
 340                         in_channel_layout_ = channel_layout;
 341
 342                         CASPAR_VERIFY(oc_->oformat);
 343
 344                         const auto video_codec_name =
 345                                 try_remove_arg<std::string>(
 346                                         options_,
 347                                         boost::regex("^c:v|codec:v|vcodec$"));
 348
 349                         const auto video_codec =
 350                                 video_codec_name
 351                                         ? avcodec_find_encoder_by_name(video_codec_name->c_str())
 352                                         : avcodec_find_encoder(oc_->oformat->video_codec);
 353
 354                         const auto audio_codec_name =
 355                                 try_remove_arg<std::string>(
 356                                         options_,
 357                                          boost::regex("^c:a|codec:a|acodec$"));
 358
 359                         const auto audio_codec =
 360                                 audio_codec_name
 361                                         ? avcodec_find_encoder_by_name(audio_codec_name->c_str())
 362                                         : (is_pcm_s24le_not_supported(*oc_)
 363                                                 ? avcodec_find_encoder(oc_->oformat->audio_codec)
 364                                                 : avcodec_find_encoder_by_name("pcm_s24le"));
 365
 366                         if (!video_codec)
 367                                 CASPAR_THROW_EXCEPTION(user_error() << msg_info(
 368                                                 "Failed to find video codec " + (video_codec_name
 369                                                                 ? *video_codec_name
 370                                                                 : "with id " + boost::lexical_cast<std::string>(
 371                                                                                 oc_->oformat->video_codec))));
 372                         if (!audio_codec)
 373                                 CASPAR_THROW_EXCEPTION(user_error() << msg_info(
 374                                                 "Failed to find audio codec " + (audio_codec_name
 375                                                                 ? *audio_codec_name
 376                                                                 : "with id " + boost::lexical_cast<std::string>(
 377                                                                                 oc_->oformat->audio_codec))));
 378
 379                         // Filters
 380
 381                         {
 382                                 configure_video_filters(
 383                                         *video_codec,
 384                                         try_remove_arg<std::string>(options_,
 385                                         boost::regex("vf|f:v|filter:v")).get_value_or(""));
 386
 387                                 configure_audio_filters(
 388                                         *audio_codec,
 389                                         try_remove_arg<std::string>(options_,
 390                                         boost::regex("af|f:a|filter:a")).get_value_or(""));
 391                         }
 392
 393                         // Encoders
 394
 395                         {
 396                                 auto video_options = options_;
 397                                 auto audio_options = options_;
 398
 399                                 video_st_ = open_encoder(
 400                                         *video_codec,
 401                                         video_options,
 402                                         0);
 403
 404                                 for (int i = 0; i < audio_filter_->get_num_output_pads(); ++i)
 405                                         audio_sts_.push_back(open_encoder(
 406                                                         *audio_codec,
 407                                                         audio_options,
 408                                                         i));
 409
 410                                 auto it = options_.begin();
 411                                 while(it != options_.end())
 412                                 {
 413                                         if(video_options.find(it->first) == video_options.end() || audio_options.find(it->first) == audio_options.end())
 414                                                 it = options_.erase(it);
 415                                         else
 416                                                 ++it;
 417                                 }
 418                         }
 419
 420                         // Output
 421                         {
 422                                 AVDictionary* av_opts = nullptr;
 423
 424                                 to_dict(
 425                                         &av_opts,
 426                                         std::move(options_));
 427
 428                                 CASPAR_SCOPE_EXIT
 429                                 {
 430                                         av_dict_free(&av_opts);
 431                                 };
 432
 433                                 if (!(oc_->oformat->flags & AVFMT_NOFILE))
 434                                 {
 435                                         FF(avio_open2(
 436                                                 &oc_->pb,
 437                                                 full_path_.string().c_str(),
 438                                                 AVIO_FLAG_WRITE,
 439                                                 &oc_->interrupt_callback,
 440                                                 &av_opts));
 441                                 }
 442
 443                                 FF(avformat_write_header(
 444                                         oc_.get(),
 445                                         &av_opts));
 446
 447                                 options_ = to_map(av_opts);
 448                         }
 449
 450                         // Dump Info
 451
 452                         av_dump_format(
 453                                 oc_.get(),
 454                                 0,
 455                                 oc_->filename,
 456                                 1);
 457
 458                         for (const auto& option : options_)
 459                         {
 460                                 CASPAR_LOG(warning)
 461                                         << L"Invalid option: -"
 462                                         << u16(option.first)
 463                                         << L" "
 464                                         << u16(option.second);
 465                         }
 466                 }
 467                 catch(...)
 468                 {
 469                         video_st_.reset();
 470                         audio_sts_.clear();
 471                         oc_.reset();
 472                         throw;
 473                 }
 474         }
 475
 476         core::monitor::subject& monitor_output()
 477         {
 478                 return subject_;
 479         }
 480
 481         void send(core::const_frame frame)
 482         {
 483                 CASPAR_VERIFY(in_video_format_.format != core::video_format::invalid);
 484
 485                 auto frame_timer = spl::make_shared<caspar::timer>();
 486
 487                 std::shared_ptr<void> token(
 488                         nullptr,
 489                         [this, frame, frame_timer](void*)
 490                         {
 491                                 tokens_.release();
 492                                 current_encoding_delay_ = frame.get_age_millis();
 493                                 graph_->set_value("frame-time", frame_timer->elapsed() * in_video_format_.fps * 0.5);
 494                         });
 495                 tokens_.acquire();
 496
 497                 video_encoder_executor_.begin_invoke([=]() mutable
 498                 {
 499                         encode_video(
 500                                 frame,
 501                                 token);
 502                 });
 503
 504                 audio_encoder_executor_.begin_invoke([=]() mutable
 505                 {
 506                         encode_audio(
 507                                 frame,
 508                                 token);
 509                 });
 510         }
 511
 512         bool ready_for_frame() const
 513         {
 514                 return tokens_.permits() > 0;
 515         }
 516
 517         void mark_dropped()
 518         {
 519                 graph_->set_tag(diagnostics::tag_severity::WARNING, "dropped-frame");
 520         }
 521
 522         std::wstring print() const
 523         {
 524                 return L"ffmpeg_consumer[" + u16(path_) + L"]";
 525         }
 526
 527         int64_t presentation_frame_age_millis() const
 528         {
 529                 return current_encoding_delay_;
 530         }
 531
 532 private:
 533
 534         static int interrupt_cb(void* ctx)
 535         {
 536                 CASPAR_ASSERT(ctx);
 537                 return reinterpret_cast<ffmpeg_consumer*>(ctx)->abort_request_;
 538         }
 539
 540         std::shared_ptr<AVStream> open_encoder(
 541                         const AVCodec& codec,
 542                         std::map<std::string,
 543                         std::string>& options,
 544                         int stream_number_for_media_type)
 545         {
 546                 auto st =
 547                         avformat_new_stream(
 548                                 oc_.get(),
 549                                 &codec);
 550
 551                 if (!st)
 552                         CASPAR_THROW_EXCEPTION(caspar_exception() << msg_info("Could not allocate video-stream.") << boost::errinfo_api_function("avformat_new_stream"));
 553
 554                 auto enc = st->codec;
 555
 556                 CASPAR_VERIFY(enc);
 557
 558                 switch(enc->codec_type)
 559                 {
 560                         case AVMEDIA_TYPE_VIDEO:
 561                         {
 562                                 enc->time_base                          = video_graph_out_->inputs[0]->time_base;
 563                                 enc->pix_fmt                                    = static_cast<AVPixelFormat>(video_graph_out_->inputs[0]->format);
 564                                 enc->sample_aspect_ratio                = st->sample_aspect_ratio = video_graph_out_->inputs[0]->sample_aspect_ratio;
 565                                 enc->width                                      = video_graph_out_->inputs[0]->w;
 566                                 enc->height                                     = video_graph_out_->inputs[0]->h;
 567                                 enc->bit_rate_tolerance         = 400 * 1000000;
 568
 569                                 break;
 570                         }
 571                         case AVMEDIA_TYPE_AUDIO:
 572                         {
 573                                 enc->time_base                          = audio_filter_->get_output_pad_info(stream_number_for_media_type).time_base;
 574                                 enc->sample_fmt                         = static_cast<AVSampleFormat>(audio_filter_->get_output_pad_info(stream_number_for_media_type).format);
 575                                 enc->sample_rate                                = audio_filter_->get_output_pad_info(stream_number_for_media_type).sample_rate;
 576                                 enc->channel_layout                     = audio_filter_->get_output_pad_info(stream_number_for_media_type).channel_layout;
 577                                 enc->channels                           = audio_filter_->get_output_pad_info(stream_number_for_media_type).channels;
 578
 579                                 break;
 580                         }
 581                 }
 582
 583                 setup_codec_defaults(*enc);
 584
 585                 if(oc_->oformat->flags & AVFMT_GLOBALHEADER)
 586                         enc->flags |= CODEC_FLAG_GLOBAL_HEADER;
 587
 588                 static const std::array<std::string, 4> char_id_map = {{"v", "a", "d", "s"}};
 589
 590                 const auto char_id = char_id_map.at(enc->codec_type);
 591
 592                 const auto codec_opts =
 593                         remove_options(
 594                                 options,
 595                                 boost::regex("^(" + char_id + "?[^:]+):" + char_id + "$"));
 596
 597                 AVDictionary* av_codec_opts = nullptr;
 598
 599                 to_dict(
 600                         &av_codec_opts,
 601                         options);
 602
 603                 to_dict(
 604                         &av_codec_opts,
 605                         codec_opts);
 606
 607                 options.clear();
 608
 609                 FF(avcodec_open2(
 610                         enc,
 611                         &codec,
 612                         av_codec_opts ? &av_codec_opts : nullptr));
 613
 614                 if(av_codec_opts)
 615                 {
 616                         auto t =
 617                                 av_dict_get(
 618                                         av_codec_opts,
 619                                         "",
 620                                          nullptr,
 621                                         AV_DICT_IGNORE_SUFFIX);
 622
 623                         while(t)
 624                         {
 625                                 options[t->key + (codec_opts.find(t->key) != codec_opts.end() ? ":" + char_id : "")] = t->value;
 626
 627                                 t = av_dict_get(
 628                                                 av_codec_opts,
 629                                                 "",
 630                                                 t,
 631                                                 AV_DICT_IGNORE_SUFFIX);
 632                         }
 633
 634                         av_dict_free(&av_codec_opts);
 635                 }
 636
 637                 if(enc->codec_type == AVMEDIA_TYPE_AUDIO && !(codec.capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE))
 638                 {
 639                         CASPAR_ASSERT(enc->frame_size > 0);
 640                         audio_filter_->set_guaranteed_output_num_samples_per_frame(
 641                                         stream_number_for_media_type,
 642                                         enc->frame_size);
 643                 }
 644
 645                 return std::shared_ptr<AVStream>(st, [this](AVStream* st)
 646                 {
 647                         avcodec_close(st->codec);
 648                 });
 649         }
 650
 651         void configure_video_filters(
 652                         const AVCodec& codec,
 653                         std::string filtergraph)
 654         {
 655                 video_graph_.reset(
 656                                 avfilter_graph_alloc(),
 657                                 [](AVFilterGraph* p)
 658                                 {
 659                                         avfilter_graph_free(&p);
 660                                 });
 661
 662                 video_graph_->nb_threads  = boost::thread::hardware_concurrency()/2;
 663                 video_graph_->thread_type = AVFILTER_THREAD_SLICE;
 664
 665                 const auto sample_aspect_ratio =
 666                         boost::rational<int>(
 667                                         in_video_format_.square_width,
 668                                         in_video_format_.square_height) /
 669                         boost::rational<int>(
 670                                         in_video_format_.width,
 671                                         in_video_format_.height);
 672
 673                 const auto vsrc_options = (boost::format("video_size=%1%x%2%:pix_fmt=%3%:time_base=%4%/%5%:pixel_aspect=%6%/%7%:frame_rate=%8%/%9%")
 674                         % in_video_format_.width % in_video_format_.height
 675                         % AVPixelFormat::AV_PIX_FMT_BGRA
 676                         % in_video_format_.duration     % in_video_format_.time_scale
 677                         % sample_aspect_ratio.numerator() % sample_aspect_ratio.denominator()
 678                         % in_video_format_.time_scale % in_video_format_.duration).str();
 679
 680                 AVFilterContext* filt_vsrc = nullptr;
 681                 FF(avfilter_graph_create_filter(
 682                                 &filt_vsrc,
 683                                 avfilter_get_by_name("buffer"),
 684                                 "ffmpeg_consumer_buffer",
 685                                 vsrc_options.c_str(),
 686                                 nullptr,
 687                                 video_graph_.get()));
 688
 689                 AVFilterContext* filt_vsink = nullptr;
 690                 FF(avfilter_graph_create_filter(
 691                                 &filt_vsink,
 692                                 avfilter_get_by_name("buffersink"),
 693                                 "ffmpeg_consumer_buffersink",
 694                                 nullptr,
 695                                 nullptr,
 696                                 video_graph_.get()));
 697
 698 #pragma warning (push)
 699 #pragma warning (disable : 4245)
 700
 701                 FF(av_opt_set_int_list(
 702                                 filt_vsink,
 703                                 "pix_fmts",
 704                                 codec.pix_fmts,
 705                                 -1,
 706                                 AV_OPT_SEARCH_CHILDREN));
 707
 708 #pragma warning (pop)
 709
 710                 adjust_video_filter(codec, in_video_format_, filt_vsink, filtergraph);
 711
 712                 if (in_video_format_.width < 1280)
 713                         video_graph_->scale_sws_opts = "out_color_matrix=bt601";
 714                 else
 715                         video_graph_->scale_sws_opts = "out_color_matrix=bt709";
 716
 717                 configure_filtergraph(
 718                                 *video_graph_,
 719                                 filtergraph,
 720                                 *filt_vsrc,
 721                                 *filt_vsink);
 722
 723                 video_graph_in_  = filt_vsrc;
 724                 video_graph_out_ = filt_vsink;
 725
 726                 CASPAR_LOG(info)
 727                         <<      u16(std::string("\n")
 728                                 + avfilter_graph_dump(
 729                                                 video_graph_.get(),
 730                                                 nullptr));
 731         }
 732
 733         void configure_audio_filters(
 734                         const AVCodec& codec,
 735                         std::string filtergraph)
 736         {
 737                 int num_output_pads = 1;
 738
 739                 if (mono_streams_)
 740                 {
 741                         num_output_pads = in_channel_layout_.num_channels;
 742                 }
 743
 744                 if (num_output_pads > 1)
 745                 {
 746                         std::string splitfilter = "[a:0]channelsplit=channel_layout=";
 747
 748                         splitfilter += (boost::format("0x%|1$x|") % create_channel_layout_bitmask(in_channel_layout_.num_channels)).str();
 749
 750                         for (int i = 0; i < num_output_pads; ++i)
 751                                 splitfilter += "[aout:" + boost::lexical_cast<std::string>(i) + "]";
 752
 753                         filtergraph = u8(append_filter(u16(filtergraph), u16(splitfilter)));
 754                 }
 755
 756                 std::vector<audio_output_pad> output_pads(
 757                                 num_output_pads,
 758                                 audio_output_pad(
 759                                                 from_terminated_array<int>(                             codec.supported_samplerates,    0),
 760                                                 from_terminated_array<AVSampleFormat>(  codec.sample_fmts,                              AVSampleFormat::AV_SAMPLE_FMT_NONE),
 761                                                 from_terminated_array<uint64_t>(                codec.channel_layouts,                  static_cast<uint64_t>(0))));
 762
 763                 audio_filter_.reset(new audio_filter(
 764                                 { audio_input_pad(
 765                                                 boost::rational<int>(1, in_video_format_.audio_sample_rate),
 766                                                 in_video_format_.audio_sample_rate,
 767                                                 AVSampleFormat::AV_SAMPLE_FMT_S32,
 768                                                 create_channel_layout_bitmask(in_channel_layout_.num_channels)) },
 769                                                 output_pads,
 770                                                 filtergraph));
 771         }
 772
 773         void configure_filtergraph(
 774                         AVFilterGraph& graph,
 775                         const std::string& filtergraph,
 776                         AVFilterContext& source_ctx,
 777                         AVFilterContext& sink_ctx)
 778         {
 779                 AVFilterInOut* outputs = nullptr;
 780                 AVFilterInOut* inputs = nullptr;
 781
 782                 if(!filtergraph.empty())
 783                 {
 784                         outputs = avfilter_inout_alloc();
 785                         inputs  = avfilter_inout_alloc();
 786
 787                         try
 788                         {
 789                                 CASPAR_VERIFY(outputs && inputs);
 790
 791                                 outputs->name           = av_strdup("in");
 792                                 outputs->filter_ctx     = &source_ctx;
 793                                 outputs->pad_idx                = 0;
 794                                 outputs->next           = nullptr;
 795
 796                                 inputs->name                    = av_strdup("out");
 797                                 inputs->filter_ctx      = &sink_ctx;
 798                                 inputs->pad_idx         = 0;
 799                                 inputs->next                    = nullptr;
 800                         }
 801                         catch (...)
 802                         {
 803                                 avfilter_inout_free(&outputs);
 804                                 avfilter_inout_free(&inputs);
 805                                 throw;
 806                         }
 807
 808                         FF(avfilter_graph_parse(
 809                                         &graph,
 810                                         filtergraph.c_str(),
 811                                         inputs,
 812                                         outputs,
 813                                         nullptr));
 814                 }
 815                 else
 816                 {
 817                         FF(avfilter_link(
 818                                         &source_ctx,
 819                                         0,
 820                                         &sink_ctx,
 821                                         0));
 822                 }
 823
 824                 FF(avfilter_graph_config(
 825                                 &graph,
 826                                 nullptr));
 827         }
 828
 829         void encode_video(core::const_frame frame_ptr, std::shared_ptr<void> token)
 830         {
 831                 if(!video_st_)
 832                         return;
 833
 834                 auto enc = video_st_->codec;
 835
 836                 if(frame_ptr != core::const_frame::empty())
 837                 {
 838                         auto src_av_frame = create_frame();
 839
 840                         const auto sample_aspect_ratio =
 841                                 boost::rational<int>(
 842                                         in_video_format_.square_width,
 843                                         in_video_format_.square_height) /
 844                                 boost::rational<int>(
 845                                         in_video_format_.width,
 846                                         in_video_format_.height);
 847
 848                         src_av_frame->format                                            = AVPixelFormat::AV_PIX_FMT_BGRA;
 849                         src_av_frame->width                                             = in_video_format_.width;
 850                         src_av_frame->height                                            = in_video_format_.height;
 851                         src_av_frame->sample_aspect_ratio.num   = sample_aspect_ratio.numerator();
 852                         src_av_frame->sample_aspect_ratio.den   = sample_aspect_ratio.denominator();
 853                         src_av_frame->pts                                               = video_pts_;
 854
 855                         video_pts_ += 1;
 856
 857                         subject_
 858                                         << core::monitor::message("/frame")     % video_pts_
 859                                         << core::monitor::message("/path")      % path_
 860                                         << core::monitor::message("/fps")       % in_video_format_.fps;
 861
 862                         FF(av_image_fill_arrays(
 863                                 src_av_frame->data,
 864                                 src_av_frame->linesize,
 865                                 frame_ptr.image_data().begin(),
 866                                 static_cast<AVPixelFormat>(src_av_frame->format),
 867                                 in_video_format_.width,
 868                                 in_video_format_.height,
 869                                 1));
 870
 871                         FF(av_buffersrc_add_frame(
 872                                 video_graph_in_,
 873                                 src_av_frame.get()));
 874                 }
 875
 876                 int ret = 0;
 877
 878                 while(ret >= 0)
 879                 {
 880                         auto filt_frame = create_frame();
 881
 882                         ret = av_buffersink_get_frame(
 883                                 video_graph_out_,
 884                                 filt_frame.get());
 885
 886                         video_encoder_executor_.begin_invoke([=]
 887                         {
 888                                 if(ret == AVERROR_EOF)
 889                                 {
 890                                         if(enc->codec->capabilities & CODEC_CAP_DELAY)
 891                                         {
 892                                                 while(encode_av_frame(
 893                                                                 *video_st_,
 894                                                                 avcodec_encode_video2,
 895                                                                 nullptr, token))
 896                                                 {
 897                                                         boost::this_thread::yield(); // TODO:
 898                                                 }
 899                                         }
 900                                 }
 901                                 else if(ret != AVERROR(EAGAIN))
 902                                 {
 903                                         FF_RET(ret, "av_buffersink_get_frame");
 904
 905                                         if (filt_frame->interlaced_frame)
 906                                         {
 907                                                 if (enc->codec->id == AV_CODEC_ID_MJPEG)
 908                                                         enc->field_order = filt_frame->top_field_first ? AV_FIELD_TT : AV_FIELD_BB;
 909                                                 else
 910                                                         enc->field_order = filt_frame->top_field_first ? AV_FIELD_TB : AV_FIELD_BT;
 911                                         }
 912                                         else
 913                                                 enc->field_order = AV_FIELD_PROGRESSIVE;
 914
 915                                         filt_frame->quality = enc->global_quality;
 916
 917                                         if (!enc->me_threshold)
 918                                                 filt_frame->pict_type = AV_PICTURE_TYPE_NONE;
 919
 920                                         encode_av_frame(
 921                                                 *video_st_,
 922                                                 avcodec_encode_video2,
 923                                                 filt_frame,
 924                                                 token);
 925
 926                                         boost::this_thread::yield(); // TODO:
 927                                 }
 928                         });
 929                 }
 930         }
 931
 932         void encode_audio(core::const_frame frame_ptr, std::shared_ptr<void> token)
 933         {
 934                 if(audio_sts_.empty())
 935                         return;
 936
 937                 if(frame_ptr != core::const_frame::empty())
 938                 {
 939                         auto src_av_frame = create_frame();
 940
 941                         src_av_frame->channels                  = in_channel_layout_.num_channels;
 942                         src_av_frame->channel_layout            = create_channel_layout_bitmask(in_channel_layout_.num_channels);
 943                         src_av_frame->sample_rate               = in_video_format_.audio_sample_rate;
 944                         src_av_frame->nb_samples                        = static_cast<int>(frame_ptr.audio_data().size()) / src_av_frame->channels;
 945                         src_av_frame->format                            = AV_SAMPLE_FMT_S32;
 946                         src_av_frame->pts                               = audio_pts_;
 947
 948                         audio_pts_ += src_av_frame->nb_samples;
 949
 950                         FF(av_samples_fill_arrays(
 951                                         src_av_frame->extended_data,
 952                                         src_av_frame->linesize,
 953                                         reinterpret_cast<const std::uint8_t*>(&*frame_ptr.audio_data().begin()),
 954                                         src_av_frame->channels,
 955                                         src_av_frame->nb_samples,
 956                                         static_cast<AVSampleFormat>(src_av_frame->format),
 957                                         16));
 958
 959                         audio_filter_->push(0, src_av_frame);
 960                 }
 961
 962                 for (int pad_id = 0; pad_id < audio_filter_->get_num_output_pads(); ++pad_id)
 963                 {
 964                         for (auto filt_frame : audio_filter_->poll_all(pad_id))
 965                         {
 966                                 audio_encoder_executor_.begin_invoke([=]
 967                                 {
 968                                         encode_av_frame(
 969                                                         *audio_sts_.at(pad_id),
 970                                                         avcodec_encode_audio2,
 971                                                         filt_frame,
 972                                                         token);
 973
 974                                         boost::this_thread::yield(); // TODO:
 975                                 });
 976                         }
 977                 }
 978
 979                 bool eof = frame_ptr == core::const_frame::empty();
 980
 981                 if (eof)
 982                 {
 983                         audio_encoder_executor_.begin_invoke([=]
 984                         {
 985                                 for (int pad_id = 0; pad_id < audio_filter_->get_num_output_pads(); ++pad_id)
 986                                 {
 987                                         auto enc = audio_sts_.at(pad_id)->codec;
 988
 989                                         if (enc->codec->capabilities & CODEC_CAP_DELAY)
 990                                         {
 991                                                 while (encode_av_frame(
 992                                                                 *audio_sts_.at(pad_id),
 993                                                                 avcodec_encode_audio2,
 994                                                                 nullptr,
 995                                                                 token))
 996                                                 {
 997                                                         boost::this_thread::yield(); // TODO:
 998                                                 }
 999                                         }
1000                                 }
1001                         });
1002                 }
1003         }
1004
1005         template<typename F>
1006         bool encode_av_frame(
1007                         AVStream& st,
1008                         const F& func,
1009                         const std::shared_ptr<AVFrame>& src_av_frame,
1010                         std::shared_ptr<void> token)
1011         {
1012                 AVPacket pkt = {};
1013                 av_init_packet(&pkt);
1014
1015                 int got_packet = 0;
1016
1017                 FF(func(
1018                         st.codec,
1019                         &pkt,
1020                         src_av_frame.get(),
1021                         &got_packet));
1022
1023                 if(!got_packet || pkt.size <= 0)
1024                         return false;
1025
1026                 pkt.stream_index = st.index;
1027
1028                 if (pkt.pts != AV_NOPTS_VALUE)
1029                 {
1030                         pkt.pts =
1031                                 av_rescale_q(
1032                                         pkt.pts,
1033                                         st.codec->time_base,
1034                                         st.time_base);
1035                 }
1036
1037                 if (pkt.dts != AV_NOPTS_VALUE)
1038                 {
1039                         pkt.dts =
1040                                 av_rescale_q(
1041                                         pkt.dts,
1042                                         st.codec->time_base,
1043                                         st.time_base);
1044                 }
1045
1046                 pkt.duration =
1047                         static_cast<int>(
1048                                 av_rescale_q(
1049                                         pkt.duration,
1050                                         st.codec->time_base, st.time_base));
1051
1052                 write_packet(
1053                         std::shared_ptr<AVPacket>(
1054                                 new AVPacket(pkt),
1055                                 [](AVPacket* p)
1056                                 {
1057                                         av_free_packet(p);
1058                                         delete p;
1059                                 }), token);
1060
1061                 return true;
1062         }
1063
1064         void write_packet(
1065                         const std::shared_ptr<AVPacket>& pkt_ptr,
1066                         std::shared_ptr<void> token)
1067         {
1068                 write_executor_.begin_invoke([this, pkt_ptr, token]() mutable
1069                 {
1070                         FF(av_interleaved_write_frame(
1071                                 oc_.get(),
1072                                 pkt_ptr.get()));
1073                 });
1074         }
1075
1076         template<typename T>
1077         static boost::optional<T> try_remove_arg(
1078                         std::map<std::string, std::string>& options,
1079                         const boost::regex& expr)
1080         {
1081                 for(auto it = options.begin(); it != options.end(); ++it)
1082                 {
1083                         if(boost::regex_search(it->first, expr))
1084                         {
1085                                 auto arg = it->second;
1086                                 options.erase(it);
1087                                 return boost::lexical_cast<T>(arg);
1088                         }
1089                 }
1090
1091                 return boost::optional<T>();
1092         }
1093
1094         static std::map<std::string, std::string> remove_options(
1095                         std::map<std::string, std::string>& options,
1096                         const boost::regex& expr)
1097         {
1098                 std::map<std::string, std::string> result;
1099
1100                 auto it = options.begin();
1101                 while(it != options.end())
1102                 {
1103                         boost::smatch what;
1104                         if(boost::regex_search(it->first, what, expr))
1105                         {
1106                                 result[
1107                                         what.size() > 0 && what[1].matched
1108                                                 ? what[1].str()
1109                                                 : it->first] = it->second;
1110                                 it = options.erase(it);
1111                         }
1112                         else
1113                                 ++it;
1114                 }
1115
1116                 return result;
1117         }
1118
1119         static void to_dict(AVDictionary** dest, const std::map<std::string, std::string>& c)
1120         {
1121                 for (const auto& entry : c)
1122                 {
1123                         av_dict_set(
1124                                 dest,
1125                                 entry.first.c_str(),
1126                                 entry.second.c_str(), 0);
1127                 }
1128         }
1129
1130         static std::map<std::string, std::string> to_map(AVDictionary* dict)
1131         {
1132                 std::map<std::string, std::string> result;
1133
1134                 for(auto t = dict
1135                                 ? av_dict_get(
1136                                         dict,
1137                                         "",
1138                                         nullptr,
1139                                         AV_DICT_IGNORE_SUFFIX)
1140                                 : nullptr;
1141                         t;
1142                         t = av_dict_get(
1143                                 dict,
1144                                 "",
1145                                 t,
1146                                 AV_DICT_IGNORE_SUFFIX))
1147                 {
1148                         result[t->key] = t->value;
1149                 }
1150
1151                 return result;
1152         }
1153 };
1154
1155 int crc16(const std::string& str)
1156 {
1157         boost::crc_16_type result;
1158
1159         result.process_bytes(str.data(), str.length());
1160
1161         return result.checksum();
1162 }
1163
1164 struct ffmpeg_consumer_proxy : public core::frame_consumer
1165 {
1166         const std::string                                       path_;
1167         const std::string                                       options_;
1168         const bool                                                      separate_key_;
1169         const bool                                                      mono_streams_;
1170         const bool                                                      compatibility_mode_;
1171         int                                                                     consumer_index_offset_;
1172
1173         std::unique_ptr<ffmpeg_consumer>        consumer_;
1174         std::unique_ptr<ffmpeg_consumer>        key_only_consumer_;
1175
1176 public:
1177
1178         ffmpeg_consumer_proxy(const std::string& path, const std::string& options, bool separate_key, bool mono_streams, bool compatibility_mode)
1179                 : path_(path)
1180                 , options_(options)
1181                 , separate_key_(separate_key)
1182                 , mono_streams_(mono_streams)
1183                 , compatibility_mode_(compatibility_mode)
1184                 , consumer_index_offset_(crc16(path))
1185         {
1186         }
1187
1188         void initialize(const core::video_format_desc& format_desc, const core::audio_channel_layout& channel_layout, int) override
1189         {
1190                 if (consumer_)
1191                         CASPAR_THROW_EXCEPTION(invalid_operation() << msg_info("Cannot reinitialize ffmpeg-consumer."));
1192
1193                 consumer_.reset(new ffmpeg_consumer(path_, options_, mono_streams_));
1194                 consumer_->initialize(format_desc, channel_layout);
1195
1196                 if (separate_key_)
1197                 {
1198                         boost::filesystem::path fill_file(path_);
1199                         auto without_extension = u16(fill_file.parent_path().string() + "/" + fill_file.stem().string());
1200                         auto key_file = without_extension + L"_A" + u16(fill_file.extension().string());
1201
1202                         key_only_consumer_.reset(new ffmpeg_consumer(u8(key_file), options_, mono_streams_));
1203                         key_only_consumer_->initialize(format_desc, channel_layout);
1204                 }
1205         }
1206
1207         int64_t presentation_frame_age_millis() const override
1208         {
1209                 return consumer_ ? static_cast<int64_t>(consumer_->presentation_frame_age_millis()) : 0;
1210         }
1211
1212         std::future<bool> send(core::const_frame frame) override
1213         {
1214                 bool ready_for_frame = consumer_->ready_for_frame();
1215
1216                 if (ready_for_frame && separate_key_)
1217                         ready_for_frame = ready_for_frame && key_only_consumer_->ready_for_frame();
1218
1219                 if (ready_for_frame)
1220                 {
1221                         consumer_->send(frame);
1222
1223                         if (separate_key_)
1224                                 key_only_consumer_->send(frame.key_only());
1225                 }
1226                 else
1227                 {
1228                         consumer_->mark_dropped();
1229
1230                         if (separate_key_)
1231                                 key_only_consumer_->mark_dropped();
1232                 }
1233
1234                 return make_ready_future(true);
1235         }
1236
1237         std::wstring print() const override
1238         {
1239                 return consumer_ ? consumer_->print() : L"[ffmpeg_consumer]";
1240         }
1241
1242         std::wstring name() const override
1243         {
1244                 return L"ffmpeg";
1245         }
1246
1247         boost::property_tree::wptree info() const override
1248         {
1249                 boost::property_tree::wptree info;
1250
1251                 info.add(L"type",                       L"ffmpeg");
1252                 info.add(L"path",                       u16(path_));
1253                 info.add(L"separate_key",       separate_key_);
1254                 info.add(L"mono_streams",       mono_streams_);
1255
1256                 return info;
1257         }
1258
1259         bool has_synchronization_clock() const override
1260         {
1261                 return false;
1262         }
1263
1264         int buffer_depth() const override
1265         {
1266                 return -1;
1267         }
1268
1269         int index() const override
1270         {
1271                 return compatibility_mode_ ? 200 : 100000 + consumer_index_offset_;
1272         }
1273
1274         core::monitor::subject& monitor_output() override
1275         {
1276                 return consumer_->monitor_output();
1277         }
1278 };
1279
1280 void describe_ffmpeg_consumer(core::help_sink& sink, const core::help_repository& repo)
1281 {
1282         sink.short_description(L"For streaming/recording the contents of a channel using FFmpeg.");
1283         sink.syntax(L"FILE,STREAM [filename:string],[url:string] {-[ffmpeg_param1:string] [value1:string] {-[ffmpeg_param2:string] [value2:string] {...}}} {[separate_key:SEPARATE_KEY]} {[mono_streams:MONO_STREAMS]}");
1284         sink.para()->text(L"For recording or streaming the contents of a channel using FFmpeg");
1285         sink.definitions()
1286                 ->item(L"filename",                     L"The filename under the media folder including the extension (decides which kind of container format that will be used).")
1287                 ->item(L"url",                          L"If the filename is given in the form of an URL a network stream will be created instead of a file on disk.")
1288                 ->item(L"ffmpeg_paramX",                L"A parameter supported by FFmpeg. For example vcodec or acodec etc.")
1289                 ->item(L"separate_key",         L"If defined will create two files simultaneously -- One for fill and one for key (_A will be appended).")
1290                 ->item(L"mono_streams",         L"If defined every audio channel will be written to its own audio stream.");
1291         sink.para()->text(L"Examples:");
1292         sink.example(L">> ADD 1 FILE output.mov -vcodec dnxhd");
1293         sink.example(L">> ADD 1 FILE output.mov -vcodec prores");
1294         sink.example(L">> ADD 1 FILE output.mov -vcodec dvvideo");
1295         sink.example(L">> ADD 1 FILE output.mov -vcodec libx264 -preset ultrafast -tune fastdecode -crf 25");
1296         sink.example(L">> ADD 1 FILE output.mov -vcodec dnxhd SEPARATE_KEY", L"for creating output.mov with fill and output_A.mov with key/alpha");
1297         sink.example(L">> ADD 1 FILE output.mxf -vcodec dnxhd MONO_STREAMS", L"for creating output.mxf with every audio channel encoded in its own mono stream.");
1298         sink.example(L">> ADD 1 STREAM udp://<client_ip_address>:9250 -format mpegts -vcodec libx264 -crf 25 -tune zerolatency -preset ultrafast",
1299                 L"for streaming over UDP instead of creating a local file.");
1300 }
1301
1302 spl::shared_ptr<core::frame_consumer> create_ffmpeg_consumer(
1303                 const std::vector<std::wstring>& params, core::interaction_sink*, std::vector<spl::shared_ptr<core::video_channel>> channels)
1304 {
1305         if (params.size() < 1 || (!boost::iequals(params.at(0), L"STREAM") && !boost::iequals(params.at(0), L"FILE")))
1306                 return core::frame_consumer::empty();
1307
1308         auto params2                    = params;
1309         bool separate_key               = get_and_consume_flag(L"SEPARATE_KEY", params2);
1310         bool mono_streams               = get_and_consume_flag(L"MONO_STREAMS", params2);
1311         auto compatibility_mode = boost::iequals(params.at(0), L"FILE");
1312         auto path                               = u8(params2.size() > 1 ? params2.at(1) : L"");
1313
1314         // remove FILE or STREAM
1315         params2.erase(params2.begin());
1316
1317         // remove path
1318         if (!path.empty())
1319                 params2.erase(params2.begin());
1320
1321         // join only the args
1322         auto args                               = u8(boost::join(params2, L" "));
1323
1324         return spl::make_shared<ffmpeg_consumer_proxy>(path, args, separate_key, mono_streams, compatibility_mode);
1325 }
1326
1327 spl::shared_ptr<core::frame_consumer> create_preconfigured_ffmpeg_consumer(
1328                 const boost::property_tree::wptree& ptree, core::interaction_sink*, std::vector<spl::shared_ptr<core::video_channel>> channels)
1329 {
1330         return spl::make_shared<ffmpeg_consumer_proxy>(
1331                         u8(ptree_get<std::wstring>(ptree, L"path")),
1332                         u8(ptree.get<std::wstring>(L"args", L"")),
1333                         ptree.get<bool>(L"separate-key", false),
1334                         ptree.get<bool>(L"mono-streams", false),
1335                         false);
1336 }
1337
1338 }}