git.sesse.net Git - nageru/blob - nageru/kaeru.cpp

   1 // Kaeru (換える), a simple transcoder intended for use with Nageru.
   2
   3 #include "audio_encoder.h"
   4 #include "basic_stats.h"
   5 #include "defs.h"
   6 #include "flags.h"
   7 #include "ffmpeg_capture.h"
   8 #include "mixer.h"
   9 #include "shared/mux.h"
  10 #include "quittable_sleeper.h"
  11 #include "shared/timebase.h"
  12 #include "x264_encoder.h"
  13
  14 #include <assert.h>
  15 #include <fcntl.h>
  16 #include <signal.h>
  17 #include <unistd.h>
  18 #include <chrono>
  19 #include <string>
  20
  21 using namespace bmusb;
  22 using namespace movit;
  23 using namespace std;
  24 using namespace std::chrono;
  25 using namespace std::placeholders;
  26
// Never assigned in this file; presumably defined here only so that shared
// code referring to global_mixer links in this binary — TODO confirm.
Mixer *global_mixer = nullptr;
// The active video encoder; set in main() and used by adjust_bitrate().
X264Encoder *global_x264_encoder = nullptr;
// Running count of video frames handed to the encoder (see video_frame_callback()).
int frame_num = 0;
// Set in main(); updated once per video frame in video_frame_callback().
BasicStats *global_basic_stats = nullptr;
// Signalled by request_quit(); main() sleeps on it until then.
QuittableSleeper should_quit;
// Metrics object handed to the output mux in create_mux().
MuxMetrics stream_mux_metrics;
  33
  34 namespace {
  35
  36 int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
  37 {
  38         static bool seen_sync_markers = false;
  39         static string stream_mux_header;
  40         HTTPD *httpd = (HTTPD *)opaque;
  41
  42         if (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT) {
  43                 seen_sync_markers = true;
  44         } else if (type == AVIO_DATA_MARKER_UNKNOWN && !seen_sync_markers) {
  45                 // We don't know if this is a keyframe or not (the muxer could
  46                 // avoid marking it), so we just have to make the best of it.
  47                 type = AVIO_DATA_MARKER_SYNC_POINT;
  48         }
  49
  50         HTTPD::StreamID stream_id{ HTTPD::MAIN_STREAM, 0 };
  51         if (type == AVIO_DATA_MARKER_HEADER) {
  52                 stream_mux_header.append((char *)buf, buf_size);
  53                 httpd->set_header(stream_id, stream_mux_header);
  54         } else {
  55                 httpd->add_data(stream_id, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
  56         }
  57         return buf_size;
  58 }
  59
  60 }  // namespace
  61
  62 unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x264_encoder, AudioEncoder *audio_encoder)
  63 {
  64         AVFormatContext *avctx = avformat_alloc_context();
  65         avctx->oformat = oformat;
  66
  67         uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
  68         avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, httpd, nullptr, nullptr, nullptr);
  69         avctx->pb->write_data_type = &write_packet;
  70         avctx->pb->ignore_boundary_point = 1;
  71         avctx->flags = AVFMT_FLAG_CUSTOM_IO;
  72
  73         string video_extradata = x264_encoder->get_global_headers();
  74
  75         // If audio is disabled (ie., we won't ever see any audio packets),
  76         // set nullptr here to also not include the stream in the mux.
  77         AVCodecParameters *audio_codecpar =
  78                 global_flags.enable_audio ? audio_encoder->get_codec_parameters().release() : nullptr;
  79
  80         unique_ptr<Mux> mux;
  81         mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_codecpar,
  82                 get_color_space(global_flags.ycbcr_rec709_coefficients), COARSE_TIMEBASE,
  83                 /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, { &stream_mux_metrics }));
  84         stream_mux_metrics.init({{ "destination", "http" }});
  85         return mux;
  86 }
  87
  88 void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, AudioEncoder *audio_encoder,
  89                           int64_t video_pts, AVRational video_timebase,
  90                           int64_t audio_pts, AVRational audio_timebase,
  91                           uint16_t timecode,
  92                           FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
  93                           FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
  94 {
  95         if (video_pts >= 0 && video_frame.len > 0) {
  96                 ReceivedTimestamps ts;
  97                 ts.ts.push_back(steady_clock::now());
  98
  99                 video_pts = av_rescale_q(video_pts, video_timebase, AVRational{ 1, TIMEBASE });
 100                 int64_t frame_duration = int64_t(TIMEBASE) * video_format.frame_rate_den / video_format.frame_rate_nom;
 101                 x264_encoder->add_frame(video_pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ts);
 102                 global_basic_stats->update(frame_num++, /*dropped_frames=*/0);
 103         }
 104         if (audio_frame.len > 0) {
 105                 // FFmpegCapture takes care of this for us.
 106                 assert(audio_format.num_channels == 2);
 107                 assert(audio_format.sample_rate == OUTPUT_FREQUENCY);
 108
 109                 // TODO: Reduce some duplication against AudioMixer here.
 110                 size_t num_samples = audio_frame.len / (audio_format.bits_per_sample / 8);
 111                 vector<float> float_samples;
 112                 float_samples.resize(num_samples);
 113
 114                 if (audio_format.bits_per_sample == 16) {
 115                         const int16_t *src = (const int16_t *)audio_frame.data;
 116                         float *dst = &float_samples[0];
 117                         for (size_t i = 0; i < num_samples; ++i) {
 118                                 *dst++ = int16_t(le16toh(*src++)) * (1.0f / 32768.0f);
 119                         }
 120                 } else if (audio_format.bits_per_sample == 32) {
 121                         const int32_t *src = (const int32_t *)audio_frame.data;
 122                         float *dst = &float_samples[0];
 123                         for (size_t i = 0; i < num_samples; ++i) {
 124                                 *dst++ = int32_t(le32toh(*src++)) * (1.0f / 2147483648.0f);
 125                         }
 126                 } else {
 127                         assert(false);
 128                 }
 129                 audio_pts = av_rescale_q(audio_pts, audio_timebase, AVRational{ 1, TIMEBASE });
 130                 audio_encoder->encode_audio(float_samples, audio_pts);
 131         }
 132
 133         if (video_frame.owner) {
 134                 video_frame.owner->release_frame(video_frame);
 135         }
 136         if (audio_frame.owner) {
 137                 audio_frame.owner->release_frame(audio_frame);
 138         }
 139 }
 140
 141 void raw_packet_callback(Mux *mux, int stream_index, const AVPacket *pkt, AVRational timebase)
 142 {
 143         mux->add_packet(*pkt, pkt->pts, pkt->dts == AV_NOPTS_VALUE ? pkt->pts : pkt->dts, timebase, stream_index);
 144 }
 145
 146 void filter_packet_callback(Mux *mux, int stream_index, AVBSFContext *bsfctx, const AVPacket *pkt, AVRational timebase)
 147 {
 148         if (pkt->size <= 2 || pkt->data[0] != 0xff || (pkt->data[1] & 0xf0) != 0xf0) {
 149                 // Not ADTS data, so just pass it through.
 150                 mux->add_packet(*pkt, pkt->pts, pkt->dts == AV_NOPTS_VALUE ? pkt->pts : pkt->dts, timebase, stream_index);
 151                 return;
 152         }
 153
 154         AVPacket *in_pkt = av_packet_clone(pkt);
 155         unique_ptr<AVPacket, decltype(av_packet_unref) *> in_pkt_cleanup(in_pkt, av_packet_unref);
 156         int err = av_bsf_send_packet(bsfctx, in_pkt);
 157         if (err < 0) {
 158                 fprintf(stderr, "av_bsf_send_packet() failed with %d, ignoring\n", err);
 159         }
 160         for ( ;; ) {
 161                 AVPacket out_pkt;
 162                 unique_ptr<AVPacket, decltype(av_packet_unref) *> pkt_cleanup(&out_pkt, av_packet_unref);
 163                 av_init_packet(&out_pkt);
 164                 err = av_bsf_receive_packet(bsfctx, &out_pkt);
 165                 if (err == AVERROR(EAGAIN)) {
 166                         break;
 167                 }
 168                 if (err < 0) {
 169                         fprintf(stderr, "av_bsf_receive_packet() failed with %d, ignoring\n", err);
 170                         return;
 171                 }
 172                 mux->add_packet(out_pkt, out_pkt.pts, out_pkt.dts == AV_NOPTS_VALUE ? out_pkt.pts : out_pkt.dts, timebase, stream_index);
 173         }
 174 }
 175
 176 void adjust_bitrate(int signal)
 177 {
 178         int new_bitrate = global_flags.x264_bitrate;
 179         if (signal == SIGUSR1) {
 180                 new_bitrate += 100;
 181                 if (new_bitrate > 100000) {
 182                         fprintf(stderr, "Ignoring SIGUSR1, can't increase bitrate below 100000 kbit/sec (currently at %d kbit/sec)\n",
 183                                 global_flags.x264_bitrate);
 184                 } else {
 185                         fprintf(stderr, "Increasing bitrate to %d kbit/sec due to SIGUSR1.\n", new_bitrate);
 186                         global_flags.x264_bitrate = new_bitrate;
 187                         global_x264_encoder->change_bitrate(new_bitrate);
 188                 }
 189         } else if (signal == SIGUSR2) {
 190                 new_bitrate -= 100;
 191                 if (new_bitrate < 100) {
 192                         fprintf(stderr, "Ignoring SIGUSR2, can't decrease bitrate below 100 kbit/sec (currently at %d kbit/sec)\n",
 193                                 global_flags.x264_bitrate);
 194                 } else {
 195                         fprintf(stderr, "Decreasing bitrate to %d kbit/sec due to SIGUSR2.\n", new_bitrate);
 196                         global_flags.x264_bitrate = new_bitrate;
 197                         global_x264_encoder->change_bitrate(new_bitrate);
 198                 }
 199         }
 200 }
 201
 202 void request_quit(int signal)
 203 {
 204         should_quit.quit();
 205 }
 206
 207 int main(int argc, char *argv[])
 208 {
 209         parse_flags(PROGRAM_KAERU, argc, argv);
 210         if (optind + 1 != argc) {
 211                 usage(PROGRAM_KAERU);
 212                 abort();
 213         }
 214         global_flags.max_num_cards = 1;  // For latency metrics.
 215
 216 #if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100)
 217         av_register_all();
 218 #endif
 219         avformat_network_init();
 220
 221         HTTPD httpd;
 222
 223         AVOutputFormat *oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr);
 224         assert(oformat != nullptr);
 225
 226         unique_ptr<AudioEncoder> audio_encoder;
 227         if (global_flags.stream_audio_codec_name.empty()) {
 228                 audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
 229         } else {
 230                 audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
 231         }
 232
 233         unique_ptr<X264Encoder> x264_encoder(new X264Encoder(oformat));
 234         unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, x264_encoder.get(), audio_encoder.get());
 235         if (global_flags.transcode_audio) {
 236                 audio_encoder->add_mux(http_mux.get());
 237         }
 238         if (global_flags.transcode_video) {
 239                 x264_encoder->add_mux(http_mux.get());
 240         }
 241         global_x264_encoder = x264_encoder.get();
 242
 243         FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
 244         video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
 245         if (global_flags.transcode_video) {
 246                 video.set_frame_callback(bind(video_frame_callback, &video, x264_encoder.get(), audio_encoder.get(), _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11));
 247         } else {
 248                 video.set_video_callback(bind(raw_packet_callback, http_mux.get(), /*stream_index=*/0, _1, _2));
 249         }
 250         if (!global_flags.transcode_audio && global_flags.enable_audio) {
 251                 AVBSFContext *bsfctx = nullptr;
 252                 if (strcmp(oformat->name, "mp4") == 0 && strcmp(audio_encoder->get_codec()->name, "aac") == 0) {
 253                         // We need to insert the aac_adtstoasc filter, seemingly (or we will get warnings to do so).
 254                         const AVBitStreamFilter *filter = av_bsf_get_by_name("aac_adtstoasc");
 255                         int err = av_bsf_alloc(filter, &bsfctx);
 256                         if (err < 0) {
 257                                 fprintf(stderr, "av_bsf_alloc() failed with %d\n", err);
 258                                 exit(1);
 259                         }
 260                 }
 261                 if (bsfctx == nullptr) {
 262                         video.set_audio_callback(bind(raw_packet_callback, http_mux.get(), /*stream_index=*/1, _1, _2));
 263                 } else {
 264                         video.set_audio_callback(bind(filter_packet_callback, http_mux.get(), /*stream_index=*/1, bsfctx, _1, _2));
 265                 }
 266         }
 267         video.configure_card();
 268         video.start_bm_capture();
 269         video.change_rate(10.0);  // Play as fast as possible.
 270
 271         BasicStats basic_stats(/*verbose=*/false, /*use_opengl=*/false);
 272         global_basic_stats = &basic_stats;
 273         httpd.start(global_flags.http_port);
 274
 275         signal(SIGUSR1, adjust_bitrate);
 276         signal(SIGUSR2, adjust_bitrate);
 277         signal(SIGINT, request_quit);
 278
 279         while (!should_quit.should_quit()) {
 280                 should_quit.sleep_for(hours(1000));
 281         }
 282
 283         video.stop_dequeue_thread();
 284         // Stop the x264 encoder before killing the mux it's writing to.
 285         global_x264_encoder = nullptr;
 286         x264_encoder.reset();
 287         return 0;
 288 }