From: Steinar H. Gunderson
Date: Fri, 7 Jul 2017 16:02:04 +0000 (+0200)
Subject: Initial check-in of Kaeru, a simple transcoder based on Nageru code.
X-Git-Tag: 1.6.1~6
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=ad1641ad3ab50ecd17af2e1c2d980f26e6adf0bb;p=nageru

Initial check-in of Kaeru, a simple transcoder based on Nageru code.
---

diff --git a/Makefile b/Makefile
index a8f35f6..e924f15 100644
--- a/Makefile
+++ b/Makefile
@@ -30,9 +30,12 @@ OBJS += quicksync_encoder.o x264_encoder.o x264_dynamic.o x264_speed_control.o v
 # DeckLink
 OBJS += decklink_capture.o decklink_util.o decklink_output.o decklink/DeckLinkAPIDispatch.o
 
+KAERU_OBJS = kaeru.o x264_encoder.o mux.o metrics.o flags.o audio_encoder.o x264_speed_control.o print_latency.o x264_dynamic.o ffmpeg_raii.o ffmpeg_capture.o ffmpeg_util.o httpd.o metacube2.o
+
 # bmusb
 ifeq ($(EMBEDDED_BMUSB),yes)
   OBJS += bmusb/bmusb.o bmusb/fake_capture.o
+  KAERU_OBJS += bmusb/bmusb.o
 endif
 
 # FFmpeg input
@@ -58,6 +61,8 @@ all: nageru benchmark_audio_mixer
 
 nageru: $(OBJS)
 	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)
+kaeru: $(KAERU_OBJS)
+	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)
 benchmark_audio_mixer: $(BM_OBJS)
 	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)
 
@@ -73,11 +78,11 @@ mainwindow.o: midi_mapping.pb.h
 midi_mapper.o: midi_mapping.pb.h
 midi_mapping_dialog.o: ui_midi_mapping.h midi_mapping.pb.h
 
-DEPS=$(OBJS:.o=.d) $(BM_OBJS:.o=.d)
+DEPS=$(OBJS:.o=.d) $(BM_OBJS:.o=.d) $(KAERU_OBJS:.o=.d)
 
 -include $(DEPS)
 
 clean:
-	$(RM) $(OBJS) $(BM_OBJS) $(DEPS) nageru benchmark_audio_mixer ui_aboutdialog.h ui_analyzer.h ui_mainwindow.h ui_display.h ui_about.h ui_audio_miniview.h ui_audio_expanded_view.h ui_input_mapping.h ui_midi_mapping.h chain-*.frag *.dot *.pb.cc *.pb.h $(OBJS_WITH_MOC:.o=.moc.cpp) ellipsis_label.moc.cpp clickable_label.moc.cpp
+	$(RM) $(OBJS) $(BM_OBJS) $(KAERU_OBJS) $(DEPS) nageru benchmark_audio_mixer ui_aboutdialog.h ui_analyzer.h ui_mainwindow.h ui_display.h ui_about.h ui_audio_miniview.h ui_audio_expanded_view.h ui_input_mapping.h ui_midi_mapping.h chain-*.frag *.dot *.pb.cc *.pb.h $(OBJS_WITH_MOC:.o=.moc.cpp) ellipsis_label.moc.cpp clickable_label.moc.cpp
 
 PREFIX=/usr/local
 install:

diff --git a/bmusb b/bmusb
index 32043c9..6a012a4 160000
--- a/bmusb
+++ b/bmusb
@@ -1 +1 @@
-Subproject commit 32043c95d3b9f8cb97d6d28b9996fa1bec2ce11b
+Subproject commit 6a012a41c5422092cdac1f18a9019f37c0b85368

diff --git a/ffmpeg_capture.cpp b/ffmpeg_capture.cpp
index be27e6f..48a395e 100644
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -31,6 +31,7 @@ extern "C" {
 #include "ffmpeg_util.h"
 #include "flags.h"
 #include "image_input.h"
+#include "timebase.h"
 
 #define FRAME_SIZE (8 << 20)  // 8 MB.
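The timebase.h include above pulls Nageru's global TIMEBASE constant into the capture code; the hunks that follow pass every pts together with the AVRational timebase it is expressed in, so downstream code can rescale everything into one common clock. A minimal sketch of that conversion, assuming a 1/90000 input timebase (typical for MPEG-TS; the value is only an example):

    extern "C" {
    #include <libavutil/mathematics.h>
    #include <libavutil/rational.h>
    }
    #include "timebase.h"

    // Rescale a demuxer pts into Nageru's global timebase. Illustration only;
    // the real conversion happens in kaeru.cpp's video_frame_callback() below.
    int64_t to_global_pts(int64_t stream_pts, AVRational stream_timebase)
    {
        return av_rescale_q(stream_pts, stream_timebase, AVRational{1, TIMEBASE});
    }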
@@ -71,6 +72,9 @@ AVPixelFormat decide_dst_format(AVPixelFormat src_format, bmusb::PixelFormat dst
 	if (dst_format_type == bmusb::PixelFormat_8BitBGRA) {
 		return AV_PIX_FMT_BGRA;
 	}
+	if (dst_format_type == FFmpegCapture::PixelFormat_NV12) {
+		return AV_PIX_FMT_NV12;
+	}
 
 	assert(dst_format_type == bmusb::PixelFormat_8BitYCbCrPlanar);
 
@@ -313,7 +317,7 @@ void FFmpegCapture::send_disconnected_frame()
 		video_frame.len = width * height * 4;
 		memset(video_frame.data, 0, video_frame.len);
 
-		frame_callback(timecode++,
+		frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
 			video_frame, /*video_offset=*/0, video_format,
 			FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
 	}
@@ -350,6 +354,8 @@ bool FFmpegCapture::play_video(const string &pathname)
 		return false;
 	}
 
+	int audio_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_AUDIO);
+
 	const AVCodecParameters *codecpar = format_ctx->streams[video_stream_index]->codecpar;
 	video_timebase = format_ctx->streams[video_stream_index]->time_base;
 	AVCodecContextWithDeleter codec_ctx = avcodec_alloc_context3_unique(nullptr);
@@ -378,7 +384,7 @@ bool FFmpegCapture::play_video(const string &pathname)
 		}
 
 		bool error;
-		AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, &error);
+		AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, audio_stream_index, &error);
 		if (error) {
 			return false;
 		}
@@ -418,7 +424,7 @@ bool FFmpegCapture::play_video(const string &pathname)
 			video_frame.received_timestamp = next_frame_start;
 			bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
 			if (finished_wakeup) {
-				frame_callback(timecode++,
+				frame_callback(frame->pts, video_timebase, timecode++,
 					video_frame, 0, video_format,
 					audio_frame, 0, audio_format);
 				break;
@@ -494,7 +500,7 @@ bool FFmpegCapture::process_queued_commands(AVFormatContext *format_ctx, const s
 	return false;
 }
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error)
 {
 	*error = false;
 
@@ -510,6 +516,9 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
 		pkt.data = nullptr;
 		pkt.size = 0;
 		if (av_read_frame(format_ctx, &pkt) == 0) {
+			if (pkt.stream_index == audio_stream_index && audio_callback != nullptr) {
+				audio_callback(&pkt, format_ctx->streams[audio_stream_index]->time_base);
+			}
 			if (pkt.stream_index != video_stream_index) {
 				// Ignore audio for now.
 				continue;
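With this change, decode_frame() sees every demuxed packet before the video-only filter: packets whose stream_index matches the audio stream are handed to audio_callback together with that stream's timebase, so compressed audio can be passed through without ever being decoded. The same dispatch pattern in isolation, as a sketch (demux_loop and audio_handler are illustrative names, not Nageru API):

    extern "C" {
    #include <libavformat/avformat.h>
    }
    #include <functional>

    // Route demuxed packets by stream index; compressed audio is forwarded
    // as-is, everything else is dropped here. Mirrors the dispatch added to
    // FFmpegCapture::decode_frame(); the handler is a placeholder.
    void demux_loop(AVFormatContext *ctx, int video_idx, int audio_idx,
                    std::function<void(const AVPacket *, AVRational)> audio_handler)
    {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = nullptr;
        pkt.size = 0;
        while (av_read_frame(ctx, &pkt) == 0) {
            if (pkt.stream_index == audio_idx && audio_handler != nullptr) {
                audio_handler(&pkt, ctx->streams[audio_idx]->time_base);
            }
            // Video packets would go to the decoder at this point.
            av_packet_unref(&pkt);
        }
    }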
@@ -547,6 +556,8 @@ VideoFormat FFmpegCapture::construct_video_format(const AVFrame *frame, AVRation
 	video_format.height = height;
 	if (pixel_format == bmusb::PixelFormat_8BitBGRA) {
 		video_format.stride = width * 4;
+	} else if (pixel_format == FFmpegCapture::PixelFormat_NV12) {
+		video_format.stride = width;
 	} else {
 		assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
 		video_format.stride = width;
@@ -597,6 +608,17 @@ FrameAllocator::Frame FFmpegCapture::make_video_frame(const AVFrame *frame, cons
 		pic_data[0] = video_frame.data;
 		linesizes[0] = width * 4;
 		video_frame.len = (width * 4) * height;
+	} else if (pixel_format == PixelFormat_NV12) {
+		pic_data[0] = video_frame.data;
+		linesizes[0] = width;
+
+		pic_data[1] = pic_data[0] + width * height;
+		linesizes[1] = width;
+
+		video_frame.len = (width * 2) * height;
+
+		const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);
+		current_frame_ycbcr_format = decode_ycbcr_format(desc, frame);
 	} else {
 		assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
 		const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);
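For reference, NV12 is a full-resolution luma plane followed by a single half-height plane of interleaved Cb/Cr pairs, so both planes have a stride of width and the chroma plane starts width * height bytes in; a tightly packed frame needs width * height * 3 / 2 bytes, so the (width * 2) * height reservation above leaves headroom. The same layout arithmetic as a small sketch:

    #include <cstddef>
    #include <cstdint>

    // Plane layout of a contiguous NV12 frame, as set up in
    // make_video_frame() above: luma first, then interleaved Cb/Cr.
    struct NV12View {
        uint8_t *y;     // width x height, stride = width
        uint8_t *cbcr;  // width x (height / 2), stride = width (Cb,Cr pairs)
    };

    NV12View nv12_planes(uint8_t *base, size_t width, size_t height)
    {
        NV12View v;
        v.y = base;
        v.cbcr = base + width * height;  // tightly packed size: width * height * 3 / 2
        return v;
    }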
diff --git a/ffmpeg_capture.h b/ffmpeg_capture.h
index 85e436e..afca641 100644
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -42,6 +42,7 @@ extern "C" {
 struct AVFormatContext;
 struct AVFrame;
 struct AVRational;
+struct AVPacket;
 
 class FFmpegCapture : public bmusb::CaptureInterface
 {
@@ -101,11 +102,37 @@ public:
 		return audio_frame_allocator;
 	}
 
-	void set_frame_callback(bmusb::frame_callback_t callback) override
+	// FFmpegCapture-specific overload of set_frame_callback that also gives
+	// the raw original pts from the video. Negative pts means a dummy frame.
+	typedef std::function<void(int64_t pts, AVRational timebase, uint16_t timecode,
+	                           bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
+	                           bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format)>
+		frame_callback_t;
+	void set_frame_callback(frame_callback_t callback)
 	{
 		frame_callback = callback;
 	}
+	void set_frame_callback(bmusb::frame_callback_t callback) override
+	{
+		frame_callback = std::bind(
+			callback,
+			std::placeholders::_3,
+			std::placeholders::_4,
+			std::placeholders::_5,
+			std::placeholders::_6,
+			std::placeholders::_7,
+			std::placeholders::_8,
+			std::placeholders::_9);
+	}
+
+	// FFmpegCapture-specific callback that gives the raw audio.
+	typedef std::function<void(const AVPacket *pkt, AVRational timebase)> audio_callback_t;
+	void set_audio_callback(audio_callback_t callback)
+	{
+		audio_callback = callback;
+	}
+
 	// Used to get precise information about the Y'CbCr format used
 	// for a given frame. Only valid to call during the frame callback,
 	// and only when receiving a frame with pixel format PixelFormat_8BitYCbCrPlanar.
@@ -135,8 +162,9 @@ public:
 	void set_video_mode(uint32_t video_mode_id) override {}  // Ignore.
 	uint32_t get_current_video_mode() const override { return 0; }
 
+	static constexpr bmusb::PixelFormat PixelFormat_NV12 = static_cast<bmusb::PixelFormat>(100);  // In the private range.
 	std::set<bmusb::PixelFormat> get_available_pixel_formats() const override {
-		return std::set<bmusb::PixelFormat>{ bmusb::PixelFormat_8BitBGRA, bmusb::PixelFormat_8BitYCbCrPlanar };
+		return std::set<bmusb::PixelFormat>{ bmusb::PixelFormat_8BitBGRA, bmusb::PixelFormat_8BitYCbCrPlanar, PixelFormat_NV12 };
 	}
 	void set_pixel_format(bmusb::PixelFormat pixel_format) override {
 		this->pixel_format = pixel_format;
@@ -166,7 +194,7 @@ private:
 	bool process_queued_commands(AVFormatContext *format_ctx, const std::string &pathname, timespec last_modified, bool *rewound);
 
 	// Returns nullptr if no frame was decoded (e.g. EOF).
-	AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, bool *error);
+	AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error);
 	bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
 	bmusb::FrameAllocator::Frame make_video_frame(const AVFrame *frame, const std::string &pathname, bool *error);
 
@@ -188,7 +216,8 @@ private:
 	bmusb::FrameAllocator *audio_frame_allocator = nullptr;
 	std::unique_ptr<bmusb::FrameAllocator> owned_video_frame_allocator;
 	std::unique_ptr<bmusb::FrameAllocator> owned_audio_frame_allocator;
-	bmusb::frame_callback_t frame_callback = nullptr;
+	frame_callback_t frame_callback = nullptr;
+	audio_callback_t audio_callback = nullptr;
 
 	SwsContextWithDeleter sws_ctx;
 	int sws_last_width = -1, sws_last_height = -1, sws_last_src_format = -1;
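The header now has two set_frame_callback() overloads: the bmusb-style one is adapted via std::bind, where placeholders _3 through _9 select the seven legacy arguments and silently drop the new pts and timebase, while new code can take the extended signature directly. A sketch of subscribing with lambdas instead of std::bind (register_callbacks is an illustrative name; the lambda bodies are stubs, and real consumers must hand the frames back to their FrameAllocator owners, as kaeru.cpp below does):

    #include "ffmpeg_capture.h"

    // Hook up the pts-aware video callback and the raw-audio callback.
    void register_callbacks(FFmpegCapture *video)
    {
        video->set_frame_callback(
            [](int64_t pts, AVRational timebase, uint16_t timecode,
               bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
               bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format) {
                // pts < 0 signals a dummy (disconnected) frame.
                // Remember to release video_frame/audio_frame to their owners.
            });
        video->set_audio_callback(
            [](const AVPacket *pkt, AVRational timebase) {
                // Compressed audio packet, in the stream's own timebase.
            });
    }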
recording\n"); + fprintf(stderr, " -v, --va-display=SPEC VA-API device for H.264 encoding\n"); + fprintf(stderr, " ($DISPLAY spec or /dev/dri/render* path)\n"); + fprintf(stderr, " -m, --map-signal=SIGNAL,CARD set a default card mapping (can be given multiple times)\n"); + fprintf(stderr, " -M, --input-mapping=FILE start with the given audio input mapping (implies --multichannel)\n"); + fprintf(stderr, " --multichannel start in multichannel audio mapping mode\n"); + fprintf(stderr, " --midi-mapping=FILE start with the given MIDI controller mapping (implies --multichannel)\n"); + fprintf(stderr, " --fake-cards-audio make fake (disconnected) cards output a simple tone\n"); + fprintf(stderr, " --http-uncompressed-video send uncompressed NV12 video to HTTP clients\n"); + fprintf(stderr, " --http-x264-video send x264-compressed video to HTTP clients\n"); + fprintf(stderr, " --record-x264-video store x264-compressed video to disk (implies --http-x264-video,\n"); + fprintf(stderr, " removes the need for working VA-API encoding)\n"); + } fprintf(stderr, " --x264-preset x264 quality preset (default " X264_DEFAULT_PRESET ")\n"); fprintf(stderr, " --x264-tune x264 tuning (default " X264_DEFAULT_TUNE ", can be blank)\n"); fprintf(stderr, " --x264-speedcontrol try to match x264 preset to available CPU speed\n"); @@ -103,45 +109,47 @@ void usage() DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000); fprintf(stderr, " --http-coarse-timebase use less timebase for HTTP (recommended for muxers\n"); fprintf(stderr, " that handle large pts poorly, like e.g. MP4)\n"); - fprintf(stderr, " --flat-audio start with most audio processing turned off\n"); - fprintf(stderr, " (can be overridden by e.g. --enable-limiter)\n"); - fprintf(stderr, " --gain-staging=DB set initial gain staging to the given value\n"); - fprintf(stderr, " (--disable-gain-staging-auto)\n"); - fprintf(stderr, " --disable-locut turn off locut filter (also --enable)\n"); - fprintf(stderr, " --disable-gain-staging-auto turn off automatic gain staging (also --enable)\n"); - fprintf(stderr, " --disable-compressor turn off regular compressor (also --enable)\n"); - fprintf(stderr, " --disable-limiter turn off limiter (also --enable)\n"); - fprintf(stderr, " --disable-makeup-gain-auto turn off auto-adjustment of final makeup gain (also --enable)\n"); - fprintf(stderr, " --disable-alsa-output disable audio monitoring via ALSA\n"); - fprintf(stderr, " --no-flush-pbos do not explicitly signal texture data uploads\n"); - fprintf(stderr, " (will give display corruption, but makes it\n"); - fprintf(stderr, " possible to run with apitrace in real time)\n"); - fprintf(stderr, " --print-video-latency print out measurements of video latency on stdout\n"); - fprintf(stderr, " --max-input-queue-frames=FRAMES never keep more than FRAMES frames for each card\n"); - fprintf(stderr, " (default 6, minimum 1)\n"); - fprintf(stderr, " --audio-queue-length-ms=MS length of audio resampling queue (default 100.0)\n"); - fprintf(stderr, " --output-ycbcr-coefficients={rec601,rec709,auto}\n"); - fprintf(stderr, " Y'CbCr coefficient standard of output (default auto)\n"); - fprintf(stderr, " auto is rec601, unless --output-card is used\n"); - fprintf(stderr, " and a Rec. 
@@ -103,45 +109,47 @@
 	        DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000);
 	fprintf(stderr, "      --http-coarse-timebase      use less timebase for HTTP (recommended for muxers\n");
 	fprintf(stderr, "                                    that handle large pts poorly, like e.g. MP4)\n");
-	fprintf(stderr, "      --flat-audio                start with most audio processing turned off\n");
-	fprintf(stderr, "                                    (can be overridden by e.g. --enable-limiter)\n");
-	fprintf(stderr, "      --gain-staging=DB           set initial gain staging to the given value\n");
-	fprintf(stderr, "                                    (--disable-gain-staging-auto)\n");
-	fprintf(stderr, "      --disable-locut             turn off locut filter (also --enable)\n");
-	fprintf(stderr, "      --disable-gain-staging-auto turn off automatic gain staging (also --enable)\n");
-	fprintf(stderr, "      --disable-compressor        turn off regular compressor (also --enable)\n");
-	fprintf(stderr, "      --disable-limiter           turn off limiter (also --enable)\n");
-	fprintf(stderr, "      --disable-makeup-gain-auto  turn off auto-adjustment of final makeup gain (also --enable)\n");
-	fprintf(stderr, "      --disable-alsa-output       disable audio monitoring via ALSA\n");
-	fprintf(stderr, "      --no-flush-pbos             do not explicitly signal texture data uploads\n");
-	fprintf(stderr, "                                    (will give display corruption, but makes it\n");
-	fprintf(stderr, "                                    possible to run with apitrace in real time)\n");
-	fprintf(stderr, "      --print-video-latency       print out measurements of video latency on stdout\n");
-	fprintf(stderr, "      --max-input-queue-frames=FRAMES  never keep more than FRAMES frames for each card\n");
-	fprintf(stderr, "                                    (default 6, minimum 1)\n");
-	fprintf(stderr, "      --audio-queue-length-ms=MS  length of audio resampling queue (default 100.0)\n");
-	fprintf(stderr, "      --output-ycbcr-coefficients={rec601,rec709,auto}\n");
-	fprintf(stderr, "                                  Y'CbCr coefficient standard of output (default auto)\n");
-	fprintf(stderr, "                                    auto is rec601, unless --output-card is used\n");
-	fprintf(stderr, "                                    and a Rec. 709 mode (typically HD modes) is in use\n");
-	fprintf(stderr, "      --output-buffer-frames=NUM  number of frames in output buffer for --output-card,\n");
-	fprintf(stderr, "                                    can be fractional (default 6.0); note also\n");
-	fprintf(stderr, "                                    the audio queue can't be much longer than this\n");
-	fprintf(stderr, "      --output-slop-frames=NUM    if more less than this number of frames behind for\n");
-	fprintf(stderr, "                                    --output-card, try to submit anyway instead of\n");
-	fprintf(stderr, "                                    dropping the frame (default 0.5)\n");
-	fprintf(stderr, "      --timecode-stream           show timestamp and timecode in stream\n");
-	fprintf(stderr, "      --timecode-stdout           show timestamp and timecode on standard output\n");
-	fprintf(stderr, "      --10-bit-input              use 10-bit video input (requires compute shaders)\n");
-	fprintf(stderr, "      --10-bit-output             use 10-bit video output (requires compute shaders,\n");
-	fprintf(stderr, "                                    implies --record-x264-video)\n");
-	fprintf(stderr, "      --input-ycbcr-interpretation=CARD,{rec601,rec709,auto}[,{limited,full}]\n");
-	fprintf(stderr, "                                  Y'CbCr coefficient standard of card CARD (default auto)\n");
-	fprintf(stderr, "                                    auto is rec601 for SD, rec709 for HD, always limited\n");
-	fprintf(stderr, "                                    limited means standard 0-240/0-235 input range (for 8-bit)\n");
+	if (program == PROGRAM_NAGERU) {
+		fprintf(stderr, "      --flat-audio                start with most audio processing turned off\n");
+		fprintf(stderr, "                                    (can be overridden by e.g. --enable-limiter)\n");
+		fprintf(stderr, "      --gain-staging=DB           set initial gain staging to the given value\n");
+		fprintf(stderr, "                                    (--disable-gain-staging-auto)\n");
+		fprintf(stderr, "      --disable-locut             turn off locut filter (also --enable)\n");
+		fprintf(stderr, "      --disable-gain-staging-auto turn off automatic gain staging (also --enable)\n");
+		fprintf(stderr, "      --disable-compressor        turn off regular compressor (also --enable)\n");
+		fprintf(stderr, "      --disable-limiter           turn off limiter (also --enable)\n");
+		fprintf(stderr, "      --disable-makeup-gain-auto  turn off auto-adjustment of final makeup gain (also --enable)\n");
+		fprintf(stderr, "      --disable-alsa-output       disable audio monitoring via ALSA\n");
+		fprintf(stderr, "      --no-flush-pbos             do not explicitly signal texture data uploads\n");
+		fprintf(stderr, "                                    (will give display corruption, but makes it\n");
+		fprintf(stderr, "                                    possible to run with apitrace in real time)\n");
+		fprintf(stderr, "      --print-video-latency       print out measurements of video latency on stdout\n");
+		fprintf(stderr, "      --max-input-queue-frames=FRAMES  never keep more than FRAMES frames for each card\n");
+		fprintf(stderr, "                                    (default 6, minimum 1)\n");
+		fprintf(stderr, "      --audio-queue-length-ms=MS  length of audio resampling queue (default 100.0)\n");
+		fprintf(stderr, "      --output-ycbcr-coefficients={rec601,rec709,auto}\n");
+		fprintf(stderr, "                                  Y'CbCr coefficient standard of output (default auto)\n");
+		fprintf(stderr, "                                    auto is rec601, unless --output-card is used\n");
+		fprintf(stderr, "                                    and a Rec. 709 mode (typically HD modes) is in use\n");
+		fprintf(stderr, "      --output-buffer-frames=NUM  number of frames in output buffer for --output-card,\n");
+		fprintf(stderr, "                                    can be fractional (default 6.0); note also\n");
+		fprintf(stderr, "                                    the audio queue can't be much longer than this\n");
+		fprintf(stderr, "      --output-slop-frames=NUM    if more less than this number of frames behind for\n");
+		fprintf(stderr, "                                    --output-card, try to submit anyway instead of\n");
+		fprintf(stderr, "                                    dropping the frame (default 0.5)\n");
+		fprintf(stderr, "      --timecode-stream           show timestamp and timecode in stream\n");
+		fprintf(stderr, "      --timecode-stdout           show timestamp and timecode on standard output\n");
+		fprintf(stderr, "      --10-bit-input              use 10-bit video input (requires compute shaders)\n");
+		fprintf(stderr, "      --10-bit-output             use 10-bit video output (requires compute shaders,\n");
+		fprintf(stderr, "                                    implies --record-x264-video)\n");
+		fprintf(stderr, "      --input-ycbcr-interpretation=CARD,{rec601,rec709,auto}[,{limited,full}]\n");
+		fprintf(stderr, "                                  Y'CbCr coefficient standard of card CARD (default auto)\n");
+		fprintf(stderr, "                                    auto is rec601 for SD, rec709 for HD, always limited\n");
+		fprintf(stderr, "                                    limited means standard 0-240/0-235 input range (for 8-bit)\n");
+	}
 }
 
-void parse_flags(int argc, char * const argv[])
+void parse_flags(Program program, int argc, char * const argv[])
 {
 	static const option long_options[] = {
 		{ "help", no_argument, 0, OPTION_HELP },
@@ -444,12 +452,12 @@ void parse_flags(int argc, char * const argv[])
 			break;
 		}
 		case OPTION_HELP:
-			usage();
+			usage(program);
 			exit(0);
 		default:
 			fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
 			fprintf(stderr, "\n");
-			usage();
+			usage(program);
 			exit(1);
 		}
 	}
diff --git a/flags.h b/flags.h
index f26d4db..87b05b1 100644
--- a/flags.h
+++ b/flags.h
@@ -65,6 +65,11 @@ struct Flags {
 };
 extern Flags global_flags;
 
-void parse_flags(int argc, char * const argv[]);
+enum Program {
+	PROGRAM_NAGERU,
+	PROGRAM_KAERU
+};
+void usage(Program program);
+void parse_flags(Program program, int argc, char * const argv[]);
 
 #endif  // !defined(_FLAGS_H)
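flags.h now exports the Program selector, so both binaries parse the same flag table but print their own usage text; main.cpp and kaeru.cpp (below) differ only in the constant they pass. The shared entry-point pattern, as a minimal sketch (the body is a stub):

    #include "flags.h"

    // Shared entry-point pattern: parse the common flag table under the
    // right program identity, then read the results out of global_flags.
    int main(int argc, char *argv[])
    {
        parse_flags(PROGRAM_KAERU, argc, argv);  // or PROGRAM_NAGERU
        // Parsed values are published in the global Flags instance,
        // e.g. global_flags.width and global_flags.height.
        return 0;
    }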
diff --git a/kaeru.cpp b/kaeru.cpp
new file mode 100644
index 0000000..bb1b08f
--- /dev/null
+++ b/kaeru.cpp
@@ -0,0 +1,130 @@
+// Kaeru (変える), a simple transcoder intended for use with Nageru.
+// This is experimental code, not yet supported.
+
+#include "audio_encoder.h"
+#include "defs.h"
+#include "flags.h"
+#include "ffmpeg_capture.h"
+#include "mixer.h"
+#include "mux.h"
+#include "timebase.h"
+#include "x264_encoder.h"
+
+#include
+#include
+#include
+#include
+
+using namespace bmusb;
+using namespace movit;
+using namespace std;
+using namespace std::placeholders;
+
+Mixer *global_mixer = nullptr;
+
+int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+	static bool seen_sync_markers = false;
+	static string stream_mux_header;
+	HTTPD *httpd = (HTTPD *)opaque;
+
+	if (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT) {
+		seen_sync_markers = true;
+	} else if (type == AVIO_DATA_MARKER_UNKNOWN && !seen_sync_markers) {
+		// We don't know if this is a keyframe or not (the muxer could
+		// avoid marking it), so we just have to make the best of it.
+		type = AVIO_DATA_MARKER_SYNC_POINT;
+	}
+
+	if (type == AVIO_DATA_MARKER_HEADER) {
+		stream_mux_header.append((char *)buf, buf_size);
+		httpd->set_header(stream_mux_header);
+	} else {
+		httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT);
+	}
+	return buf_size;
+}
+
+unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x264_encoder, AudioEncoder *audio_encoder)
+{
+	AVFormatContext *avctx = avformat_alloc_context();
+	avctx->oformat = oformat;
+
+	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
+	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, httpd, nullptr, nullptr, nullptr);
+	avctx->pb->write_data_type = &write_packet;
+	avctx->pb->ignore_boundary_point = 1;
+
+	string video_extradata = x264_encoder->get_global_headers();
+
+	unique_ptr<Mux> mux;
+	int time_base = global_flags.stream_coarse_timebase ? COARSE_TIMEBASE : TIMEBASE;
+	mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_encoder->get_codec_parameters().get(), time_base,
+		/*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}));
+	return mux;
+}
+
+void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, int64_t pts, AVRational timebase, uint16_t timecode,
+                          FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
+                          FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
+{
+	if (pts >= 0 && video_frame.len > 0) {
+		pts = av_rescale_q(pts, timebase, AVRational{ 1, TIMEBASE });
+		int64_t frame_duration = TIMEBASE * video_format.frame_rate_nom / video_format.frame_rate_den;
+		x264_encoder->add_frame(pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
+	}
+
+	if (video_frame.owner) {
+		video_frame.owner->release_frame(video_frame);
+	}
+	if (audio_frame.owner) {
+		audio_frame.owner->release_frame(audio_frame);
+	}
+}
+
+void audio_frame_callback(Mux *mux, const AVPacket *pkt, AVRational timebase)
+{
+	mux->add_packet(*pkt, pkt->pts, pkt->dts == AV_NOPTS_VALUE ? pkt->pts : pkt->dts, timebase);
+}
+
+int main(int argc, char *argv[])
+{
+	parse_flags(PROGRAM_KAERU, argc, argv);
+	if (optind + 1 != argc) {
+		usage(PROGRAM_KAERU);
+		exit(1);
+	}
+
+	av_register_all();
+	avformat_network_init();
+
+	HTTPD httpd;
+
+	AVOutputFormat *oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr);
+	assert(oformat != nullptr);
+
+	unique_ptr<AudioEncoder> audio_encoder;
+	if (global_flags.stream_audio_codec_name.empty()) {
+		audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+	} else {
+		audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+	}
+
+	X264Encoder x264_encoder(oformat);
+	unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, &x264_encoder, audio_encoder.get());
+	x264_encoder.add_mux(http_mux.get());
+
+	FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
+	video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
+	video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, _1, _2, _3, _4, _5, _6, _7, _8, _9));
+	video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+	video.configure_card();
+	video.start_bm_capture();
+	video.change_rate(2.0);  // Be sure never to really fall behind, but also don't dump huge amounts of stuff onto x264.
+
+	httpd.start(9095);
+
+	for ( ;; ) {
+		sleep(3600);
+	}
+}
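Kaeru's mux never touches a file: create_mux() gives the muxer a custom AVIOContext whose write_data_type callback (write_packet() above) receives every muxed chunk tagged as header, sync point, or unknown, which is what lets HTTPD start new clients at a keyframe. A stripped-down version of that wiring against FFmpeg's public API, with a byte-counting callback standing in for HTTPD (make_custom_io_muxer and count_bytes are illustrative names):

    extern "C" {
    #include <libavformat/avformat.h>
    #include <libavformat/avio.h>
    }

    // Every muxed chunk lands here, tagged with its AVIODataMarkerType;
    // this stub just counts bytes.
    static int count_bytes(void *opaque, uint8_t *buf, int buf_size,
                           AVIODataMarkerType type, int64_t time)
    {
        *(int64_t *)opaque += buf_size;
        return buf_size;
    }

    AVFormatContext *make_custom_io_muxer(AVOutputFormat *oformat, int64_t *counter, int bufsize)
    {
        AVFormatContext *avctx = avformat_alloc_context();
        avctx->oformat = oformat;
        uint8_t *buf = (uint8_t *)av_malloc(bufsize);
        avctx->pb = avio_alloc_context(buf, bufsize, /*write_flag=*/1, counter,
                                       nullptr, nullptr, nullptr);
        avctx->pb->write_data_type = count_bytes;
        return avctx;
    }

Once such an AVFormatContext is handed to Mux, as create_mux() does, everything the muxer flushes goes to the callback instead of to disk.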
diff --git a/main.cpp b/main.cpp
index dcaf2a6..fb67e0e 100644
--- a/main.cpp
+++ b/main.cpp
@@ -21,7 +21,7 @@ extern "C" {
 
 int main(int argc, char *argv[])
 {
-	parse_flags(argc, argv);
+	parse_flags(PROGRAM_NAGERU, argc, argv);
 
 	if (global_flags.va_display.empty() ||
 	    global_flags.va_display[0] != '/') {

diff --git a/mux.cpp b/mux.cpp
index 5c6a150..f52f795 100644
--- a/mux.cpp
+++ b/mux.cpp
@@ -145,7 +145,7 @@ Mux::~Mux()
 	avformat_free_context(avctx);
 }
 
-void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
+void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational timebase)
 {
 	AVPacket pkt_copy;
 	if (av_copy_packet(&pkt_copy, &pkt) < 0) {
@@ -153,13 +153,13 @@ void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
 		exit(1);
 	}
 	if (pkt.stream_index == 0) {
-		pkt_copy.pts = av_rescale_q(pts, AVRational{1, TIMEBASE}, avstream_video->time_base);
-		pkt_copy.dts = av_rescale_q(dts, AVRational{1, TIMEBASE}, avstream_video->time_base);
-		pkt_copy.duration = av_rescale_q(pkt.duration, AVRational{1, TIMEBASE}, avstream_video->time_base);
+		pkt_copy.pts = av_rescale_q(pts, timebase, avstream_video->time_base);
+		pkt_copy.dts = av_rescale_q(dts, timebase, avstream_video->time_base);
+		pkt_copy.duration = av_rescale_q(pkt.duration, timebase, avstream_video->time_base);
 	} else if (pkt.stream_index == 1) {
-		pkt_copy.pts = av_rescale_q(pts, AVRational{1, TIMEBASE}, avstream_audio->time_base);
-		pkt_copy.dts = av_rescale_q(dts, AVRational{1, TIMEBASE}, avstream_audio->time_base);
-		pkt_copy.duration = av_rescale_q(pkt.duration, AVRational{1, TIMEBASE}, avstream_audio->time_base);
+		pkt_copy.pts = av_rescale_q(pts, timebase, avstream_audio->time_base);
+		pkt_copy.dts = av_rescale_q(dts, timebase, avstream_audio->time_base);
+		pkt_copy.duration = av_rescale_q(pkt.duration, timebase, avstream_audio->time_base);
 	} else {
 		assert(false);
 	}

diff --git a/mux.h b/mux.h
index 5bf3e41..e6193e0 100644
--- a/mux.h
+++ b/mux.h
@@ -18,6 +18,8 @@ extern "C" {
 #include
 #include
 
+#include "timebase.h"
+
 struct MuxMetrics {
 	// “written” will usually be equal video + audio + mux overhead,
 	// except that there could be buffered packets that count in audio or video
@@ -60,7 +62,7 @@ public:
 	// will be added to.
 	Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecParameters *audio_codecpar, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const std::vector<MuxMetrics *> &metrics);
 	~Mux();
-	void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts);
+	void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational timebase = { 1, TIMEBASE });
 
 	// As long as the mux is plugged, it will not actually write anything to disk,
 	// just queue the packets. Once it is unplugged, the packets are reordered by pts