From 3ed9b0fbb73071284aa7fa221ce0373d2dadbc85 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 11 Oct 2015 16:29:00 +0200 Subject: [PATCH] Send audio all the way through to the encoder and muxer. --- h264encode.cpp | 103 +++++++++++++++++++++++++++++++++++++------------ h264encode.h | 7 +++- mixer.cpp | 45 ++++++++++----------- mixer.h | 3 +- resampler.h | 2 +- 5 files changed, 107 insertions(+), 53 deletions(-) diff --git a/h264encode.cpp b/h264encode.cpp index 99fdc11..ddbcb0a 100644 --- a/h264encode.cpp +++ b/h264encode.cpp @@ -1587,21 +1587,55 @@ int H264Encoder::save_codeddata(storage_task task) } vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf); - AVPacket pkt; - memset(&pkt, 0, sizeof(pkt)); - pkt.buf = nullptr; - pkt.pts = av_rescale_q(task.display_order, AVRational{1, frame_rate}, avstream->time_base); - pkt.dts = av_rescale_q(task.encode_order, AVRational{1, frame_rate}, avstream->time_base); - pkt.data = reinterpret_cast(&data[0]); - pkt.size = data.size(); - pkt.stream_index = 0; - if (task.frame_type == FRAME_IDR || task.frame_type == FRAME_I) { - pkt.flags = AV_PKT_FLAG_KEY; - } else { - pkt.flags = 0; + { + // Add video. + AVPacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.buf = nullptr; + pkt.pts = av_rescale_q(task.display_order + 2, AVRational{1, frame_rate}, avstream_video->time_base); // FIXME: delay + pkt.dts = av_rescale_q(task.encode_order + 2, AVRational{1, frame_rate}, avstream_video->time_base); // FIXME: delay + pkt.data = reinterpret_cast(&data[0]); + pkt.size = data.size(); + pkt.stream_index = 0; + if (task.frame_type == FRAME_IDR || task.frame_type == FRAME_I) { + pkt.flags = AV_PKT_FLAG_KEY; + } else { + pkt.flags = 0; + } + //pkt.duration = 1; + av_interleaved_write_frame(avctx, &pkt); + } + { + // Add audio. + AVFrame *frame = avcodec_alloc_frame(); + frame->nb_samples = task.audio.size() / 2; + frame->format = AV_SAMPLE_FMT_FLT; + frame->channel_layout = AV_CH_LAYOUT_STEREO; + + unique_ptr planar_samples(new float[task.audio.size()]); + avcodec_fill_audio_frame(frame, 2, AV_SAMPLE_FMT_FLTP, (const uint8_t*)planar_samples.get(), task.audio.size() * sizeof(float), 0); + for (int i = 0; i < frame->nb_samples; ++i) { + planar_samples[i] = task.audio[i * 2 + 0]; + planar_samples[i + frame->nb_samples] = task.audio[i * 2 + 1]; + } + + AVPacket pkt; + av_init_packet(&pkt); + pkt.data = nullptr; + pkt.size = 0; + int got_output; + avcodec_encode_audio2(avstream_audio->codec, &pkt, frame, &got_output); + if (got_output) { + pkt.pts = av_rescale_q(task.display_order, AVRational{1, frame_rate}, avstream_audio->time_base); // FIXME + pkt.stream_index = 1; + av_interleaved_write_frame(avctx, &pkt); + } + // TODO: Delayed frames. + avcodec_free_frame(&frame); } - pkt.duration = 1; - av_interleaved_write_frame(avctx, &pkt); + + static FILE *audiofp = fopen("audio.raw", "wb"); + fwrite(&task.audio[0], 4 * task.audio.size(), 1, audiofp); #if 0 printf("\r "); /* return back to startpoint */ @@ -1725,17 +1759,37 @@ H264Encoder::H264Encoder(QSurface *surface, int width, int height, const char *o fprintf(stderr, "%s: avio_open2() failed\n", output_filename); exit(1); } - AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_H264); - avstream = avformat_new_stream(avctx, codec); - if (avstream == nullptr) { + AVCodec *codec_video = avcodec_find_encoder(AV_CODEC_ID_H264); + avstream_video = avformat_new_stream(avctx, codec_video); + if (avstream_video == nullptr) { + fprintf(stderr, "%s: avformat_new_stream() failed\n", output_filename); + exit(1); + } + avstream_video->time_base = AVRational{1, frame_rate}; + avstream_video->codec->width = width; + avstream_video->codec->height = height; + avstream_video->codec->time_base = AVRational{1, frame_rate}; + avstream_video->codec->ticks_per_frame = 1; // or 2? + + AVCodec *codec_audio = avcodec_find_encoder(AV_CODEC_ID_MP3); + avstream_audio = avformat_new_stream(avctx, codec_audio); + if (avstream_audio == nullptr) { fprintf(stderr, "%s: avformat_new_stream() failed\n", output_filename); exit(1); } - avstream->time_base = AVRational{1, frame_rate}; - avstream->codec->width = width; - avstream->codec->height = height; - avstream->codec->time_base = AVRational{1, frame_rate}; - avstream->codec->ticks_per_frame = 1; // or 2? + avstream_audio->time_base = AVRational{1, frame_rate}; + avstream_audio->codec->bit_rate = 256000; + avstream_audio->codec->sample_rate = 48000; + avstream_audio->codec->sample_fmt = AV_SAMPLE_FMT_FLTP; + avstream_audio->codec->channels = 2; + avstream_audio->codec->channel_layout = AV_CH_LAYOUT_STEREO; + avstream_audio->codec->time_base = AVRational{1, frame_rate}; + + /* open it */ + if (avcodec_open2(avstream_audio->codec, codec_audio, NULL) < 0) { + fprintf(stderr, "Could not open codec\n"); + exit(1); + } if (avformat_write_header(avctx, NULL) < 0) { fprintf(stderr, "%s: avformat_write_header() failed\n", output_filename); @@ -1862,11 +1916,11 @@ bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex) return true; } -void H264Encoder::end_frame(RefCountedGLsync fence, const std::vector &input_frames) +void H264Encoder::end_frame(RefCountedGLsync fence, std::vector audio, const std::vector &input_frames) { { unique_lock lock(frame_queue_mutex); - pending_frames[current_storage_frame++] = PendingFrame{ fence, input_frames }; + pending_frames[current_storage_frame++] = PendingFrame{ fence, input_frames, move(audio) }; } frame_queue_nonempty.notify_one(); } @@ -1934,6 +1988,7 @@ void H264Encoder::copy_thread_func() tmp.display_order = current_frame_display; tmp.encode_order = current_frame_encoding; tmp.frame_type = current_frame_type; + tmp.audio = move(frame.audio); storage_task_enqueue(move(tmp)); update_ReferenceFrames(); diff --git a/h264encode.h b/h264encode.h index 636d6a8..5811319 100644 --- a/h264encode.h +++ b/h264encode.h @@ -67,13 +67,14 @@ public: void #endif bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); - void end_frame(RefCountedGLsync fence, const std::vector &input_frames); + void end_frame(RefCountedGLsync fence, std::vector audio, const std::vector &input_frames); private: struct storage_task { unsigned long long display_order; unsigned long long encode_order; int frame_type; + std::vector audio; }; void copy_thread_func(); @@ -100,12 +101,14 @@ private: struct PendingFrame { RefCountedGLsync fence; std::vector input_frames; + std::vector audio; }; std::map pending_frames; QSurface *surface; AVFormatContext *avctx; - AVStream *avstream; + AVStream *avstream_video; + AVStream *avstream_audio; }; #endif diff --git a/mixer.cpp b/mixer.cpp index c126c0b..03773b7 100644 --- a/mixer.cpp +++ b/mixer.cpp @@ -201,38 +201,22 @@ void Mixer::bm_frame(int card_index, uint16_t timecode, GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); check_error(); assert(fence != nullptr); + + // Convert the audio to stereo fp32 and store it next to the video. + size_t num_samples = (audio_frame.len - audio_offset) / 8 / 3; + vector audio; + audio.resize(num_samples * 2); + convert_fixed24_to_fp32(&audio[0], 2, audio_frame.data + audio_offset, 8, num_samples); + { std::unique_lock lock(bmusb_mutex); card->new_data_ready = true; card->new_frame = RefCountedFrame(video_frame); card->new_data_ready_fence = fence; + card->new_frame_audio = move(audio); card->new_data_ready_changed.notify_all(); } - // As a test of the resampler, send the data from card 0 through it and onto disk. - // TODO: Send the audio on, and encode it through ffmpeg. - if (card_index == 0) { - size_t num_samples = (audio_frame.len - audio_offset) / 8 / 3; - double pts = timecode / 60.0; // FIXME: Unwrap. And rebase. - unique_ptr samplesf(new float[num_samples * 2]); - convert_fixed24_to_fp32(samplesf.get(), 2, audio_frame.data + audio_offset, 8, num_samples); - card->resampler->add_input_samples(pts, samplesf.get(), num_samples); - - float samples_out[(48000 / 60) * 2]; - card->resampler->get_output_samples(pts, samples_out, 48000 / 60); - - static FILE *audiofp = nullptr; - if (audiofp == nullptr) { - audiofp = fopen("audio.raw", "wb"); - } - fwrite(samples_out, sizeof(samples_out), 1, audiofp); - //fwrite(samplesf.get(), num_samples * sizeof(float) * 2, 1, audiofp); - - if (audio_frame.len - audio_offset != 19200) { - printf("%d: %d samples (%d bytes)\n", card_index, int(num_samples), int(audio_frame.len - audio_offset)); - } - } - // Video frame will be released when last user of card->new_frame goes out of scope. card->usb->get_audio_frame_allocator()->release_frame(audio_frame); } @@ -267,6 +251,7 @@ void Mixer::thread_func() card_copy[card_index].new_data_ready = card->new_data_ready; card_copy[card_index].new_frame = card->new_frame; card_copy[card_index].new_data_ready_fence = card->new_data_ready_fence; + card_copy[card_index].new_frame_audio = move(card->new_frame_audio); card->new_data_ready = false; card->new_data_ready_changed.notify_all(); } @@ -321,6 +306,14 @@ void Mixer::thread_func() RefCountedGLsync fence(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0); check_error(); + // Resample the audio as needed. + // TODO: Allow using audio from the other card(s) as well. + double pts = frame / 60.0; + cards[0].resampler->add_input_samples(pts, card_copy[0].new_frame_audio.data(), card_copy[0].new_frame_audio.size() / 2); + vector samples_out; + samples_out.resize((48000 / 60) * 2); + cards[0].resampler->get_output_samples(pts, &samples_out[0], 48000 / 60); + // Make sure the H.264 gets a reference to all the // input frames needed, so that they are not released back // until the rendering is done. @@ -328,7 +321,7 @@ void Mixer::thread_func() for (int card_index = 0; card_index < NUM_CARDS; ++card_index) { input_frames.push_back(bmusb_current_rendering_frame[card_index]); } - h264_encoder->end_frame(fence, input_frames); + h264_encoder->end_frame(fence, move(samples_out), input_frames); // The live frame just shows the RGBA texture we just rendered. // It owns rgba_tex now. @@ -364,6 +357,7 @@ void Mixer::thread_func() // chain->print_phase_timing(); } +#if 0 // Reset every 100 frames, so that local variations in frame times // (especially for the first few frames, when the shaders are // compiled etc.) don't make it hard to measure for the entire @@ -372,6 +366,7 @@ void Mixer::thread_func() frame = 0; start = now; } +#endif check_error(); } diff --git a/mixer.h b/mixer.h index f4ea5c9..9b631e4 100644 --- a/mixer.h +++ b/mixer.h @@ -113,9 +113,10 @@ private: QSurface *surface; QOpenGLContext *context; - bool new_data_ready = false; // Whether new_frame contains anything. + bool new_data_ready = false; // Whether new_frame and new_frame_audio contains anything. RefCountedFrame new_frame; GLsync new_data_ready_fence; // Whether new_frame is ready for rendering. + std::vector new_frame_audio; std::condition_variable new_data_ready_changed; // Set whenever new_data_ready is changed. Resampler *resampler = nullptr; }; diff --git a/resampler.h b/resampler.h index 80ed1af..aa7af3d 100644 --- a/resampler.h +++ b/resampler.h @@ -80,7 +80,7 @@ private: // How much delay we are expected to have, in input samples. // If actual delay drifts too much away from this, we will start // changing the resampling ratio to compensate. - double expected_delay = 4800.0; + double expected_delay = 1600.0; // Input samples not yet fed into the resampler. // TODO: Use a circular buffer instead, for efficiency. -- 2.39.2