git.sesse.net Git - nageru/blobdiff - h264encode.cpp
Rework entire pts handling.
[nageru] / h264encode.cpp
index ddbcb0a2820f87ed4d59e29236f9ff6e07c973d1..558bf4f7868c192dda50939d99e7aa1320a80442 100644 (file)
@@ -26,6 +26,7 @@
 #include <thread>
 
 #include "context.h"
+#include "timebase.h"
 
 class QOpenGLContext;
 class QSurface;
@@ -117,7 +118,7 @@ static  int initial_qp = 15;
 static  int minimal_qp = 0;
 static  int intra_period = 30;
 static  int intra_idr_period = 60;
-static  int ip_period = 1;
+static  int ip_period = 3;
 static  int rc_mode = -1;
 static  int rc_default_modes[] = {
     VA_RC_VBR,
@@ -343,7 +344,7 @@ static void sps_rbsp(bitstream *bs)
         bitstream_put_ui(bs, 1, 1); /* timing_info_present_flag */
         {
             bitstream_put_ui(bs, 1, 32);  // FPS
-            bitstream_put_ui(bs, frame_rate * 2, 32);  // FPS
+            bitstream_put_ui(bs, TIMEBASE * 2, 32);  // FPS
             bitstream_put_ui(bs, 1, 1);
         }
         bitstream_put_ui(bs, 1, 1); /* nal_hrd_parameters_present_flag */
@@ -1252,7 +1253,7 @@ static int render_sequence(void)
 
     seq_param.max_num_ref_frames = num_ref_frames;
     seq_param.seq_fields.bits.frame_mbs_only_flag = 1;
-    seq_param.time_scale = frame_rate * 2;
+    seq_param.time_scale = TIMEBASE * 2;
     seq_param.num_units_in_tick = 1; /* Tc = num_units_in_tick / scale */
     seq_param.seq_fields.bits.log2_max_pic_order_cnt_lsb_minus4 = Log2MaxPicOrderCntLsb - 4;
     seq_param.seq_fields.bits.log2_max_frame_num_minus4 = Log2MaxFrameNum - 4;;
@@ -1587,13 +1588,23 @@ int H264Encoder::save_codeddata(storage_task task)
     }
     vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf);
 
+    const int64_t pts_dts_delay = (ip_period - 1) * (TIMEBASE / frame_rate);  // FIXME: Wrong for variable frame rate.
+    const int64_t av_delay = TIMEBASE / 10;  // Corresponds to the fixed delay in resampler.h. TODO: Make less hard-coded.
+    int64_t pts, dts;
     {
+        {
+             unique_lock<mutex> lock(frame_queue_mutex);
+             assert(timestamps.count(task.display_order));
+             assert(timestamps.count(task.encode_order));
+             pts = timestamps[task.display_order];
+             dts = timestamps[task.encode_order];
+        }
         // Add video.
         AVPacket pkt;
         memset(&pkt, 0, sizeof(pkt));
         pkt.buf = nullptr;
-        pkt.pts = av_rescale_q(task.display_order + 2, AVRational{1, frame_rate}, avstream_video->time_base);  // FIXME: delay
-        pkt.dts = av_rescale_q(task.encode_order + 2, AVRational{1, frame_rate}, avstream_video->time_base);  // FIXME: delay
+        pkt.pts = av_rescale_q(pts + av_delay + pts_dts_delay, AVRational{1, TIMEBASE}, avstream_video->time_base);
+        pkt.dts = av_rescale_q(dts + av_delay, AVRational{1, TIMEBASE}, avstream_video->time_base);
         pkt.data = reinterpret_cast<uint8_t *>(&data[0]);
         pkt.size = data.size();
         pkt.stream_index = 0;
@@ -1605,18 +1616,30 @@ int H264Encoder::save_codeddata(storage_task task)
         //pkt.duration = 1;
         av_interleaved_write_frame(avctx, &pkt);
     }
-    {
-        // Add audio.
+    // Encode and add all audio frames up to and including the pts of this video frame.
+    // (They can never be queued to us after the video frame they belong to, only before.)
+    for ( ;; ) {
+        int64_t audio_pts;
+        std::vector<float> audio;
+        {
+             unique_lock<mutex> lock(frame_queue_mutex);
+             if (pending_audio_frames.empty()) break;
+             auto it = pending_audio_frames.begin();
+             if (it->first > int(pts)) break;
+             audio_pts = it->first;
+             audio = move(it->second);
+             pending_audio_frames.erase(it); 
+        }
         AVFrame *frame = avcodec_alloc_frame();
-        frame->nb_samples = task.audio.size() / 2;
+        frame->nb_samples = audio.size() / 2;
         frame->format = AV_SAMPLE_FMT_FLT;
         frame->channel_layout = AV_CH_LAYOUT_STEREO;
 
-        unique_ptr<float[]> planar_samples(new float[task.audio.size()]);
-        avcodec_fill_audio_frame(frame, 2, AV_SAMPLE_FMT_FLTP, (const uint8_t*)planar_samples.get(), task.audio.size() * sizeof(float), 0);
+        unique_ptr<float[]> planar_samples(new float[audio.size()]);
+        avcodec_fill_audio_frame(frame, 2, AV_SAMPLE_FMT_FLTP, (const uint8_t*)planar_samples.get(), audio.size() * sizeof(float), 0);
         for (int i = 0; i < frame->nb_samples; ++i) {
-            planar_samples[i] = task.audio[i * 2 + 0];
-            planar_samples[i + frame->nb_samples] = task.audio[i * 2 + 1];
+            planar_samples[i] = audio[i * 2 + 0];
+            planar_samples[i + frame->nb_samples] = audio[i * 2 + 1];
         }
 
         AVPacket pkt;
@@ -1626,16 +1649,18 @@ int H264Encoder::save_codeddata(storage_task task)
         int got_output;
         avcodec_encode_audio2(avstream_audio->codec, &pkt, frame, &got_output);
         if (got_output) {
-            pkt.pts = av_rescale_q(task.display_order, AVRational{1, frame_rate}, avstream_audio->time_base);  // FIXME
+            pkt.pts = av_rescale_q(audio_pts + pts_dts_delay, AVRational{1, TIMEBASE}, avstream_audio->time_base);
+            pkt.dts = pkt.pts;
             pkt.stream_index = 1;
             av_interleaved_write_frame(avctx, &pkt);
         }
         // TODO: Delayed frames.
         avcodec_free_frame(&frame);
     }
-
-    static FILE *audiofp = fopen("audio.raw", "wb");
-    fwrite(&task.audio[0], 4 * task.audio.size(), 1, audiofp);
+    {
+        unique_lock<mutex> lock(frame_queue_mutex);
+        timestamps.erase(task.encode_order - (ip_period - 1));
+    }
 
 #if 0
     printf("\r      "); /* return back to startpoint */
@@ -1765,10 +1790,10 @@ H264Encoder::H264Encoder(QSurface *surface, int width, int height, const char *o
                fprintf(stderr, "%s: avformat_new_stream() failed\n", output_filename);
                exit(1);
        }
-       avstream_video->time_base = AVRational{1, frame_rate};
+       avstream_video->time_base = AVRational{1, TIMEBASE};
        avstream_video->codec->width = width;
        avstream_video->codec->height = height;
-       avstream_video->codec->time_base = AVRational{1, frame_rate};
+       avstream_video->codec->time_base = AVRational{1, TIMEBASE};
        avstream_video->codec->ticks_per_frame = 1;  // or 2?
 
        AVCodec *codec_audio = avcodec_find_encoder(AV_CODEC_ID_MP3);
@@ -1777,13 +1802,13 @@ H264Encoder::H264Encoder(QSurface *surface, int width, int height, const char *o
                fprintf(stderr, "%s: avformat_new_stream() failed\n", output_filename);
                exit(1);
        }
-       avstream_audio->time_base = AVRational{1, frame_rate};
+       avstream_audio->time_base = AVRational{1, TIMEBASE};
        avstream_audio->codec->bit_rate = 256000;
        avstream_audio->codec->sample_rate = 48000;
        avstream_audio->codec->sample_fmt = AV_SAMPLE_FMT_FLTP;
        avstream_audio->codec->channels = 2;
        avstream_audio->codec->channel_layout = AV_CH_LAYOUT_STEREO;
-       avstream_audio->codec->time_base = AVRational{1, frame_rate};
+       avstream_audio->codec->time_base = AVRational{1, TIMEBASE};
 
        /* open it */
        if (avcodec_open2(avstream_audio->codec, codec_audio, NULL) < 0) {
@@ -1916,11 +1941,23 @@ bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
        return true;
 }
 
-void H264Encoder::end_frame(RefCountedGLsync fence, std::vector<float> audio, const std::vector<RefCountedFrame> &input_frames)
+void H264Encoder::add_audio(int64_t pts, std::vector<float> audio)  // Queue one block of audio samples under its pts; save_codeddata() later drains and encodes entries whose pts does not exceed the current video frame's.
+{
+       {
+               unique_lock<mutex> lock(frame_queue_mutex);  // Guards pending_audio_frames; the inner scope releases it before the notify below.
+               pending_audio_frames[pts] = move(audio);  // NOTE(review): assignment through operator[] replaces anything already stored under the same pts -- confirm pts values are unique.
+       }
+       frame_queue_nonempty.notify_one();  // NOTE(review): the only waiter visible in this diff (copy_thread_func) tests pending_video_frames only, so this wakeup does not by itself satisfy its predicate -- confirm it is needed.
+}
+
+
+void H264Encoder::end_frame(RefCountedGLsync fence, int64_t pts, const std::vector<RefCountedFrame> &input_frames)  // Queue a finished video frame (fence + input frames) and record its pts, both keyed by current_storage_frame.
 {
        {
                unique_lock<mutex> lock(frame_queue_mutex);
-               pending_frames[current_storage_frame++] = PendingFrame{ fence, input_frames, move(audio) };
+               pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames };  // Picked up by copy_thread_func, which waits until the entry for current_frame_display exists.
+               timestamps[current_storage_frame] = pts;  // Read back in save_codeddata() via task.display_order / task.encode_order to compute the packet pts and dts.
+               ++current_storage_frame;  // Advanced under the lock, after both maps hold an entry for this index.
        }
        frame_queue_nonempty.notify_one();
 }
@@ -1939,10 +1976,10 @@ void H264Encoder::copy_thread_func()
 
                {
                        unique_lock<mutex> lock(frame_queue_mutex);
-                       frame_queue_nonempty.wait(lock, [this]{ return copy_thread_should_quit || pending_frames.count(current_frame_display) != 0; });
+                       frame_queue_nonempty.wait(lock, [this]{ return copy_thread_should_quit || pending_video_frames.count(current_frame_display) != 0; });
                        if (copy_thread_should_quit) return;
-                       frame = move(pending_frames[current_frame_display]);
-                       pending_frames.erase(current_frame_display);
+                       frame = move(pending_video_frames[current_frame_display]);
+                       pending_video_frames.erase(current_frame_display);
                }
 
                // Wait for the GPU to be done with the frame.
@@ -1988,7 +2025,6 @@ void H264Encoder::copy_thread_func()
                tmp.display_order = current_frame_display;
                tmp.encode_order = current_frame_encoding;
                tmp.frame_type = current_frame_type;
-               tmp.audio = move(frame.audio);
                storage_task_enqueue(move(tmp));
                
                update_ReferenceFrames();