// futatabi/video_stream.cpp
#include "video_stream.h"

extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/channel_layout.h>
}

#include "chroma_subsampler.h"
#include "exif_parser.h"
#include "flags.h"
#include "flow.h"
#include "jpeg_frame_view.h"
#include "movit/util.h"
#include "pbo_pool.h"
#include "player.h"
#include "shared/context.h"
#include "shared/ffmpeg_raii.h"
#include "shared/httpd.h"
#include "shared/metrics.h"
#include "shared/shared_defs.h"
#include "shared/mux.h"
#include "util.h"
#include "ycbcr_converter.h"

#include <assert.h>
#include <epoxy/glx.h>
#include <inttypes.h>
#include <jpeglib.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

using namespace movit;
using namespace std;
using namespace std::chrono;

namespace {

once_flag video_metrics_inited;
Summary metric_jpeg_encode_time_seconds;
Summary metric_fade_latency_seconds;
Summary metric_interpolation_latency_seconds;
Summary metric_fade_fence_wait_time_seconds;
Summary metric_interpolation_fence_wait_time_seconds;

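// If the frame is still being uploaded to the GPU (from the UI thread),
// make the GPU wait for the upload to finish before using it. Note that
// glWaitSync blocks the GPU command stream, not the CPU.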
void wait_for_upload(shared_ptr<Frame> &frame)
{
        if (frame->uploaded_interpolation != nullptr) {
                glWaitSync(frame->uploaded_interpolation.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
                frame->uploaded_interpolation.reset();
        }
}

}  // namespace

extern HTTPD *global_httpd;

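// A libjpeg destination manager that writes the compressed output into a
// std::string. The jpeg_destination_mgr member must come first, and the
// struct must be standard-layout (checked by the static_assert below),
// so that libjpeg can treat a VectorDestinationManager * as a
// jpeg_destination_mgr *.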
struct VectorDestinationManager {
        jpeg_destination_mgr pub;
        string dest;

        VectorDestinationManager()
        {
                pub.init_destination = init_destination_thunk;
                pub.empty_output_buffer = empty_output_buffer_thunk;
                pub.term_destination = term_destination_thunk;
        }

        static void init_destination_thunk(j_compress_ptr ptr)
        {
                ((VectorDestinationManager *)(ptr->dest))->init_destination();
        }

        inline void init_destination()
        {
                make_room(0);
        }

        static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
        {
                return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
        }

        inline bool empty_output_buffer()
        {
                make_room(dest.size());  // Per the libjpeg spec, ignore pub.free_in_buffer here; the whole buffer counts as consumed.
                return true;
        }

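        // Grow the string by at least 4 kB past what is already used, then
        // hand the entire unused tail to libjpeg. resize(capacity()) makes
        // sure we also use whatever slack the allocator rounded up to.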
        inline void make_room(size_t bytes_used)
        {
                dest.resize(bytes_used + 4096);
                dest.resize(dest.capacity());
                pub.next_output_byte = (uint8_t *)dest.data() + bytes_used;
                pub.free_in_buffer = dest.size() - bytes_used;
        }

        static void term_destination_thunk(j_compress_ptr ptr)
        {
                ((VectorDestinationManager *)(ptr->dest))->term_destination();
        }

        inline void term_destination()
        {
                dest.resize(dest.size() - pub.free_in_buffer);
        }
};
static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");

string encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t *cr_data, unsigned width, unsigned height, string exif_data)
{
        steady_clock::time_point start = steady_clock::now();
        VectorDestinationManager dest;

        jpeg_compress_struct cinfo;
        jpeg_error_mgr jerr;
        cinfo.err = jpeg_std_error(&jerr);
        jpeg_create_compress(&cinfo);

        cinfo.dest = (jpeg_destination_mgr *)&dest;
        cinfo.input_components = 3;
        cinfo.in_color_space = JCS_RGB;
        jpeg_set_defaults(&cinfo);
        constexpr int quality = 90;
        jpeg_set_quality(&cinfo, quality, /*force_baseline=*/false);

        cinfo.image_width = width;
        cinfo.image_height = height;
        cinfo.raw_data_in = true;
        jpeg_set_colorspace(&cinfo, JCS_YCbCr);
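        // 4:2:2 subsampling: full-resolution Y', Cb and Cr halved horizontally.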
        cinfo.comp_info[0].h_samp_factor = 2;
        cinfo.comp_info[0].v_samp_factor = 1;
        cinfo.comp_info[1].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = 1;
        cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[2].v_samp_factor = 1;
        cinfo.CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
        jpeg_start_compress(&cinfo, true);

        // This comment marker is private to FFmpeg. It signals limited Y'CbCr range
        // (and nothing else).
        jpeg_write_marker(&cinfo, JPEG_COM, (const JOCTET *)"CS=ITU601", strlen("CS=ITU601"));

        if (!exif_data.empty()) {
                jpeg_write_marker(&cinfo, JPEG_APP0 + 1, (const JOCTET *)exif_data.data(), exif_data.size());
        }

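        // In raw-data mode, libjpeg wants whole MCU rows at a time; with
        // v_samp_factor = 1 for all components, that is eight scanlines per call.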
        JSAMPROW yptr[8], cbptr[8], crptr[8];
        JSAMPARRAY data[3] = { yptr, cbptr, crptr };
        for (unsigned y = 0; y < height; y += 8) {
                for (unsigned yy = 0; yy < 8; ++yy) {
                        yptr[yy] = const_cast<JSAMPROW>(&y_data[(y + yy) * width]);
                        cbptr[yy] = const_cast<JSAMPROW>(&cb_data[(y + yy) * width / 2]);
                        crptr[yy] = const_cast<JSAMPROW>(&cr_data[(y + yy) * width / 2]);
                }

                jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
        }

        jpeg_finish_compress(&cinfo);
        jpeg_destroy_compress(&cinfo);

        steady_clock::time_point stop = steady_clock::now();
        metric_jpeg_encode_time_seconds.count_event(duration<double>(stop - start).count());

        return move(dest.dest);
}

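// Encode a frame that has been read back into a PBO. The buffer is packed
// planar 4:2:2: width * height bytes of Y', followed by a half-width Cb
// plane and then a half-width Cr plane.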
string encode_jpeg_from_pbo(void *contents, unsigned width, unsigned height, string exif_data)
{
        unsigned chroma_width = width / 2;

        const uint8_t *y = (const uint8_t *)contents;
        const uint8_t *cb = (const uint8_t *)contents + width * height;
        const uint8_t *cr = (const uint8_t *)contents + width * height + chroma_width * height;
        return encode_jpeg(y, cb, cr, width, height, move(exif_data));
}

VideoStream::VideoStream(AVFormatContext *file_avctx)
        : avctx(file_avctx), output_fast_forward(file_avctx != nullptr)
{
        call_once(video_metrics_inited, [] {
                vector<double> quantiles{ 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99 };
                metric_jpeg_encode_time_seconds.init(quantiles, 60.0);
                global_metrics.add("jpeg_encode_time_seconds", &metric_jpeg_encode_time_seconds);
                metric_fade_fence_wait_time_seconds.init(quantiles, 60.0);
                global_metrics.add("fade_fence_wait_time_seconds", &metric_fade_fence_wait_time_seconds);
                metric_interpolation_fence_wait_time_seconds.init(quantiles, 60.0);
                global_metrics.add("interpolation_fence_wait_time_seconds", &metric_interpolation_fence_wait_time_seconds);
                metric_fade_latency_seconds.init(quantiles, 60.0);
                global_metrics.add("fade_latency_seconds", &metric_fade_latency_seconds);
                metric_interpolation_latency_seconds.init(quantiles, 60.0);
                global_metrics.add("interpolation_latency_seconds", &metric_interpolation_latency_seconds);
        });

        ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_DUAL_YCBCR, /*resource_pool=*/nullptr));
        ycbcr_semiplanar_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_SEMIPLANAR, /*resource_pool=*/nullptr));

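        // Set up the GPU resources for each of the num_interpolate_slots
        // frames that can be in flight at once: input/gray texture arrays
        // (two layers, one per source frame), fade output textures, split
        // Cb/Cr textures, the matching FBOs, and a persistently mapped PBO
        // for reading the result back to the CPU.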
        GLuint input_tex[num_interpolate_slots], gray_tex[num_interpolate_slots];
        GLuint fade_y_output_tex[num_interpolate_slots], fade_cbcr_output_tex[num_interpolate_slots];
        GLuint cb_tex[num_interpolate_slots], cr_tex[num_interpolate_slots];

        glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, input_tex);
        glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, gray_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_y_output_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_cbcr_output_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cb_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cr_tex);
        check_error();

        size_t width = global_flags.width, height = global_flags.height;
        int levels = find_num_levels(width, height);
        for (size_t i = 0; i < num_interpolate_slots; ++i) {
                glTextureStorage3D(input_tex[i], levels, GL_RGBA8, width, height, 2);
                check_error();
                glTextureStorage3D(gray_tex[i], levels, GL_R8, width, height, 2);
                check_error();
                glTextureStorage2D(fade_y_output_tex[i], 1, GL_R8, width, height);
                check_error();
                glTextureStorage2D(fade_cbcr_output_tex[i], 1, GL_RG8, width, height);
                check_error();
                glTextureStorage2D(cb_tex[i], 1, GL_R8, width / 2, height);
                check_error();
                glTextureStorage2D(cr_tex[i], 1, GL_R8, width / 2, height);
                check_error();

                unique_ptr<InterpolatedFrameResources> resource(new InterpolatedFrameResources);
                resource->owner = this;
                resource->input_tex = input_tex[i];
                resource->gray_tex = gray_tex[i];
                resource->fade_y_output_tex = fade_y_output_tex[i];
                resource->fade_cbcr_output_tex = fade_cbcr_output_tex[i];
                resource->cb_tex = cb_tex[i];
                resource->cr_tex = cr_tex[i];
                glCreateFramebuffers(2, resource->input_fbos);
                check_error();
                glCreateFramebuffers(1, &resource->fade_fbo);
                check_error();

                glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 0);
                check_error();
                glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 0);
                check_error();
                glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 1);
                check_error();
                glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 1);
                check_error();
                glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT0, fade_y_output_tex[i], 0);
                check_error();
                glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT1, fade_cbcr_output_tex[i], 0);
                check_error();

                GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
                glNamedFramebufferDrawBuffers(resource->input_fbos[0], 2, bufs);
                check_error();
                glNamedFramebufferDrawBuffers(resource->input_fbos[1], 2, bufs);
                check_error();
                glNamedFramebufferDrawBuffers(resource->fade_fbo, 2, bufs);
                check_error();

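                // The readback PBO is mapped once and stays mapped
                // (GL_MAP_PERSISTENT_BIT), so the encode thread can read the
                // pixels directly after waiting on the fence. Four bytes per
                // pixel leaves comfortable headroom for the Y' + Cb + Cr
                // planes (two bytes per pixel in 4:2:2).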
                glCreateBuffers(1, &resource->pbo);
                check_error();
                glNamedBufferStorage(resource->pbo, width * height * 4, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
                check_error();
                resource->pbo_contents = glMapNamedBufferRange(resource->pbo, 0, width * height * 4, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
                interpolate_resources.push_back(move(resource));
        }

        check_error();

        OperatingPoint op;
        if (global_flags.interpolation_quality == 0 ||
            global_flags.interpolation_quality == 1) {
                op = operating_point1;
        } else if (global_flags.interpolation_quality == 2) {
                op = operating_point2;
        } else if (global_flags.interpolation_quality == 3) {
                op = operating_point3;
        } else if (global_flags.interpolation_quality == 4) {
                op = operating_point4;
        } else {
                // Unreachable; flags.cpp validates the quality setting
                // (and remaps quality 0 to 1).
                assert(false);
        }

        compute_flow.reset(new DISComputeFlow(width, height, op));
        interpolate.reset(new Interpolate(op, /*split_ycbcr_output=*/true));
        interpolate_no_split.reset(new Interpolate(op, /*split_ycbcr_output=*/false));
        chroma_subsampler.reset(new ChromaSubsampler);
        check_error();

        // The “last frame” is initially black.
        unique_ptr<uint8_t[]> y(new uint8_t[global_flags.width * global_flags.height]);
        unique_ptr<uint8_t[]> cb_or_cr(new uint8_t[(global_flags.width / 2) * global_flags.height]);
        memset(y.get(), 16, global_flags.width * global_flags.height);
        memset(cb_or_cr.get(), 128, (global_flags.width / 2) * global_flags.height);
        last_frame = encode_jpeg(y.get(), cb_or_cr.get(), cb_or_cr.get(), global_flags.width, global_flags.height, /*exif_data=*/"");

        if (file_avctx != nullptr) {
                with_subtitles = Mux::WITHOUT_SUBTITLES;
        } else {
                with_subtitles = Mux::WITH_SUBTITLES;
        }
}

VideoStream::~VideoStream()
{
        if (last_flow_tex != 0) {
                compute_flow->release_texture(last_flow_tex);
        }

        for (const unique_ptr<InterpolatedFrameResources> &resource : interpolate_resources) {
                glUnmapNamedBuffer(resource->pbo);
                check_error();
                glDeleteBuffers(1, &resource->pbo);
                check_error();
                glDeleteFramebuffers(2, resource->input_fbos);
                check_error();
                glDeleteFramebuffers(1, &resource->fade_fbo);
                check_error();
                glDeleteTextures(1, &resource->input_tex);
                check_error();
                glDeleteTextures(1, &resource->gray_tex);
                check_error();
                glDeleteTextures(1, &resource->fade_y_output_tex);
                check_error();
                glDeleteTextures(1, &resource->fade_cbcr_output_tex);
                check_error();
                glDeleteTextures(1, &resource->cb_tex);
                check_error();
                glDeleteTextures(1, &resource->cr_tex);
                check_error();
        }
        assert(interpolate_resources.size() == num_interpolate_slots);
}

void VideoStream::start()
{
        if (avctx == nullptr) {
                avctx = avformat_alloc_context();

                // We use Matroska, because it's pretty much the only mux where FFmpeg
                // allows writing chroma location to override JFIF's default center placement.
                // (Note that at the time of writing, however, FFmpeg does not correctly
                // _read_ this information!)
                avctx->oformat = av_guess_format("matroska", nullptr, nullptr);

                uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
                avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
                avctx->pb->write_data_type = &VideoStream::write_packet2_thunk;
                avctx->pb->ignore_boundary_point = 1;

                avctx->flags = AVFMT_FLAG_CUSTOM_IO;
        }

        AVCodecParameters *audio_codecpar = avcodec_parameters_alloc();

        audio_codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
        audio_codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
        audio_codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE;
        audio_codecpar->ch_layout.nb_channels = 2;
        audio_codecpar->ch_layout.u.mask = AV_CH_LAYOUT_STEREO;
        audio_codecpar->sample_rate = OUTPUT_FREQUENCY;

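        // Stream 0 is the MJPEG video; stream 1 is 32-bit PCM stereo
        // (cf. add_audio_or_silence() below).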
        size_t width = global_flags.width, height = global_flags.height;  // Doesn't matter for MJPEG.
        mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", audio_codecpar,
                          AVCOL_SPC_BT709, COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}, with_subtitles));

        avcodec_parameters_free(&audio_codecpar);
        encode_thread = thread(&VideoStream::encode_thread_func, this);
}

void VideoStream::stop()
{
        should_quit = true;
        queue_changed.notify_all();
        clear_queue();
        encode_thread.join();
}

void VideoStream::clear_queue()
{
        deque<QueuedFrame> q;

        {
                lock_guard<mutex> lock(queue_lock);
                q = move(frame_queue);
        }

        // These are not RAII-ed, unfortunately, so we'll need to clean them ourselves.
        // Note that release_texture() is thread-safe.
        for (const QueuedFrame &qf : q) {
                if (qf.type == QueuedFrame::INTERPOLATED ||
                    qf.type == QueuedFrame::FADED_INTERPOLATED) {
                        if (qf.flow_tex != 0) {
                                compute_flow->release_texture(qf.flow_tex);
                        }
                }
                if (qf.type == QueuedFrame::INTERPOLATED) {
                        interpolate->release_texture(qf.output_tex);
                        interpolate->release_texture(qf.cbcr_tex);
                }
        }

        // Destroy q outside the mutex; the QueuedFrame destructors can take
        // locks of their own, so destroying it under queue_lock would risk a
        // double-lock.
}

void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
                                          int64_t output_pts, function<void()> &&display_func,
                                          QueueSpotHolder &&queue_spot_holder,
                                          FrameOnDisk frame, const string &subtitle, bool include_audio)
{
        fprintf(stderr, "output_pts=%" PRId64 "  original      input_pts=%" PRId64 "\n", output_pts, frame.pts);

        QueuedFrame qf;
        qf.local_pts = local_pts;
        qf.type = QueuedFrame::ORIGINAL;
        qf.output_pts = output_pts;
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;
        FrameReader::Frame read_frame = frame_reader.read_frame(frame, /*read_video=*/true, include_audio);
        qf.encoded_jpeg.reset(new string(move(read_frame.video)));
        qf.audio = move(read_frame.audio);

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64_t output_pts,
                                       function<void()> &&display_func,
                                       QueueSpotHolder &&queue_spot_holder,
                                       FrameOnDisk frame1_spec, FrameOnDisk frame2_spec,
                                       float fade_alpha, const string &subtitle)
{
        fprintf(stderr, "output_pts=%" PRId64 "  faded         input_pts=%" PRId64 ",%" PRId64 "  fade_alpha=%.2f\n", output_pts, frame1_spec.pts, frame2_spec.pts, fade_alpha);

        // Get the temporary OpenGL resources we need for doing the fade.
        // (We share these with interpolated frames, which is slightly
        // overkill, but there's no need to waste resources on keeping
        // separate pools around.)
        BorrowedInterpolatedFrameResources resources;
        {
                lock_guard<mutex> lock(queue_lock);
                if (interpolate_resources.empty()) {
                        fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
                        return;
                }
                resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
                interpolate_resources.pop_front();
        }

        bool did_decode;

        shared_ptr<Frame> frame1 = decode_jpeg_with_cache(frame1_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
        shared_ptr<Frame> frame2 = decode_jpeg_with_cache(frame2_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
        wait_for_upload(frame1);
        wait_for_upload(frame2);

        ycbcr_semiplanar_converter->prepare_chain_for_fade(frame1, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);

        QueuedFrame qf;
        qf.local_pts = local_pts;
        qf.type = QueuedFrame::FADED;
        qf.output_pts = output_pts;
        qf.frame1 = frame1_spec;
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;

        qf.secondary_frame = frame2_spec;

        // Subsample and split Cb/Cr.
        chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);

        // Read it down (asynchronously) to the CPU.
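        // The planes are packed into the PBO as Y' (width * height bytes),
        // then Cb, then Cr (half width each). The size arguments below are
        // upper bounds on the remaining buffer space, not exact plane sizes.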
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
        check_error();
        glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 4, BUFFER_OFFSET(0));
        check_error();
        glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3, BUFFER_OFFSET(global_flags.width * global_flags.height));
        check_error();
        glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3 - (global_flags.width / 2) * global_flags.height, BUFFER_OFFSET(global_flags.width * global_flags.height + (global_flags.width / 2) * global_flags.height));
        check_error();
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

        // Set a fence we can wait for to make sure the CPU sees the read.
        glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
        check_error();
        qf.fence_created = steady_clock::now();
        qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        check_error();
        qf.resources = move(resources);

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts,
                                              int64_t output_pts, function<void(shared_ptr<Frame>)> &&display_func,
                                              QueueSpotHolder &&queue_spot_holder,
                                              FrameOnDisk frame1, FrameOnDisk frame2,
                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle,
                                              bool play_audio)
{
        if (secondary_frame.pts != -1) {
                fprintf(stderr, "output_pts=%" PRId64 "  interpolated  input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f  secondary_pts=%" PRId64 "  fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha);
        } else {
                fprintf(stderr, "output_pts=%" PRId64 "  interpolated  input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f\n", output_pts, frame1.pts, frame2.pts, alpha);
        }

        // Get the temporary OpenGL resources we need for doing the interpolation.
        BorrowedInterpolatedFrameResources resources;
        {
                lock_guard<mutex> lock(queue_lock);
                if (interpolate_resources.empty()) {
                        fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
                        return;
                }
                resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
                interpolate_resources.pop_front();
        }

        QueuedFrame qf;
        qf.type = (secondary_frame.pts == -1) ? QueuedFrame::INTERPOLATED : QueuedFrame::FADED_INTERPOLATED;
        qf.output_pts = output_pts;
        qf.display_decoded_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.local_pts = local_pts;
        qf.subtitle = subtitle;

        if (play_audio) {
                qf.audio = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio;
        }

        check_error();

        // Convert frame0 and frame1 to OpenGL textures.
        for (size_t frame_no = 0; frame_no < 2; ++frame_no) {
                FrameOnDisk frame_spec = frame_no == 1 ? frame2 : frame1;
                bool did_decode;
                shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
                wait_for_upload(frame);
                ycbcr_converter->prepare_chain_for_conversion(frame)->render_to_fbo(resources->input_fbos[frame_no], global_flags.width, global_flags.height);
                if (frame_no == 1) {
                        qf.exif_data = frame->exif_data;  // Use the white point from the last frame.
                }
        }

        glGenerateTextureMipmap(resources->input_tex);
        check_error();
        glGenerateTextureMipmap(resources->gray_tex);
        check_error();

        GLuint flow_tex;
        if (last_flow_tex != 0 && frame1 == last_frame1 && frame2 == last_frame2) {
                // Reuse the flow from previous computation. This frequently happens
                // if we slow down by more than 2x, so that there are multiple interpolated
                // frames between each original.
                flow_tex = last_flow_tex;
                qf.flow_tex = 0;
        } else {
                // Cache miss, so release last_flow_tex.
                qf.flow_tex = last_flow_tex;

                // Compute the flow.
                flow_tex = compute_flow->exec(resources->gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
                check_error();

                // Store the flow texture for possible reuse next frame.
                last_flow_tex = flow_tex;
                last_frame1 = frame1;
                last_frame2 = frame2;
        }

        if (secondary_frame.pts != -1) {
                // Fade. First kick off the interpolation.
                tie(qf.output_tex, ignore) = interpolate_no_split->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
                check_error();

                // Now decode the image we are fading against.
                bool did_decode;
                shared_ptr<Frame> secondary = decode_jpeg_with_cache(secondary_frame, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
                wait_for_upload(secondary);

                // Then fade against it, putting it into the fade Y' and CbCr textures.
                RGBTriplet neutral_color = get_neutral_color(qf.exif_data);
                ycbcr_semiplanar_converter->prepare_chain_for_fade_from_texture(qf.output_tex, neutral_color, global_flags.width, global_flags.height, secondary, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);

                // Subsample and split Cb/Cr.
                chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);

                interpolate_no_split->release_texture(qf.output_tex);

                // We already applied the white balance, so don't have the client redo it.
                qf.exif_data.clear();
        } else {
                tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
                check_error();

                // Subsample and split Cb/Cr.
                chroma_subsampler->subsample_chroma(qf.cbcr_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);
        }

        // We could have released qf.flow_tex here, but to make sure we don't cause a stall
        // when trying to reuse it for the next frame, we can just as well hold on to it
        // and release it only when the readback is done.
        //
        // TODO: This is maybe less relevant now that qf.flow_tex contains the texture we used
        // _last_ frame, not this one.

        // Read it down (asynchronously) to the CPU.
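        // (Same PBO plane layout and size-argument convention as in
        // schedule_faded_frame() above.)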
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
        check_error();
        if (secondary_frame.pts != -1) {
                glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 4, BUFFER_OFFSET(0));
        } else {
                glGetTextureImage(qf.output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 4, BUFFER_OFFSET(0));
        }
        check_error();
        glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3, BUFFER_OFFSET(global_flags.width * global_flags.height));
        check_error();
        glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3 - (global_flags.width / 2) * global_flags.height, BUFFER_OFFSET(global_flags.width * global_flags.height + (global_flags.width / 2) * global_flags.height));
        check_error();
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

        // Set a fence we can wait for to make sure the CPU sees the read.
        glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
        check_error();
        qf.fence_created = steady_clock::now();
        qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        check_error();
        qf.resources = move(resources);

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts,
                                         int64_t output_pts, function<void()> &&display_func,
                                         QueueSpotHolder &&queue_spot_holder, const string &subtitle)
{
        QueuedFrame qf;
        qf.type = QueuedFrame::REFRESH;
        qf.output_pts = output_pts;
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

void VideoStream::schedule_silence(steady_clock::time_point local_pts, int64_t output_pts,
                                   int64_t length_pts, QueueSpotHolder &&queue_spot_holder)
{
        QueuedFrame qf;
        qf.type = QueuedFrame::SILENCE;
        qf.output_pts = output_pts;
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.silence_length_pts = length_pts;

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

namespace {

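// Copy a single-channel texture into a freshly allocated one, so that the
// UI thread can keep displaying it after the pooled source texture has been
// recycled for the next frame.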
RefCountedTexture clone_r8_texture(GLuint src_tex, unsigned width, unsigned height)
{
        GLuint tex;
        glCreateTextures(GL_TEXTURE_2D, 1, &tex);
        check_error();
        glTextureStorage2D(tex, 1, GL_R8, width, height);
        check_error();
        glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, 0, 0, 0,
                           tex, GL_TEXTURE_2D, 0, 0, 0, 0,
                           width, height, 1);
        check_error();
        glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
        check_error();
        glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        check_error();
        glTextureParameteri(tex, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        check_error();
        glTextureParameteri(tex, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        check_error();

        return RefCountedTexture(new GLuint(tex), TextureDeleter());
}

}  // namespace

void VideoStream::encode_thread_func()
{
        pthread_setname_np(pthread_self(), "VideoStream");
        QSurface *surface = create_surface();
        QOpenGLContext *context = create_context(surface);
        bool ok = make_current(context, surface);
        if (!ok) {
                fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
                abort();
        }

        init_pbo_pool();

        while (!should_quit) {
                QueuedFrame qf;
                {
                        unique_lock<mutex> lock(queue_lock);

                        // Wait until we have a frame to play.
                        queue_changed.wait(lock, [this] {
                                return !frame_queue.empty() || should_quit;
                        });
                        if (should_quit) {
                                break;
                        }
                        steady_clock::time_point frame_start = frame_queue.front().local_pts;

                        // Now sleep until the frame is supposed to start (the usual case),
                        // _or_ clear_queue() happened.
                        bool aborted;
                        if (output_fast_forward) {
                                aborted = frame_queue.empty() || frame_queue.front().local_pts != frame_start;
                        } else {
                                aborted = queue_changed.wait_until(lock, frame_start, [this, frame_start] {
                                        return frame_queue.empty() || frame_queue.front().local_pts != frame_start;
                                });
                        }
                        if (aborted) {
                                // clear_queue() happened, so don't play this frame after all.
                                continue;
                        }
                        qf = move(frame_queue.front());
                        frame_queue.pop_front();
                }

                // Hack: We mux the subtitle packet one time unit before the actual frame,
                // so that Nageru is sure to get it first.
                if (!qf.subtitle.empty() && with_subtitles == Mux::WITH_SUBTITLES) {
                        AVPacketWithDeleter pkt = av_packet_alloc_unique();
                        pkt->stream_index = mux->get_subtitle_stream_idx();
                        assert(pkt->stream_index != -1);
                        pkt->data = (uint8_t *)qf.subtitle.data();
                        pkt->size = qf.subtitle.size();
                        pkt->flags = 0;
                        pkt->duration = lrint(TIMEBASE / global_flags.output_framerate);  // Doesn't really matter for Nageru.
                        mux->add_packet(*pkt, qf.output_pts - 1, qf.output_pts - 1);
                }

                if (qf.type == QueuedFrame::ORIGINAL) {
                        // Send the JPEG frame on, unchanged.
                        string jpeg = move(*qf.encoded_jpeg);
                        AVPacketWithDeleter pkt = av_packet_alloc_unique();
                        pkt->stream_index = 0;
                        pkt->data = (uint8_t *)jpeg.data();
                        pkt->size = jpeg.size();
                        pkt->flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(*pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);

                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::FADED) {
                        steady_clock::time_point start = steady_clock::now();
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
                        steady_clock::time_point stop = steady_clock::now();
                        metric_fade_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
                        metric_fade_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());

                        // Now JPEG encode it, and send it on to the stream.
                        string jpeg = encode_jpeg_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height, /*exif_data=*/"");

                        AVPacketWithDeleter pkt = av_packet_alloc_unique();
                        pkt->stream_index = 0;
                        pkt->data = (uint8_t *)jpeg.data();
                        pkt->size = jpeg.size();
                        pkt->flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(*pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);

                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
                        steady_clock::time_point start = steady_clock::now();
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
                        steady_clock::time_point stop = steady_clock::now();
                        metric_interpolation_fence_wait_time_seconds.count_event(duration<double>(stop - start).count());
                        metric_interpolation_latency_seconds.count_event(duration<double>(stop - qf.fence_created).count());

                        // Send it on to display.
                        if (qf.display_decoded_func != nullptr) {
                                shared_ptr<Frame> frame(new Frame);
                                if (qf.type == QueuedFrame::FADED_INTERPOLATED) {
                                        frame->y = clone_r8_texture(qf.resources->fade_y_output_tex, global_flags.width, global_flags.height);
                                } else {
                                        frame->y = clone_r8_texture(qf.output_tex, global_flags.width, global_flags.height);
                                }
                                frame->cb = clone_r8_texture(qf.resources->cb_tex, global_flags.width / 2, global_flags.height);
                                frame->cr = clone_r8_texture(qf.resources->cr_tex, global_flags.width / 2, global_flags.height);
                                frame->width = global_flags.width;
                                frame->height = global_flags.height;
                                frame->chroma_subsampling_x = 2;
                                frame->chroma_subsampling_y = 1;
                                frame->uploaded_ui_thread = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
                                qf.display_decoded_func(move(frame));
                        }

                        // Now JPEG encode it, and send it on to the stream.
                        string jpeg = encode_jpeg_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height, move(qf.exif_data));
                        if (qf.flow_tex != 0) {
                                compute_flow->release_texture(qf.flow_tex);
                        }
                        if (qf.type != QueuedFrame::FADED_INTERPOLATED) {
                                interpolate->release_texture(qf.output_tex);
                                interpolate->release_texture(qf.cbcr_tex);
                        }

                        AVPacketWithDeleter pkt = av_packet_alloc_unique();
                        pkt->stream_index = 0;
                        pkt->data = (uint8_t *)jpeg.data();
                        pkt->size = jpeg.size();
                        pkt->flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(*pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);

                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::REFRESH) {
                        AVPacketWithDeleter pkt = av_packet_alloc_unique();
                        pkt->stream_index = 0;
                        pkt->data = (uint8_t *)last_frame.data();
                        pkt->size = last_frame.size();
                        pkt->flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(*pkt, qf.output_pts, qf.output_pts);

                        add_audio_or_silence(qf);  // Definitely silence.
                } else if (qf.type == QueuedFrame::SILENCE) {
                        add_silence(qf.output_pts, qf.silence_length_pts);
                } else {
                        assert(false);
                }
                if (qf.display_func != nullptr) {
                        qf.display_func();
                }
        }
}

int VideoStream::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
        VideoStream *video_stream = (VideoStream *)opaque;
        return video_stream->write_packet2(buf, buf_size, type, time);
}

int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
        if (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT) {
                seen_sync_markers = true;
        } else if (type == AVIO_DATA_MARKER_UNKNOWN && !seen_sync_markers) {
                // We don't know if this is a keyframe or not (the muxer could
                // avoid marking it), so we just have to make the best of it.
                type = AVIO_DATA_MARKER_SYNC_POINT;
        }

        HTTPD::StreamID stream_id{ HTTPD::MAIN_STREAM, 0 };
        if (type == AVIO_DATA_MARKER_HEADER) {
                stream_mux_header.append((char *)buf, buf_size);
                global_httpd->set_header(stream_id, stream_mux_header);
        } else {
                global_httpd->add_data(stream_id, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
        }
        return buf_size;
}

void VideoStream::add_silence(int64_t pts, int64_t length_pts)
{
        // At 59.94, this will never quite add up (even discounting refresh frames,
        // which have unpredictable length), but hopefully, the player in the other
        // end should be able to stretch silence easily enough.
        long num_samples = lrint(length_pts * double(OUTPUT_FREQUENCY) / double(TIMEBASE)) * 2;  // Two channels (stereo).
        uint8_t *zero = (uint8_t *)calloc(num_samples, sizeof(int32_t));

        AVPacketWithDeleter pkt = av_packet_alloc_unique();
        pkt->stream_index = 1;
        pkt->data = zero;
        pkt->size = num_samples * sizeof(int32_t);
        pkt->flags = AV_PKT_FLAG_KEY;
        mux->add_packet(*pkt, pts, pts);

        free(zero);
}

void VideoStream::add_audio_or_silence(const QueuedFrame &qf)
{
        if (qf.audio.empty()) {
                int64_t frame_length = lrint(double(TIMEBASE) / global_flags.output_framerate);
                add_silence(qf.output_pts, frame_length);
        } else {
                AVPacketWithDeleter pkt = av_packet_alloc_unique();
                pkt->stream_index = 1;
                pkt->data = (uint8_t *)qf.audio.data();
                pkt->size = qf.audio.size();
                pkt->flags = AV_PKT_FLAG_KEY;
                mux->add_packet(*pkt, qf.output_pts, qf.output_pts);
        }
}