#include "video_stream.h"

extern "C" {
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
}

#include "chroma_subsampler.h"
#include "flags.h"
#include "flow.h"
#include "jpeg_frame_view.h"
#include "movit/util.h"
#include "player.h"
#include "shared/context.h"
#include "shared/httpd.h"
#include "shared/shared_defs.h"
#include "shared/mux.h"
#include "util.h"
#include "ycbcr_converter.h"

#include <epoxy/glx.h>
#include <jpeglib.h>
#include <unistd.h>

using namespace std;
using namespace std::chrono;

extern HTTPD *global_httpd;

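// A libjpeg destination manager that writes the compressed bitstream directly
// into a std::string, so the result can be handed around (and moved) without
// an extra copy. libjpeg's own jpeg_mem_dest() manages a malloc()ed buffer
// instead, which is less convenient here, since everything downstream of
// encode_jpeg() wants a string.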
struct VectorDestinationManager {
        jpeg_destination_mgr pub;
        string dest;

        VectorDestinationManager()
        {
                pub.init_destination = init_destination_thunk;
                pub.empty_output_buffer = empty_output_buffer_thunk;
                pub.term_destination = term_destination_thunk;
        }

        static void init_destination_thunk(j_compress_ptr ptr)
        {
                ((VectorDestinationManager *)(ptr->dest))->init_destination();
        }

        inline void init_destination()
        {
                make_room(0);
        }

        static boolean empty_output_buffer_thunk(j_compress_ptr ptr)
        {
                return ((VectorDestinationManager *)(ptr->dest))->empty_output_buffer();
        }

        inline bool empty_output_buffer()
        {
                // Per the libjpeg documentation, pub.free_in_buffer must be
                // ignored here; the entire buffer counts as used.
                make_room(dest.size());
                return true;
        }

        inline void make_room(size_t bytes_used)
        {
                dest.resize(bytes_used + 4096);
                dest.resize(dest.capacity());  // Use all the space the allocator actually gave us.
                pub.next_output_byte = (uint8_t *)dest.data() + bytes_used;
                pub.free_in_buffer = dest.size() - bytes_used;
        }

        static void term_destination_thunk(j_compress_ptr ptr)
        {
                ((VectorDestinationManager *)(ptr->dest))->term_destination();
        }

        inline void term_destination()
        {
                dest.resize(dest.size() - pub.free_in_buffer);
        }
};
static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");

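// Encodes a planar 4:2:2 Y'CbCr image (Cb and Cr at half horizontal
// resolution) into a JPEG held in a string. Note that the row loop below
// hands libjpeg eight rows at a time, so the height is assumed to be a
// multiple of 8 (which holds for common output resolutions such as 1280x720).
//
// Usage sketch (y_plane/cb_plane/cr_plane are hypothetical buffers with the
// layout described above):
//
//   string jpeg = encode_jpeg(y_plane, cb_plane, cr_plane, 1280, 720);
//   // "jpeg" now holds a complete bitstream, ready to be muxed or stored.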
string encode_jpeg(const uint8_t *y_data, const uint8_t *cb_data, const uint8_t *cr_data, unsigned width, unsigned height)
{
        VectorDestinationManager dest;

        jpeg_compress_struct cinfo;
        jpeg_error_mgr jerr;
        cinfo.err = jpeg_std_error(&jerr);
        jpeg_create_compress(&cinfo);

        cinfo.dest = (jpeg_destination_mgr *)&dest;
        cinfo.input_components = 3;
        cinfo.in_color_space = JCS_RGB;
        jpeg_set_defaults(&cinfo);
        constexpr int quality = 90;
        jpeg_set_quality(&cinfo, quality, /*force_baseline=*/false);

        cinfo.image_width = width;
        cinfo.image_height = height;
        cinfo.raw_data_in = true;
        jpeg_set_colorspace(&cinfo, JCS_YCbCr);
        cinfo.comp_info[0].h_samp_factor = 2;
        cinfo.comp_info[0].v_samp_factor = 1;
        cinfo.comp_info[1].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = 1;
        cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[2].v_samp_factor = 1;
        cinfo.CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
        jpeg_start_compress(&cinfo, true);

        // This comment marker is private to FFmpeg. It signals limited Y'CbCr range
        // (and nothing else).
        jpeg_write_marker(&cinfo, JPEG_COM, (const JOCTET *)"CS=ITU601", strlen("CS=ITU601"));

        JSAMPROW yptr[8], cbptr[8], crptr[8];
        JSAMPARRAY data[3] = { yptr, cbptr, crptr };
        for (unsigned y = 0; y < height; y += 8) {
                for (unsigned yy = 0; yy < 8; ++yy) {
                        yptr[yy] = const_cast<JSAMPROW>(&y_data[(y + yy) * width]);
                        cbptr[yy] = const_cast<JSAMPROW>(&cb_data[(y + yy) * width / 2]);
                        crptr[yy] = const_cast<JSAMPROW>(&cr_data[(y + yy) * width / 2]);
                }

                jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
        }

        jpeg_finish_compress(&cinfo);
        jpeg_destroy_compress(&cinfo);

        return move(dest.dest);
}

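// The constructor sets up num_interpolate_slots sets of input/output textures,
// FBOs and persistently mapped PBOs up front; schedule_faded_frame() and
// schedule_interpolated_frame() then borrow one slot each, so the number of
// frames in flight on the GPU is bounded without any per-frame allocation.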
VideoStream::VideoStream(AVFormatContext *file_avctx)
        : avctx(file_avctx), output_fast_forward(file_avctx != nullptr)
{
        ycbcr_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_DUAL_YCBCR, /*resource_pool=*/nullptr));
        ycbcr_semiplanar_converter.reset(new YCbCrConverter(YCbCrConverter::OUTPUT_TO_SEMIPLANAR, /*resource_pool=*/nullptr));

        GLuint input_tex[num_interpolate_slots], gray_tex[num_interpolate_slots];
        GLuint fade_y_output_tex[num_interpolate_slots], fade_cbcr_output_tex[num_interpolate_slots];
        GLuint cb_tex[num_interpolate_slots], cr_tex[num_interpolate_slots];

        glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, input_tex);
        glCreateTextures(GL_TEXTURE_2D_ARRAY, num_interpolate_slots, gray_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_y_output_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, fade_cbcr_output_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cb_tex);
        glCreateTextures(GL_TEXTURE_2D, num_interpolate_slots, cr_tex);
        check_error();

        size_t width = global_flags.width, height = global_flags.height;
        // Full mipmap pyramids; the input and grayscale textures are mipmapped
        // so that the flow computation can work coarse-to-fine.
        int levels = find_num_levels(width, height);
        for (size_t i = 0; i < num_interpolate_slots; ++i) {
                glTextureStorage3D(input_tex[i], levels, GL_RGBA8, width, height, 2);
                check_error();
                glTextureStorage3D(gray_tex[i], levels, GL_R8, width, height, 2);
                check_error();
                glTextureStorage2D(fade_y_output_tex[i], 1, GL_R8, width, height);
                check_error();
                glTextureStorage2D(fade_cbcr_output_tex[i], 1, GL_RG8, width, height);
                check_error();
                glTextureStorage2D(cb_tex[i], 1, GL_R8, width / 2, height);
                check_error();
                glTextureStorage2D(cr_tex[i], 1, GL_R8, width / 2, height);
                check_error();

                unique_ptr<InterpolatedFrameResources> resource(new InterpolatedFrameResources);
                resource->owner = this;
                resource->input_tex = input_tex[i];
                resource->gray_tex = gray_tex[i];
                resource->fade_y_output_tex = fade_y_output_tex[i];
                resource->fade_cbcr_output_tex = fade_cbcr_output_tex[i];
                resource->cb_tex = cb_tex[i];
                resource->cr_tex = cr_tex[i];
                glCreateFramebuffers(2, resource->input_fbos);
                check_error();
                glCreateFramebuffers(1, &resource->fade_fbo);
                check_error();

                glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 0);
                check_error();
                glNamedFramebufferTextureLayer(resource->input_fbos[0], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 0);
                check_error();
                glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 1);
                check_error();
                glNamedFramebufferTextureLayer(resource->input_fbos[1], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 1);
                check_error();
                glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT0, fade_y_output_tex[i], 0);
                check_error();
                glNamedFramebufferTexture(resource->fade_fbo, GL_COLOR_ATTACHMENT1, fade_cbcr_output_tex[i], 0);
                check_error();

                GLuint bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
                glNamedFramebufferDrawBuffers(resource->input_fbos[0], 2, bufs);
                check_error();
                glNamedFramebufferDrawBuffers(resource->input_fbos[1], 2, bufs);
                check_error();
                glNamedFramebufferDrawBuffers(resource->fade_fbo, 2, bufs);
                check_error();

                glCreateBuffers(1, &resource->pbo);
                check_error();
                glNamedBufferStorage(resource->pbo, width * height * 4, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
                check_error();
                resource->pbo_contents = glMapNamedBufferRange(resource->pbo, 0, width * height * 4, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
                interpolate_resources.push_back(move(resource));
        }

        check_error();

        OperatingPoint op;
        if (global_flags.interpolation_quality == 0 ||
            global_flags.interpolation_quality == 1) {
                op = operating_point1;
        } else if (global_flags.interpolation_quality == 2) {
                op = operating_point2;
        } else if (global_flags.interpolation_quality == 3) {
                op = operating_point3;
        } else if (global_flags.interpolation_quality == 4) {
                op = operating_point4;
        } else {
                // Quality 0 will be changed to 1 in flags.cpp.
                assert(false);
        }

        compute_flow.reset(new DISComputeFlow(width, height, op));
        interpolate.reset(new Interpolate(op, /*split_ycbcr_output=*/true));
        interpolate_no_split.reset(new Interpolate(op, /*split_ycbcr_output=*/false));
        chroma_subsampler.reset(new ChromaSubsampler);
        check_error();

        // The “last frame” is initially black.
        unique_ptr<uint8_t[]> y(new uint8_t[global_flags.width * global_flags.height]);
        unique_ptr<uint8_t[]> cb_or_cr(new uint8_t[(global_flags.width / 2) * global_flags.height]);
        memset(y.get(), 16, global_flags.width * global_flags.height);
        memset(cb_or_cr.get(), 128, (global_flags.width / 2) * global_flags.height);
        last_frame = encode_jpeg(y.get(), cb_or_cr.get(), cb_or_cr.get(), global_flags.width, global_flags.height);
}

VideoStream::~VideoStream()
{
        if (last_flow_tex != 0) {
                compute_flow->release_texture(last_flow_tex);
        }

        for (const unique_ptr<InterpolatedFrameResources> &resource : interpolate_resources) {
                glUnmapNamedBuffer(resource->pbo);
                check_error();
                glDeleteBuffers(1, &resource->pbo);
                check_error();
                glDeleteFramebuffers(2, resource->input_fbos);
                check_error();
                glDeleteFramebuffers(1, &resource->fade_fbo);
                check_error();
                glDeleteTextures(1, &resource->input_tex);
                check_error();
                glDeleteTextures(1, &resource->gray_tex);
                check_error();
                glDeleteTextures(1, &resource->fade_y_output_tex);
                check_error();
                glDeleteTextures(1, &resource->fade_cbcr_output_tex);
                check_error();
                glDeleteTextures(1, &resource->cb_tex);
                check_error();
                glDeleteTextures(1, &resource->cr_tex);
                check_error();
        }
        assert(interpolate_resources.size() == num_interpolate_slots);
}

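// start() runs in one of two modes: streaming (avctx == nullptr on entry),
// where we allocate our own AVFormatContext with custom I/O and feed the
// muxed bytes to the HTTP server via write_packet2(), or file output, where
// the caller has already handed us a context for a file on disk (in which
// case output_fast_forward is set and we encode as fast as we can, without
// pacing against the clock).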
void VideoStream::start()
{
        if (avctx == nullptr) {
                avctx = avformat_alloc_context();

                // We use Matroska, because it's pretty much the only mux where FFmpeg
                // allows writing chroma location to override JFIF's default center placement.
                // (Note that at the time of writing, however, FFmpeg does not correctly
                // _read_ this information!)
                avctx->oformat = av_guess_format("matroska", nullptr, nullptr);

                uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
                avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
                avctx->pb->write_data_type = &VideoStream::write_packet2_thunk;
                avctx->pb->ignore_boundary_point = 1;

                avctx->flags = AVFMT_FLAG_CUSTOM_IO;
        }

        AVCodecParameters *audio_codecpar = avcodec_parameters_alloc();

        audio_codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
        audio_codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
        audio_codecpar->channel_layout = AV_CH_LAYOUT_STEREO;
        audio_codecpar->channels = 2;
        audio_codecpar->sample_rate = OUTPUT_FREQUENCY;

        size_t width = global_flags.width, height = global_flags.height;  // Doesn't matter for MJPEG.
        mux.reset(new Mux(avctx, width, height, Mux::CODEC_MJPEG, /*video_extradata=*/"", audio_codecpar,
                          AVCOL_SPC_BT709, COARSE_TIMEBASE, /*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}, Mux::WITH_SUBTITLES));

        avcodec_parameters_free(&audio_codecpar);
        encode_thread = thread(&VideoStream::encode_thread_func, this);
}

void VideoStream::stop()
{
        should_quit = true;
        queue_changed.notify_all();
        clear_queue();
        encode_thread.join();
}

void VideoStream::clear_queue()
{
        deque<QueuedFrame> q;

        {
                lock_guard<mutex> lock(queue_lock);
                q = move(frame_queue);
        }

        // These are not RAII-ed, unfortunately, so we'll need to clean them ourselves.
        // Note that release_texture() is thread-safe.
        for (const QueuedFrame &qf : q) {
                if (qf.type == QueuedFrame::INTERPOLATED ||
                    qf.type == QueuedFrame::FADED_INTERPOLATED) {
                        if (qf.flow_tex != 0) {
                                compute_flow->release_texture(qf.flow_tex);
                        }
                }
                if (qf.type == QueuedFrame::INTERPOLATED) {
                        interpolate->release_texture(qf.output_tex);
                        interpolate->release_texture(qf.cbcr_tex);
                }
        }

        // Destroy q outside the mutex; destroying the queued frames takes locks
        // of their own, so doing it under queue_lock would be a double-lock.
}

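// Each schedule_*() call below turns one output frame into a QueuedFrame and
// pushes it onto frame_queue; encode_thread_func() pops them in order, waits
// for any pending GPU work, and muxes the result. The variants differ only in
// how the pixels are produced: passed through unchanged, faded, interpolated,
// or repeated from the previous frame.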
void VideoStream::schedule_original_frame(steady_clock::time_point local_pts,
                                          int64_t output_pts, function<void()> &&display_func,
                                          QueueSpotHolder &&queue_spot_holder,
                                          FrameOnDisk frame, const string &subtitle, bool include_audio)
{
        fprintf(stderr, "output_pts=%" PRId64 "  original      input_pts=%" PRId64 "\n", output_pts, frame.pts);

        QueuedFrame qf;
        qf.local_pts = local_pts;
        qf.type = QueuedFrame::ORIGINAL;
        qf.output_pts = output_pts;
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;
        FrameReader::Frame read_frame = frame_reader.read_frame(frame, /*read_video=*/true, include_audio);
        qf.encoded_jpeg.reset(new string(move(read_frame.video)));
        qf.audio = move(read_frame.audio);

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

void VideoStream::schedule_faded_frame(steady_clock::time_point local_pts, int64_t output_pts,
                                       function<void()> &&display_func,
                                       QueueSpotHolder &&queue_spot_holder,
                                       FrameOnDisk frame1_spec, FrameOnDisk frame2_spec,
                                       float fade_alpha, const string &subtitle)
{
        fprintf(stderr, "output_pts=%" PRId64 "  faded         input_pts=%" PRId64 ",%" PRId64 "  fade_alpha=%.2f\n", output_pts, frame1_spec.pts, frame2_spec.pts, fade_alpha);

        // Get the temporary OpenGL resources we need for doing the fade.
        // (We share these with interpolated frames, which is slightly
        // overkill, but there's no need to waste resources on keeping
        // separate pools around.)
        BorrowedInterpolatedFrameResources resources;
        {
                lock_guard<mutex> lock(queue_lock);
                if (interpolate_resources.empty()) {
                        fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
                        return;
                }
                resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
                interpolate_resources.pop_front();
        }

        bool did_decode;

        shared_ptr<Frame> frame1 = decode_jpeg_with_cache(frame1_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
        shared_ptr<Frame> frame2 = decode_jpeg_with_cache(frame2_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);

        ycbcr_semiplanar_converter->prepare_chain_for_fade(frame1, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);

        QueuedFrame qf;
        qf.local_pts = local_pts;
        qf.type = QueuedFrame::FADED;
        qf.output_pts = output_pts;
        qf.frame1 = frame1_spec;
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;

        qf.secondary_frame = frame2_spec;

        // Subsample and split Cb/Cr.
        chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);

        // Read it down (asynchronously) to the CPU.
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
        check_error();
        glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 4, BUFFER_OFFSET(0));
        check_error();
        glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3, BUFFER_OFFSET(global_flags.width * global_flags.height));
        check_error();
        glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3 - (global_flags.width / 2) * global_flags.height, BUFFER_OFFSET(global_flags.width * global_flags.height + (global_flags.width / 2) * global_flags.height));
        check_error();
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

        // Set a fence we can wait for to make sure the CPU sees the read.
        glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
        check_error();
        qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        check_error();
        qf.resources = move(resources);
        qf.local_pts = local_pts;

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

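// Interpolated frames are the expensive path: both input frames are decoded
// and uploaded, optical flow between them is computed (or reused from the
// previous output frame if the input pair is unchanged), and the
// interpolation shader renders the in-between image at the given alpha.
// If a secondary frame is set, the interpolated result is additionally
// faded against it before readback.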
void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts,
                                              int64_t output_pts, function<void(shared_ptr<Frame>)> &&display_func,
                                              QueueSpotHolder &&queue_spot_holder,
                                              FrameOnDisk frame1, FrameOnDisk frame2,
                                              float alpha, FrameOnDisk secondary_frame, float fade_alpha, const string &subtitle,
                                              bool play_audio)
{
        if (secondary_frame.pts != -1) {
                fprintf(stderr, "output_pts=%" PRId64 "  interpolated  input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f  secondary_pts=%" PRId64 "  fade_alpha=%.2f\n", output_pts, frame1.pts, frame2.pts, alpha, secondary_frame.pts, fade_alpha);
        } else {
                fprintf(stderr, "output_pts=%" PRId64 "  interpolated  input_pts1=%" PRId64 " input_pts2=%" PRId64 " alpha=%.3f\n", output_pts, frame1.pts, frame2.pts, alpha);
        }

        // Get the temporary OpenGL resources we need for doing the interpolation.
        BorrowedInterpolatedFrameResources resources;
        {
                lock_guard<mutex> lock(queue_lock);
                if (interpolate_resources.empty()) {
                        fprintf(stderr, "WARNING: Too many interpolated frames already in transit; dropping one.\n");
                        return;
                }
                resources = BorrowedInterpolatedFrameResources(interpolate_resources.front().release());
                interpolate_resources.pop_front();
        }

        QueuedFrame qf;
        qf.type = (secondary_frame.pts == -1) ? QueuedFrame::INTERPOLATED : QueuedFrame::FADED_INTERPOLATED;
        qf.output_pts = output_pts;
        qf.display_decoded_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.local_pts = local_pts;
        qf.subtitle = subtitle;

        if (play_audio) {
                qf.audio = frame_reader.read_frame(frame1, /*read_video=*/false, /*read_audio=*/true).audio;
        }

        check_error();

        // Convert frame0 and frame1 to OpenGL textures.
        for (size_t frame_no = 0; frame_no < 2; ++frame_no) {
                FrameOnDisk frame_spec = frame_no == 1 ? frame2 : frame1;
                bool did_decode;
                shared_ptr<Frame> frame = decode_jpeg_with_cache(frame_spec, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
                ycbcr_converter->prepare_chain_for_conversion(frame)->render_to_fbo(resources->input_fbos[frame_no], global_flags.width, global_flags.height);
        }

        glGenerateTextureMipmap(resources->input_tex);
        check_error();
        glGenerateTextureMipmap(resources->gray_tex);
        check_error();

        GLuint flow_tex;
        if (last_flow_tex != 0 && frame1 == last_frame1 && frame2 == last_frame2) {
                // Reuse the flow from previous computation. This frequently happens
                // if we slow down by more than 2x, so that there are multiple interpolated
                // frames between each original.
                flow_tex = last_flow_tex;
                qf.flow_tex = 0;
        } else {
                // Cache miss, so release last_flow_tex.
                qf.flow_tex = last_flow_tex;

                // Compute the flow.
                flow_tex = compute_flow->exec(resources->gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
                check_error();

                // Store the flow texture for possible reuse next frame.
                last_flow_tex = flow_tex;
                last_frame1 = frame1;
                last_frame2 = frame2;
        }

        if (secondary_frame.pts != -1) {
                // Fade. First kick off the interpolation.
                tie(qf.output_tex, ignore) = interpolate_no_split->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
                check_error();

                // Now decode the image we are fading against.
                bool did_decode;
                shared_ptr<Frame> frame2 = decode_jpeg_with_cache(secondary_frame, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);

                // Then fade against it, putting it into the fade Y' and CbCr textures.
                ycbcr_semiplanar_converter->prepare_chain_for_fade_from_texture(qf.output_tex, global_flags.width, global_flags.height, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);

                // Subsample and split Cb/Cr.
                chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);

                interpolate_no_split->release_texture(qf.output_tex);
        } else {
                tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
                check_error();

                // Subsample and split Cb/Cr.
                chroma_subsampler->subsample_chroma(qf.cbcr_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);
        }

        // We could have released qf.flow_tex here, but to make sure we don't cause a stall
        // when trying to reuse it for the next frame, we can just as well hold on to it
        // and release it only when the readback is done.
        //
        // TODO: This is maybe less relevant now that qf.flow_tex contains the texture we used
        // _last_ frame, not this one.

        // Read it down (asynchronously) to the CPU.
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, resources->pbo);
        check_error();
        if (secondary_frame.pts != -1) {
                glGetTextureImage(resources->fade_y_output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 4, BUFFER_OFFSET(0));
        } else {
                glGetTextureImage(qf.output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 4, BUFFER_OFFSET(0));
        }
        check_error();
        glGetTextureImage(resources->cb_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3, BUFFER_OFFSET(global_flags.width * global_flags.height));
        check_error();
        glGetTextureImage(resources->cr_tex, 0, GL_RED, GL_UNSIGNED_BYTE, global_flags.width * global_flags.height * 3 - (global_flags.width / 2) * global_flags.height, BUFFER_OFFSET(global_flags.width * global_flags.height + (global_flags.width / 2) * global_flags.height));
        check_error();
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

        // Set a fence we can wait for to make sure the CPU sees the read.
        glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
        check_error();
        qf.fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
        check_error();
        qf.resources = move(resources);

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

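// A refresh frame simply re-sends the last JPEG we encoded (see the REFRESH
// case in encode_thread_func()), so that the stream keeps flowing even when
// nothing new is being played.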
void VideoStream::schedule_refresh_frame(steady_clock::time_point local_pts,
                                         int64_t output_pts, function<void()> &&display_func,
                                         QueueSpotHolder &&queue_spot_holder, const string &subtitle)
{
        QueuedFrame qf;
        qf.type = QueuedFrame::REFRESH;
        qf.output_pts = output_pts;
        qf.display_func = move(display_func);
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.subtitle = subtitle;

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

void VideoStream::schedule_silence(steady_clock::time_point local_pts, int64_t output_pts,
                                   int64_t length_pts, QueueSpotHolder &&queue_spot_holder)
{
        QueuedFrame qf;
        qf.type = QueuedFrame::SILENCE;
        qf.output_pts = output_pts;
        qf.queue_spot_holder = move(queue_spot_holder);
        qf.silence_length_pts = length_pts;

        lock_guard<mutex> lock(queue_lock);
        frame_queue.push_back(move(qf));
        queue_changed.notify_all();
}

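// The readbacks above pack the three planes back-to-back into the PBO:
// first width * height bytes of Y', then (width / 2) * height bytes each of
// Cb and Cr (4:2:2, so chroma is subsampled horizontally only).
// frame_from_pbo() copies them out of the persistently mapped buffer into a
// regular Frame.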
namespace {

shared_ptr<Frame> frame_from_pbo(void *contents, size_t width, size_t height)
{
        size_t chroma_width = width / 2;

        const uint8_t *y = (const uint8_t *)contents;
        const uint8_t *cb = (const uint8_t *)contents + width * height;
        const uint8_t *cr = (const uint8_t *)contents + width * height + chroma_width * height;

        shared_ptr<Frame> frame(new Frame);
        frame->y.reset(new uint8_t[width * height]);
        frame->cb.reset(new uint8_t[chroma_width * height]);
        frame->cr.reset(new uint8_t[chroma_width * height]);
        for (unsigned yy = 0; yy < height; ++yy) {
                memcpy(frame->y.get() + width * yy, y + width * yy, width);
                memcpy(frame->cb.get() + chroma_width * yy, cb + chroma_width * yy, chroma_width);
                memcpy(frame->cr.get() + chroma_width * yy, cr + chroma_width * yy, chroma_width);
        }
        frame->is_semiplanar = false;
        frame->width = width;
        frame->height = height;
        frame->chroma_subsampling_x = 2;
        frame->chroma_subsampling_y = 1;
        frame->pitch_y = width;
        frame->pitch_chroma = chroma_width;
        return frame;
}

}  // namespace

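// The encode thread runs with its own OpenGL context (shared with the main
// one), pops frames off frame_queue, sleeps until each frame's local_pts
// unless we are exporting to a file (output_fast_forward), and then encodes
// and muxes the result.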
void VideoStream::encode_thread_func()
{
        pthread_setname_np(pthread_self(), "VideoStream");
        QSurface *surface = create_surface();
        QOpenGLContext *context = create_context(surface);
        bool ok = make_current(context, surface);
        if (!ok) {
                fprintf(stderr, "Video stream couldn't get an OpenGL context\n");
                abort();
        }

        while (!should_quit) {
                QueuedFrame qf;
                {
                        unique_lock<mutex> lock(queue_lock);

                        // Wait until we have a frame to play.
                        queue_changed.wait(lock, [this] {
                                return !frame_queue.empty() || should_quit;
                        });
                        if (should_quit) {
                                break;
                        }
                        steady_clock::time_point frame_start = frame_queue.front().local_pts;

                        // Now sleep until the frame is supposed to start (the usual case),
                        // _or_ clear_queue() happened.
                        bool aborted;
                        if (output_fast_forward) {
                                aborted = frame_queue.empty() || frame_queue.front().local_pts != frame_start;
                        } else {
                                aborted = queue_changed.wait_until(lock, frame_start, [this, frame_start] {
                                        return frame_queue.empty() || frame_queue.front().local_pts != frame_start;
                                });
                        }
                        if (aborted) {
                                // clear_queue() happened, so don't play this frame after all.
                                continue;
                        }
                        qf = move(frame_queue.front());
                        frame_queue.pop_front();
                }

                // Hack: We mux the subtitle packet one time unit before the actual frame,
                // so that Nageru is sure to get it first.
                if (!qf.subtitle.empty()) {
                        AVPacket pkt;
                        av_init_packet(&pkt);
                        pkt.stream_index = mux->get_subtitle_stream_idx();
                        assert(pkt.stream_index != -1);
                        pkt.data = (uint8_t *)qf.subtitle.data();
                        pkt.size = qf.subtitle.size();
                        pkt.flags = 0;
                        pkt.duration = lrint(TIMEBASE / global_flags.output_framerate);  // Doesn't really matter for Nageru.
                        mux->add_packet(pkt, qf.output_pts - 1, qf.output_pts - 1);
                }

                if (qf.type == QueuedFrame::ORIGINAL) {
                        // Send the JPEG frame on, unchanged.
                        string jpeg = move(*qf.encoded_jpeg);
                        AVPacket pkt;
                        av_init_packet(&pkt);
                        pkt.stream_index = 0;
                        pkt.data = (uint8_t *)jpeg.data();
                        pkt.size = jpeg.size();
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);

                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::FADED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);

                        shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);

                        // Now JPEG encode it, and send it on to the stream.
                        string jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height);

                        AVPacket pkt;
                        av_init_packet(&pkt);
                        pkt.stream_index = 0;
                        pkt.data = (uint8_t *)jpeg.data();
                        pkt.size = jpeg.size();
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);

                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::INTERPOLATED || qf.type == QueuedFrame::FADED_INTERPOLATED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);

                        // Send it on to display.
                        shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
                        if (qf.display_decoded_func != nullptr) {
                                qf.display_decoded_func(frame);
                        }

                        // Now JPEG encode it, and send it on to the stream.
                        string jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height);
                        if (qf.flow_tex != 0) {
                                compute_flow->release_texture(qf.flow_tex);
                        }
                        if (qf.type != QueuedFrame::FADED_INTERPOLATED) {
                                interpolate->release_texture(qf.output_tex);
                                interpolate->release_texture(qf.cbcr_tex);
                        }

                        AVPacket pkt;
                        av_init_packet(&pkt);
                        pkt.stream_index = 0;
                        pkt.data = (uint8_t *)jpeg.data();
                        pkt.size = jpeg.size();
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);
                        last_frame = move(jpeg);

                        add_audio_or_silence(qf);
                } else if (qf.type == QueuedFrame::REFRESH) {
                        AVPacket pkt;
                        av_init_packet(&pkt);
                        pkt.stream_index = 0;
                        pkt.data = (uint8_t *)last_frame.data();
                        pkt.size = last_frame.size();
                        pkt.flags = AV_PKT_FLAG_KEY;
                        mux->add_packet(pkt, qf.output_pts, qf.output_pts);

                        add_audio_or_silence(qf);  // Definitely silence.
                } else if (qf.type == QueuedFrame::SILENCE) {
                        add_silence(qf.output_pts, qf.silence_length_pts);
                } else {
                        assert(false);
                }
                if (qf.display_func != nullptr) {
                        qf.display_func();
                }
        }
}

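// Custom AVIO write callback: instead of writing to a file, the muxed bytes
// are forwarded to the HTTP server. We set pb->write_data_type in start()
// rather than passing a plain write_packet callback to avio_alloc_context(),
// since write_data_type additionally tells us what kind of data each chunk
// is (header, sync point, and so on).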
int VideoStream::write_packet2_thunk(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
        VideoStream *video_stream = (VideoStream *)opaque;
        return video_stream->write_packet2(buf, buf_size, type, time);
}

int VideoStream::write_packet2(uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
{
        if (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT) {
                seen_sync_markers = true;
        } else if (type == AVIO_DATA_MARKER_UNKNOWN && !seen_sync_markers) {
                // We don't know if this is a keyframe or not (the muxer could
                // avoid marking it), so we just have to make the best of it.
                type = AVIO_DATA_MARKER_SYNC_POINT;
        }

        if (type == AVIO_DATA_MARKER_HEADER) {
                stream_mux_header.append((char *)buf, buf_size);
                global_httpd->set_header(HTTPD::MAIN_STREAM, stream_mux_header);
        } else {
                global_httpd->add_data(HTTPD::MAIN_STREAM, (char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT, time, AVRational{ AV_TIME_BASE, 1 });
        }
        return buf_size;
}

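// Number of samples = seconds * sample rate, with a factor of two for stereo
// interleaving. As a sketch of the arithmetic (assuming, say, TIMEBASE =
// 120000 and OUTPUT_FREQUENCY = 48000): one 60 fps frame has length_pts =
// 2000, giving lrint(2000 * 48000 / 120000) = 800 samples per channel, i.e.
// 1600 interleaved int32 samples = 6400 bytes of zeros.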
void VideoStream::add_silence(int64_t pts, int64_t length_pts)
{
        // At 59.94, this will never quite add up (even discounting refresh frames,
        // which have unpredictable length), but hopefully, the player at the other
        // end should be able to stretch silence easily enough.
        long num_samples = lrint(length_pts * double(OUTPUT_FREQUENCY) / double(TIMEBASE)) * 2;
        uint8_t *zero = (uint8_t *)calloc(num_samples, sizeof(int32_t));

        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.stream_index = 1;
        pkt.data = zero;
        pkt.size = num_samples * sizeof(int32_t);
        pkt.flags = AV_PKT_FLAG_KEY;
        mux->add_packet(pkt, pts, pts);

        free(zero);
}

void VideoStream::add_audio_or_silence(const QueuedFrame &qf)
{
        if (qf.audio.empty()) {
                int64_t frame_length = lrint(double(TIMEBASE) / global_flags.output_framerate);
                add_silence(qf.output_pts, frame_length);
        } else {
                AVPacket pkt;
                av_init_packet(&pkt);
                pkt.stream_index = 1;
                pkt.data = (uint8_t *)qf.audio.data();
                pkt.size = qf.audio.size();
                pkt.flags = AV_PKT_FLAG_KEY;
                mux->add_packet(pkt, qf.output_pts, qf.output_pts);
        }
}