]> git.sesse.net Git - pkanalytics/blob - video_widget.cpp
Make it possible to zoom the VideoWidget.
[pkanalytics] / video_widget.cpp
1 #define GL_GLEXT_PROTOTYPES
2
3 #include "video_widget.h"
4
5 #include <assert.h>
6 #include <pthread.h>
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/stat.h>
12 #include <unistd.h>
13
14 extern "C" {
15 #include <libavcodec/avcodec.h>
16 #include <libavformat/avformat.h>
17 #include <libavutil/avutil.h>
18 #include <libavutil/error.h>
19 #include <libavutil/frame.h>
20 #include <libavutil/imgutils.h>
21 #include <libavutil/mem.h>
22 #include <libavutil/pixfmt.h>
23 #include <libavutil/opt.h>
24 #include <libswscale/swscale.h>
25 }
26
27 #include <chrono>
28 #include <cstdint>
29 #include <utility>
30 #include <vector>
31 #include <unordered_set>
32
33 #include <QOpenGLFunctions>
34 #include <QWheelEvent>
35
36 using namespace std;
37 using namespace std::chrono;
38
39 namespace {
40
41 bool is_full_range(const AVPixFmtDescriptor *desc)
42 {
43         // This is horrible, but there's no better way that I know of.
44         return (strchr(desc->name, 'j') != nullptr);
45 }
46
47 AVPixelFormat decide_dst_format(AVPixelFormat src_format)
48 {
49         // If this is a non-Y'CbCr format, just convert to 4:4:4 Y'CbCr
50         // and be done with it. It's too strange to spend a lot of time on.
51         // (Let's hope there's no alpha.)
52         const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_format);
53         if (src_desc == nullptr ||
54             src_desc->nb_components != 3 ||
55             (src_desc->flags & AV_PIX_FMT_FLAG_RGB)) {
56                 return AV_PIX_FMT_YUV444P;
57         }
58
59         // The best for us would be Cb and Cr together if possible,
60         // but FFmpeg doesn't support that except in the special case of
61         // NV12, so we need to go to planar even for the case of NV12.
62         // Thus, look for the closest (but no worse) 8-bit planar Y'CbCr format
63         // that matches in color range. (This will also include the case of
64         // the source format already being acceptable.)
65         bool src_full_range = is_full_range(src_desc);
66         const char *best_format = "yuv444p";
67         unsigned best_score = numeric_limits<unsigned>::max();
68         for (const AVPixFmtDescriptor *desc = av_pix_fmt_desc_next(nullptr);
69              desc;
70              desc = av_pix_fmt_desc_next(desc)) {
71                 // Find planar Y'CbCr formats only.
72                 if (desc->nb_components != 3) continue;
73                 if (desc->flags & AV_PIX_FMT_FLAG_RGB) continue;
74                 if (!(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) continue;
75                 if (desc->comp[0].plane != 0 ||
76                     desc->comp[1].plane != 1 ||
77                     desc->comp[2].plane != 2) continue;
78
79                 // 8-bit formats only.
80                 if (desc->flags & AV_PIX_FMT_FLAG_BE) continue;
81                 if (desc->comp[0].depth != 8) continue;
82
83                 // Same or better chroma resolution only.
84                 int chroma_w_diff = src_desc->log2_chroma_w - desc->log2_chroma_w;
85                 int chroma_h_diff = src_desc->log2_chroma_h - desc->log2_chroma_h;
86                 if (chroma_w_diff < 0 || chroma_h_diff < 0)
87                         continue;
88
89                 // Matching full/limited range only.
90                 if (is_full_range(desc) != src_full_range)
91                         continue;
92
93                 // Pick something with as little excess chroma resolution as possible.
94                 unsigned score = (1 << (chroma_w_diff)) << chroma_h_diff;
95                 if (score < best_score) {
96                         best_score = score;
97                         best_format = desc->name;
98                 }
99         }
100         return av_get_pix_fmt(best_format);
101 }
102
103 }  // namespace
104
// Drain the command queue filled by other threads (pause/resume/seek) and
// act on it; runs on the producer thread. Returns true only when a forward
// frame-skip could not decode a frame (EOF or decode error); otherwise
// false. If an actual seek was performed and <seeked> is non-null,
// *seeked is set to true.
bool VideoWidget::process_queued_commands(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, int video_stream_index, bool *seeked)
{
	// Process any queued commands from other threads.
	vector<QueuedCommand> commands;
	{
		// Take the whole queue in one go so we hold the lock only briefly.
		lock_guard<mutex> lock(queue_mu);
		swap(commands, command_queue);
	}

	for (const QueuedCommand &cmd : commands) {
		switch (cmd.command) {
		case QueuedCommand::PAUSE:
			paused = true;
			break;
		case QueuedCommand::RESUME:
			paused = false;
			// Re-anchor the wall clock at the frame we stopped on,
			// so playback resumes from here rather than jumping.
			pts_origin = last_pts;
			start = next_frame_start = steady_clock::now();
			break;
		case QueuedCommand::SEEK:
		case QueuedCommand::SEEK_ABSOLUTE:
			// Dealt with below.
			break;
		}
	}

	// Combine all seeks into one big one. (There are edge cases where this is probably
	// subtly wrong, but we'll live with it.)
	int64_t base_pts = last_pts;
	int64_t relative_seek_ms = 0;
	int64_t relative_seek_frames = 0;
	for (const QueuedCommand &cmd : commands) {
		if (cmd.command == QueuedCommand::SEEK) {
			relative_seek_ms += cmd.relative_seek_ms;
			relative_seek_frames += cmd.relative_seek_frames;
		} else if (cmd.command == QueuedCommand::SEEK_ABSOLUTE) {
			// An absolute seek overrides any relative seeks queued before it.
			base_pts = cmd.seek_ms;
			relative_seek_ms = 0;
			relative_seek_frames = 0;
		}
	}
	int64_t relative_seek_pts = av_rescale_q(relative_seek_ms, AVRational{ 1, 1000 }, video_timebase);
	if (relative_seek_ms != 0 && relative_seek_pts == 0) {
		// Just to be sure rounding errors don't move us into nothingness.
		relative_seek_pts = (relative_seek_ms > 0) ? 1 : -1;
	}
	int64_t goal_pts = base_pts + relative_seek_pts;
	if (goal_pts != last_pts || relative_seek_frames < 0) {
		// A real seek: drop decoder state and any frames buffered from
		// an earlier backwards frame-skip.
		avcodec_flush_buffers(video_codec_ctx);
		queued_frames.clear();

		// Seek to the last keyframe before this point.
		int64_t seek_pts = goal_pts;
		if (relative_seek_frames < 0) {
			// If we're frame-skipping backwards, add 100 ms of slop for each frame
			// so we're fairly certain we are able to see the ones we want.
			seek_pts -= av_rescale_q(-relative_seek_frames, AVRational{ 1, 10 }, video_timebase);
		}
		av_seek_frame(format_ctx, video_stream_index, seek_pts, AVSEEK_FLAG_BACKWARD);

		// Decode frames until EOF, or until we see something past our seek point.
		std::deque<AVFrameWithDeleter> queue;
		for ( ;; ) {
			bool error = false;
			AVFrameWithDeleter frame = decode_frame(format_ctx, video_codec_ctx,
				pathname, video_stream_index, &error);
			if (frame == nullptr || error) {
				break;
			}

			int64_t frame_pts = frame->pts;
			if (relative_seek_frames < 0) {
				// Buffer this frame; don't display it unless we know it's the Nth-latest.
				queue.push_back(std::move(frame));
				if (queue.size() > uint64_t(-relative_seek_frames) + 1) {
					queue.pop_front();
				}
			}
			if (frame_pts >= goal_pts) {
				if (relative_seek_frames > 0) {
					--relative_seek_frames;
				} else {
					if (relative_seek_frames < 0) {
						// Hope we have the right amount.
						// The rest will remain in the queue for when we play forward again.
						frame = std::move(queue.front());
						queue.pop_front();
						queued_frames = std::move(queue);
					}
					current_frame.reset(new Frame(make_video_frame(frame.get())));
					update();
					store_pts(frame->pts);
					break;
				}
			}
		}

		// NOTE: We keep pause status as-is.

		// Re-anchor the playback clock at the new position.
		pts_origin = last_pts;
		start = next_frame_start = last_frame = steady_clock::now();
		if (seeked) {
			*seeked = true;
		}
	} else if (relative_seek_frames > 0) {
		// The base PTS is fine, we only need to skip a few frames forwards.
		while (relative_seek_frames > 1) {
			// Eat a frame (ignore errors).
			bool error;
			decode_frame(format_ctx, video_codec_ctx, pathname, video_stream_index, &error);
			--relative_seek_frames;
		}

		// Display the last one.
		bool error;
		AVFrameWithDeleter frame = decode_frame(format_ctx, video_codec_ctx,
			pathname, video_stream_index, &error);
		if (frame == nullptr || error) {
			return true;
		}
		current_frame.reset(new Frame(make_video_frame(frame.get())));
		update();
		store_pts(frame->pts);
	}
	return false;
}
231
// Trivial constructor; GL objects are created later in initializeGL(),
// and the decoder thread is started by open()/play().
VideoWidget::VideoWidget(QWidget *parent)
	: QOpenGLWidget(parent) {}
234
235 GLuint compile_shader(const string &shader_src, GLenum type)
236 {
237         GLuint obj = glCreateShader(type);
238         const GLchar* source[] = { shader_src.data() };
239         const GLint length[] = { (GLint)shader_src.size() };
240         glShaderSource(obj, 1, source, length);
241         glCompileShader(obj);
242
243         GLchar info_log[4096];
244         GLsizei log_length = sizeof(info_log) - 1;
245         glGetShaderInfoLog(obj, log_length, &log_length, info_log);
246         info_log[log_length] = 0;
247         if (strlen(info_log) > 0) {
248                 fprintf(stderr, "Shader compile log: %s\n", info_log);
249         }
250
251         GLint status;
252         glGetShaderiv(obj, GL_COMPILE_STATUS, &status);
253         if (status == GL_FALSE) {
254                 // Add some line numbers to easier identify compile errors.
255                 string src_with_lines = "/*   1 */ ";
256                 size_t lineno = 1;
257                 for (char ch : shader_src) {
258                         src_with_lines.push_back(ch);
259                         if (ch == '\n') {
260                                 char buf[32];
261                                 snprintf(buf, sizeof(buf), "/* %3zu */ ", ++lineno);
262                                 src_with_lines += buf;
263                         }
264                 }
265
266                 fprintf(stderr, "Failed to compile shader:\n%s\n", src_with_lines.c_str());
267                 exit(1);
268         }
269
270         return obj;
271 }
272
// One-time GL setup, called by Qt with the context current: creates the
// three per-plane textures, compiles and links the Y'CbCr-to-RGB shader
// program, and creates the shared bilinear sampler.
void VideoWidget::initializeGL()
{
	// Plain opaque 2D drawing; no blending or depth testing needed.
	glDisable(GL_BLEND);
	glDisable(GL_DEPTH_TEST);
	glDepthMask(GL_FALSE);
	// One GL_R8 texture per plane: tex[0] = Y', tex[1] = Cb, tex[2] = Cr
	// (see the uploads in paintGL()).
	glCreateTextures(GL_TEXTURE_2D, 3, tex);

	ycbcr_vertex_shader = compile_shader(R"(
#version 460 core

layout(location = 0) in vec2 position;
layout(location = 1) in vec2 texcoord;
out vec2 tc;

void main()
{
	// The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is:
	//
	//   2.000  0.000  0.000 -1.000
	//   0.000  2.000  0.000 -1.000
	//   0.000  0.000 -2.000 -1.000
	//   0.000  0.000  0.000  1.000
	gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0);
	tc = texcoord;
	tc.y = 1.0f - tc.y;
}
)", GL_VERTEX_SHADER);
	ycbcr_fragment_shader = compile_shader(R"(
#version 460 core

layout(location = 0) uniform sampler2D tex_y;
layout(location = 1) uniform sampler2D tex_cb;
layout(location = 2) uniform sampler2D tex_cr;
layout(location = 3) uniform vec2 cbcr_offset;

in vec2 tc;
out vec4 FragColor;

// Computed statically by Movit, for limited-range BT.709.
// (We don't check whether the input could be BT.601 or BT.2020 currently, or full-range)
const mat3 inv_ycbcr_matrix = mat3(
	1.16438f, 1.16438f, 1.16438f,
	0.0f, -0.21325f, 2.11240f,
	1.79274f, -0.53291f, 0.0f
);

void main()
{
	if (tc.x < 0.0 || tc.x > 1.0 || tc.y < 0.0 || tc.y > 1.0) {
		FragColor.rgba = vec4(0.0f, 0.0f, 0.0f, 1.0f);
		return;
	}

	vec3 ycbcr;
	ycbcr.r = texture(tex_y, tc).r;
	ycbcr.g = texture(tex_cb, tc + cbcr_offset).r;
	ycbcr.b = texture(tex_cr, tc + cbcr_offset).r;
	ycbcr -= vec3(16.0f / 255.0f, 128.0f / 255.0f, 128.0f / 255.0f);
	FragColor.rgb = inv_ycbcr_matrix * ycbcr;
	FragColor.a = 1.0f;
}
)", GL_FRAGMENT_SHADER);
	ycbcr_program = glCreateProgram();
	glAttachShader(ycbcr_program, ycbcr_vertex_shader);
	glAttachShader(ycbcr_program, ycbcr_fragment_shader);
	glLinkProgram(ycbcr_program);

	// Linking rarely fails if both shaders compiled, but check anyway.
	GLint success;
	glGetProgramiv(ycbcr_program, GL_LINK_STATUS, &success);
	if (success == GL_FALSE) {
		GLchar error_log[1024] = {0};
		glGetProgramInfoLog(ycbcr_program, 1024, nullptr, error_log);
		fprintf(stderr, "Error linking program: %s\n", error_log);
		exit(1);
	}

	// Shared sampler for all three planes: bilinear with mipmaps
	// (nearest mip level), clamped so we don't wrap at the edges.
	glCreateSamplers(1, &bilinear_sampler);
	glSamplerParameteri(bilinear_sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
	glSamplerParameteri(bilinear_sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glSamplerParameteri(bilinear_sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
	glSamplerParameteri(bilinear_sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
355
356 void VideoWidget::resizeGL(int w, int h)
357 {
358         glViewport(0, 0, w, h);
359         display_aspect = double(w) / h;
360 }
361
362 int num_levels(GLuint width, GLuint height)
363 {
364         int levels = 1;
365         while (width > 1 || height > 1) {
366                 width = max(width / 2, 1u);
367                 height = max(height / 2, 1u);
368                 ++levels;
369         }
370         return levels;
371 }
372
373 void VideoWidget::paintGL()
374 {
375         std::shared_ptr<Frame> frame = current_frame;
376         if (frame == nullptr) {
377                 glClear(GL_COLOR_BUFFER_BIT);
378                 return;
379         }
380
381         glUseProgram(ycbcr_program);
382         if (frame->width != last_width || frame->height != last_height) {
383                 glTextureStorage2D(tex[0], num_levels(frame->width, frame->height), GL_R8, frame->width, frame->height);
384         }
385         if (frame->chroma_width != last_chroma_width || frame->chroma_height != last_chroma_height) {
386                 for (GLuint num : { tex[1], tex[2] }) {
387                         glTextureStorage2D(num, num_levels(frame->chroma_width, frame->chroma_height), GL_R8, frame->chroma_width, frame->chroma_height);
388                 }
389         }
390
391         glTextureSubImage2D(tex[0], 0, 0, 0, frame->width, frame->height, GL_RED, GL_UNSIGNED_BYTE, frame->data.get());
392         glGenerateTextureMipmap(tex[0]);
393
394         glTextureSubImage2D(tex[1], 0, 0, 0, frame->chroma_width, frame->chroma_height, GL_RED, GL_UNSIGNED_BYTE, frame->data.get() + frame->width * frame->height);
395         glGenerateTextureMipmap(tex[1]);
396
397         glTextureSubImage2D(tex[2], 0, 0, 0, frame->chroma_width, frame->chroma_height, GL_RED, GL_UNSIGNED_BYTE, frame->data.get() + frame->width * frame->height + frame->chroma_width * frame->chroma_height);
398         glGenerateTextureMipmap(tex[2]);
399
400         glBindTextureUnit(0, tex[0]);
401         glBindTextureUnit(1, tex[1]);
402         glBindTextureUnit(2, tex[2]);
403         glBindSampler(0, bilinear_sampler);
404         glBindSampler(1, bilinear_sampler);
405         glBindSampler(2, bilinear_sampler);
406         glProgramUniform1i(ycbcr_program, 0, 0);
407         glProgramUniform1i(ycbcr_program, 1, 1);
408         glProgramUniform1i(ycbcr_program, 2, 2);
409         glProgramUniform2f(ycbcr_program, 3, cbcr_offset[0], -cbcr_offset[1]);
410
411         float tx1 = 0.0f;
412         float tx2 = 1.0f;
413         float ty1 = 0.0f;
414         float ty2 = 1.0f;
415
416         double video_aspect = double(frame->width) / frame->height;
417         if (display_aspect > video_aspect) {
418                 double extra_width = frame->height * display_aspect - frame->width;
419                 tx1 = -0.5 * extra_width / frame->width;
420                 tx2 = 1.0 + 0.5 * extra_width / frame->width;
421         } else if (display_aspect < video_aspect) {
422                 double extra_height = frame->width / display_aspect - frame->height;
423                 ty1 = -0.5 * extra_height / frame->height;
424                 ty2 = 1.0 + 0.5 * extra_height / frame->height;
425         }
426
427         glBegin(GL_QUADS);
428
429         // (0,0)
430         glVertexAttrib2f(1, tx1, ty1);
431         glVertex2f(zoom_matrix[2 * 3 + 0], zoom_matrix[2 * 3 + 1]);
432
433         // (0,1)
434         glVertexAttrib2f(1, tx1, ty2);
435         glVertex2f(zoom_matrix[1 * 3 + 0] + zoom_matrix[2 * 3 + 0], zoom_matrix[1 * 3 + 1] + zoom_matrix[2 * 3 + 1]);
436
437         // (1,1)
438         glVertexAttrib2f(1, tx2, ty2);
439         glVertex2f(zoom_matrix[0 * 3 + 0] + zoom_matrix[1 * 3 + 0] + zoom_matrix[2 * 3 + 0],
440                    zoom_matrix[1 * 3 + 0] + zoom_matrix[1 * 3 + 1] + zoom_matrix[2 * 3 + 1]);
441
442         // (1,0)
443         glVertexAttrib2f(1, tx2, ty1);
444         glVertex2f(zoom_matrix[0 * 3 + 0] + zoom_matrix[2 * 3 + 0],
445                    zoom_matrix[1 * 3 + 0] + zoom_matrix[2 * 3 + 1]);
446
447         glEnd();
448 }
449
// res = a * b for row-major 3x3 matrices. res must not alias a or b
// (each output element is written after reading a whole row/column).
void matmul3x3(const double a[9], const double b[9], double res[9])
{
	for (int row = 0; row < 3; ++row) {
		for (int col = 0; col < 3; ++col) {
			res[row * 3 + col] =
				a[row * 3 + 0] * b[0 * 3 + col] +
				a[row * 3 + 1] * b[1 * 3 + col] +
				a[row * 3 + 2] * b[2 * 3 + col];
		}
	}
}
462
463 void VideoWidget::wheelEvent(QWheelEvent *event)
464 {
465         int delta = event->angleDelta().y();
466         if (delta == 0) {
467                 return;
468         }
469         double x = event->position().x() / width();
470         double y = 1.0 - event->position().y() / height();
471         double zoom = delta > 0 ? pow(1.01, delta) : pow(1/1.01, -delta);
472
473         const double inv_translation_matrix[9] = {
474                 1.0, 0.0, 0.0,
475                 0.0, 1.0, 0.0,
476                 -x, -y, 1.0
477         };
478         const double scale_matrix[9] = {
479                 zoom, 0.0, 0.0,
480                 0.0, zoom, 0.0,
481                 0.0, 0.0, 1.0
482         };
483         const double translation_matrix[9] = {
484                 1.0, 0.0, 0.0,
485                 0.0, 1.0, 0.0,
486                 x, y, 1.0
487         };
488         double tmp1[9], tmp2[9];
489         matmul3x3(zoom_matrix, inv_translation_matrix, tmp1);
490         matmul3x3(tmp1, scale_matrix, tmp2);
491         matmul3x3(tmp2, translation_matrix, zoom_matrix);
492
493         fixup_zoom_matrix();
494 }
495
496 // Normalize the matrix so that we never get skew or similar,
497 // and also never can zoom or pan too far out.
498 void VideoWidget::fixup_zoom_matrix()
499 {
500         // Correct for any numerical errors (we know the matrix must be orthogonal
501         // and have zero rotation).
502         zoom_matrix[4] = zoom_matrix[0];
503         zoom_matrix[1] = zoom_matrix[2] = zoom_matrix[3] = zoom_matrix[5] = 0.0;
504         zoom_matrix[8] = 1.0;
505
506         // We can't zoom further out than 1:1. (Perhaps it would be nice to
507         // reuse the last zoom-in point to do this, but the center will have to do
508         // for now.)
509         if (zoom_matrix[0] < 1.0) {
510                 const double zoom = 1.0 / zoom_matrix[0];
511                 const double inv_translation_matrix[9] = {
512                         1.0, 0.0, 0.0,
513                         0.0, 1.0, 0.0,
514                         -0.5, -0.5, 1.0
515                 };
516                 const double scale_matrix[9] = {
517                         zoom, 0.0, 0.0,
518                         0.0, zoom, 0.0,
519                         0.0, 0.0, 1.0
520                 };
521                 const double translation_matrix[9] = {
522                         1.0, 0.0, 0.0,
523                         0.0, 1.0, 0.0,
524                         0.5, 0.5, 1.0
525                 };
526                 double tmp1[9], tmp2[9];
527                 matmul3x3(zoom_matrix, inv_translation_matrix, tmp1);
528                 matmul3x3(tmp1, scale_matrix, tmp2);
529                 matmul3x3(tmp2, translation_matrix, zoom_matrix);
530         }
531
532         // Looking at the points we'll draw with glVertex2f(), make sure none of them are
533         // inside the square (which would generally mean we've panned ourselves out-of-bounds).
534         // We simply adjust the translation, which is possible because we fixed scaling above.
535         zoom_matrix[6] = min(zoom_matrix[6], 0.0);  // Left side (x=0).
536         zoom_matrix[7] = min(zoom_matrix[7], 0.0);  // Bottom side (y=0).
537         zoom_matrix[6] = std::max(zoom_matrix[6], 1.0 - zoom_matrix[0]);  // Right side (x=1).
538         zoom_matrix[7] = std::max(zoom_matrix[7], 1.0 - zoom_matrix[4]);  // Top side (y=1).
539 }
540
541 void VideoWidget::open(const string &filename)
542 {
543         stop();
544         internal_rewind();
545         pathname = filename;
546         play();
547 }
548
549 void VideoWidget::play()
550 {
551         if (running) {
552                 std::lock_guard<std::mutex> lock(queue_mu);
553                 command_queue.push_back(QueuedCommand { QueuedCommand::RESUME });
554                 producer_thread_should_quit.wakeup();
555                 return;
556         }
557         running = true;
558         producer_thread_should_quit.unquit();
559         producer_thread = std::thread(&VideoWidget::producer_thread_func, this);
560 }
561
562 void VideoWidget::pause()
563 {
564         if (!running) {
565                 return;
566         }
567         std::lock_guard<std::mutex> lock(queue_mu);
568         command_queue.push_back(QueuedCommand { QueuedCommand::PAUSE });
569         producer_thread_should_quit.wakeup();
570 }
571
572 void VideoWidget::seek(int64_t relative_seek_ms)
573 {
574         if (!running) {
575                 return;
576         }
577         std::lock_guard<std::mutex> lock(queue_mu);
578         command_queue.push_back(QueuedCommand { QueuedCommand::SEEK, relative_seek_ms, 0, 0 });
579         producer_thread_should_quit.wakeup();
580 }
581
582 void VideoWidget::seek_frames(int64_t relative_seek_frames)
583 {
584         if (!running) {
585                 return;
586         }
587         std::lock_guard<std::mutex> lock(queue_mu);
588         command_queue.push_back(QueuedCommand { QueuedCommand::SEEK, 0, relative_seek_frames, 0 });
589         producer_thread_should_quit.wakeup();
590 }
591
592 void VideoWidget::seek_absolute(int64_t position_ms)
593 {
594         if (!running) {
595                 return;
596         }
597         std::lock_guard<std::mutex> lock(queue_mu);
598         command_queue.push_back(QueuedCommand { QueuedCommand::SEEK_ABSOLUTE, 0, 0, position_ms });
599         producer_thread_should_quit.wakeup();
600 }
601
602 void VideoWidget::stop()
603 {
604         if (!running) {
605                 return;
606         }
607         running = false;
608         producer_thread_should_quit.quit();
609         producer_thread.join();
610 }
611
612 void VideoWidget::producer_thread_func()
613 {
614         if (!producer_thread_should_quit.should_quit()) {
615                 if (!play_video(pathname)) {
616                         // TODO: Send the error back to the UI somehow.
617                 }
618         }
619 }
620
621 void VideoWidget::internal_rewind()
622 {
623         pts_origin = last_pts = 0;
624         last_position = 0;
625         start = next_frame_start = steady_clock::now();
626 }
627
// avcodec get_format callback: given the decoder's candidate pixel formats
// in <fmt> (terminated by -1, i.e. AV_PIX_FMT_NONE), pick the one backed by
// hardware device type <type>, or fall back to the first software format if
// hardware decoding is not possible. Diagnostics go to stderr.
template<AVHWDeviceType type>
AVPixelFormat get_hw_format(AVCodecContext *ctx, const AVPixelFormat *fmt)
{
	bool found_config_of_right_type = false;
	for (int i = 0;; ++i) {  // Termination condition inside loop.
		const AVCodecHWConfig *config = avcodec_get_hw_config(ctx->codec, i);
		if (config == nullptr) {  // End of list.
			break;
		}
		if (!(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) ||
		    config->device_type != type) {
			// Not interesting for us.
			continue;
		}

		// We have a config of the right type, but does it actually support
		// the pixel format we want? (Seemingly, FFmpeg's way of signaling errors
		// is to just replace the pixel format with a software-decoded one,
		// such as yuv420p.)
		found_config_of_right_type = true;
		for (const AVPixelFormat *fmt_ptr = fmt; *fmt_ptr != -1; ++fmt_ptr) {
			if (config->pix_fmt == *fmt_ptr) {
				fprintf(stderr, "Initialized '%s' hardware decoding for codec '%s'.\n",
					av_hwdevice_get_type_name(type), ctx->codec->name);
				if (ctx->profile == FF_PROFILE_H264_BASELINE) {
					fprintf(stderr, "WARNING: Stream claims to be H.264 Baseline, which is generally poorly supported in hardware decoders.\n");
					fprintf(stderr, "         Consider encoding it as Constrained Baseline, Main or High instead.\n");
					fprintf(stderr, "         Decoding might fail and fall back to software.\n");
				}
				return config->pix_fmt;
			}
		}
		// The config did not match any offered format; report what the
		// decoder offered (deduplicated) and keep looking.
		fprintf(stderr, "Decoder '%s' supports only these pixel formats:", ctx->codec->name);
		unordered_set<AVPixelFormat> seen;
		for (const AVPixelFormat *fmt_ptr = fmt; *fmt_ptr != -1; ++fmt_ptr) {
			if (!seen.count(*fmt_ptr)) {
				fprintf(stderr, " %s", av_get_pix_fmt_name(*fmt_ptr));
				seen.insert(*fmt_ptr);
			}
		}
		fprintf(stderr, " (wanted %s for hardware acceleration)\n", av_get_pix_fmt_name(config->pix_fmt));

	}

	if (!found_config_of_right_type) {
		fprintf(stderr, "Decoder '%s' does not support device type '%s'.\n", ctx->codec->name, av_hwdevice_get_type_name(type));
	}

	// We found no VA-API formats, so take the first software format.
	for (const AVPixelFormat *fmt_ptr = fmt; *fmt_ptr != -1; ++fmt_ptr) {
		if ((av_pix_fmt_desc_get(*fmt_ptr)->flags & AV_PIX_FMT_FLAG_HWACCEL) == 0) {
			fprintf(stderr, "Falling back to software format %s.\n", av_get_pix_fmt_name(*fmt_ptr));
			return *fmt_ptr;
		}
	}

	// Fallback: Just return anything. (Should never really happen.)
	return fmt[0];
}
687
// Return the next decoded video frame: first from the queue of frames
// buffered during a backwards frame-skip, otherwise by feeding demuxed
// packets to the decoder until it produces one. Returns nullptr at EOF
// (*error stays false) or on failure (*error set to true).
AVFrameWithDeleter VideoWidget::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx,
	const std::string &pathname, int video_stream_index,
	bool *error)
{
	*error = false;

	// Leftovers from a backwards frame-skip are served first.
	if (!queued_frames.empty()) {
		AVFrameWithDeleter frame = std::move(queued_frames.front());
		queued_frames.pop_front();
		return frame;
	}

	// Read packets until we have a frame or there are none left.
	bool frame_finished = false;
	AVFrameWithDeleter video_avframe = av_frame_alloc_unique();
	bool eof = false;
	do {
		AVPacket pkt;
		// Release the packet's buffers however we leave this iteration.
		unique_ptr<AVPacket, decltype(av_packet_unref)*> pkt_cleanup(
			&pkt, av_packet_unref);
		av_init_packet(&pkt);
		pkt.data = nullptr;
		pkt.size = 0;
		if (av_read_frame(format_ctx, &pkt) == 0) {
			// Only packets from our video stream go to the decoder;
			// anything else (e.g. audio) is simply dropped here.
			if (pkt.stream_index == video_stream_index) {
				if (avcodec_send_packet(video_codec_ctx, &pkt) < 0) {
					fprintf(stderr, "%s: Cannot send packet to video codec.\n", pathname.c_str());
					*error = true;
					return AVFrameWithDeleter(nullptr);
				}
			}
		} else {
			eof = true;  // Or error, but ignore that for the time being.
		}

		// Decode video, if we have a frame. (EAGAIN just means the
		// decoder needs more input, so we loop and read another packet.)
		int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
		if (err == 0) {
			frame_finished = true;
			break;
		} else if (err != AVERROR(EAGAIN)) {
			fprintf(stderr, "%s: Cannot receive frame from video codec.\n", pathname.c_str());
			*error = true;
			return AVFrameWithDeleter(nullptr);
		}
	} while (!eof);

	if (frame_finished)
		return video_avframe;
	else
		return AVFrameWithDeleter(nullptr);
}
740
741 int find_stream_index(AVFormatContext *ctx, AVMediaType media_type)
742 {
743         for (unsigned i = 0; i < ctx->nb_streams; ++i) {
744                 if (ctx->streams[i]->codecpar->codec_type == media_type) {
745                         return i;
746                 }
747         }
748         return -1;
749 }
750
751 steady_clock::time_point compute_frame_start(int64_t frame_pts, int64_t pts_origin, const AVRational &video_timebase, const steady_clock::time_point &origin, double rate)
752 {
753         const duration<double> pts((frame_pts - pts_origin) * double(video_timebase.num) / double(video_timebase.den));
754         return origin + duration_cast<steady_clock::duration>(pts / rate);
755 }
756
757 bool VideoWidget::play_video(const string &pathname)
758 {
759         queued_frames.clear();
760         AVFormatContextWithCloser format_ctx = avformat_open_input_unique(pathname.c_str(), /*fmt=*/nullptr,
761                 /*options=*/nullptr);
762         if (format_ctx == nullptr) {
763                 fprintf(stderr, "%s: Error opening file\n", pathname.c_str());
764                 return false;
765         }
766
767         if (avformat_find_stream_info(format_ctx.get(), nullptr) < 0) {
768                 fprintf(stderr, "%s: Error finding stream info\n", pathname.c_str());
769                 return false;
770         }
771
772         int video_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_VIDEO);
773         if (video_stream_index == -1) {
774                 fprintf(stderr, "%s: No video stream found\n", pathname.c_str());
775                 return false;
776         }
777
778         // Open video decoder.
779         const AVCodecParameters *video_codecpar = format_ctx->streams[video_stream_index]->codecpar;
780         const AVCodec *video_codec = avcodec_find_decoder(video_codecpar->codec_id);
781
782         video_timebase = format_ctx->streams[video_stream_index]->time_base;
783         AVCodecContextWithDeleter video_codec_ctx = avcodec_alloc_context3_unique(nullptr);
784         if (avcodec_parameters_to_context(video_codec_ctx.get(), video_codecpar) < 0) {
785                 fprintf(stderr, "%s: Cannot fill video codec parameters\n", pathname.c_str());
786                 return false;
787         }
788         if (video_codec == nullptr) {
789                 fprintf(stderr, "%s: Cannot find video decoder\n", pathname.c_str());
790                 return false;
791         }
792
793         // Seemingly, it's not too easy to make something that just initializes
794         // “whatever goes”, so we don't get CUDA or VULKAN or whatever here
795         // without enumerating through several different types.
796         // VA-API and VDPAU will do for now. We prioritize VDPAU for the
797         // simple reason that there's a VA-API-via-VDPAU emulation for NVidia
798         // cards that seems to work, but just hangs when trying to transfer the frame.
799         //
800         // Note that we don't actually check codec support beforehand,
801         // so if you have a low-end VDPAU device but a high-end VA-API device,
802         // you lose out on the extra codec support from the latter.
803         AVBufferRef *hw_device_ctx = nullptr;
804         if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VDPAU, nullptr, nullptr, 0) >= 0) {
805                 video_codec_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
806                 video_codec_ctx->get_format = get_hw_format<AV_HWDEVICE_TYPE_VDPAU>;
807         } else if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI, nullptr, nullptr, 0) >= 0) {
808                 video_codec_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
809                 video_codec_ctx->get_format = get_hw_format<AV_HWDEVICE_TYPE_VAAPI>;
810         } else {
811                 fprintf(stderr, "Failed to initialize VA-API or VDPAU for FFmpeg acceleration. Decoding video in software.\n");
812         }
813
814         if (avcodec_open2(video_codec_ctx.get(), video_codec, nullptr) < 0) {
815                 fprintf(stderr, "%s: Cannot open video decoder\n", pathname.c_str());
816                 return false;
817         }
818         unique_ptr<AVCodecContext, decltype(avcodec_close)*> video_codec_ctx_cleanup(
819                 video_codec_ctx.get(), avcodec_close);
820
821         internal_rewind();
822
823         // Main loop.
824         int consecutive_errors = 0;
825         double rate = 1.0;
826         while (!producer_thread_should_quit.should_quit()) {
827                 if (process_queued_commands(format_ctx.get(), video_codec_ctx.get(), video_stream_index, /*seeked=*/nullptr)) {
828                         return true;
829                 }
830                 if (paused) {
831                         producer_thread_should_quit.sleep_for(hours(1));
832                         continue;
833                 }
834
835                 bool error;
836                 AVFrameWithDeleter frame = decode_frame(format_ctx.get(), video_codec_ctx.get(),
837                         pathname, video_stream_index, &error);
838                 if (error) {
839                         if (++consecutive_errors >= 100) {
840                                 fprintf(stderr, "More than 100 consecutive video frames, aborting playback.\n");
841                                 return false;
842                         } else {
843                                 continue;
844                         }
845                 } else {
846                         consecutive_errors = 0;
847                 }
848                 if (frame == nullptr) {
849                         // EOF.
850                         return false;
851                 }
852
853                 // Sleep until it's time to present this frame.
854                 for ( ;; ) {
855                         if (last_pts == 0 && pts_origin == 0) {
856                                 pts_origin = frame->pts;
857                         }
858                         steady_clock::time_point now = steady_clock::now();
859                         next_frame_start = compute_frame_start(frame->pts, pts_origin, video_timebase, start, rate);
860
861                         if (duration<double>(now - next_frame_start).count() >= 0.1) {
862                                 // If we don't have enough CPU to keep up, or if we have a live stream
863                                 // where the initial origin was somehow wrong, we could be behind indefinitely.
864                                 fprintf(stderr, "%s: Playback %.0f ms behind, resetting time scale\n",
865                                         pathname.c_str(),
866                                         1e3 * duration<double>(now - next_frame_start).count());
867                                 pts_origin = frame->pts;
868                                 start = next_frame_start = now;
869                         }
870                         bool finished_wakeup;
871                         finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
872                         if (finished_wakeup) {
873                                 current_frame.reset(new Frame(make_video_frame(frame.get())));
874                                 last_frame = steady_clock::now();
875                                 update();
876                                 break;
877                         } else {
878                                 if (producer_thread_should_quit.should_quit()) break;
879
880                                 bool seeked = false;
881                                 if (process_queued_commands(format_ctx.get(), video_codec_ctx.get(), video_stream_index, &seeked)) {
882                                         return true;
883                                 }
884
885                                 if (paused) {
886                                         // Just paused, so present the frame immediately and then go into deep sleep.
887                                         current_frame.reset(new Frame(make_video_frame(frame.get())));
888                                         last_frame = steady_clock::now();
889                                         update();
890                                         break;
891                                 }
892
893                                 // If we just seeked, drop this frame on the floor and be done.
894                                 if (seeked) {
895                                         break;
896                                 }
897                         }
898                 }
899                 store_pts(frame->pts);
900         }
901         return true;
902 }
903
904 void VideoWidget::store_pts(int64_t pts)
905 {
906         last_pts = pts;
907         last_position = lrint(pts * double(video_timebase.num) / double(video_timebase.den) * 1000);
908         emit position_changed(last_position);
909 }
910
// Taken from Movit (see the comment there for explanation).
// Computes the texture-coordinate offset needed to sample chroma at the
// correct position for the given subsampling factor and plane resolution.
float compute_chroma_offset(float pos, unsigned subsampling_factor, unsigned resolution)
{
	const float local_chroma_pos = (0.5 + pos * (subsampling_factor - 1)) / subsampling_factor;
	if (fabs(local_chroma_pos - 0.5) >= 1e-10) {
		return (0.5 - local_chroma_pos) / resolution;
	}
	// Centered chroma needs no offset. Return negative zero, since
	// x + (-0) can be optimized away freely, as opposed to x + 0.
	return -0.0;
}
922
// Convert a decoded AVFrame into a CPU-side, tightly packed planar 8-bit
// Y'CbCr Frame suitable for upload. Hardware frames (VA-API/VDPAU) are first
// transferred to system memory. Also updates the cached swscale context and
// the cbcr_offset members (presumably consumed by the rendering path —
// not visible from here).
VideoWidget::Frame VideoWidget::make_video_frame(const AVFrame *frame)
{
	Frame video_frame;
	AVFrameWithDeleter sw_frame;

	if (frame->format == AV_PIX_FMT_VAAPI ||
	    frame->format == AV_PIX_FMT_VDPAU) {
		// Get the frame down to the CPU. (TODO: See if we can keep it
		// on the GPU all the way, since it will be going up again later.
		// However, this only works if the OpenGL GPU is the same one.)
		sw_frame = av_frame_alloc_unique();
		int err = av_hwframe_transfer_data(sw_frame.get(), frame, 0);
		if (err != 0) {
			// On failure, fall through and feed the hardware frame to
			// swscale as-is (best effort).
			fprintf(stderr, "%s: Cannot transfer hardware video frame to software.\n", pathname.c_str());
		} else {
			// The transfer does not carry over timing metadata,
			// so copy the fields we care about ourselves.
			sw_frame->pts = frame->pts;
			sw_frame->pkt_duration = frame->pkt_duration;
			frame = sw_frame.get();
		}
	}

	// (Re)create the scaler only when the input geometry or pixel format
	// changes; in the common case, every frame looks the same and the
	// cached context is reused.
	if (sws_ctx == nullptr ||
	    sws_last_width != frame->width ||
	    sws_last_height != frame->height ||
	    sws_last_src_format != frame->format) {
		sws_dst_format = decide_dst_format(AVPixelFormat(frame->format));
		sws_ctx.reset(
			sws_getContext(frame->width, frame->height, AVPixelFormat(frame->format),
				frame->width, frame->height, sws_dst_format,
				SWS_BICUBIC, nullptr, nullptr, nullptr));
		sws_last_width = frame->width;
		sws_last_height = frame->height;
		sws_last_src_format = frame->format;
	}
	if (sws_ctx == nullptr) {
		fprintf(stderr, "Could not create scaler context\n");
		abort();
	}

	uint8_t *pic_data[4] = { nullptr, nullptr, nullptr, nullptr };
	int linesizes[4] = { 0, 0, 0, 0 };
	const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);

	video_frame.width = frame->width;
	video_frame.height = frame->height;
	// Chroma plane dimensions, rounded up per the destination format's
	// subsampling (log2_chroma_w/h).
	video_frame.chroma_width = AV_CEIL_RSHIFT(int(frame->width), desc->log2_chroma_w);
	video_frame.chroma_height = AV_CEIL_RSHIFT(int(frame->height), desc->log2_chroma_h);

	// We always assume left chroma placement for now.
	cbcr_offset[0] = compute_chroma_offset(0.0f, 1 << desc->log2_chroma_w, video_frame.chroma_width);
	cbcr_offset[1] = compute_chroma_offset(0.5f, 1 << desc->log2_chroma_h, video_frame.chroma_height);

	// One contiguous buffer: full-resolution luma plane followed by the
	// two (possibly subsampled) chroma planes, with no row padding.
	size_t len = frame->width * frame->height + 2 * video_frame.chroma_width * video_frame.chroma_height;
	video_frame.data.reset(new uint8_t[len]);

	pic_data[0] = video_frame.data.get();
	linesizes[0] = frame->width;

	pic_data[1] = pic_data[0] + frame->width * frame->height;
	linesizes[1] = video_frame.chroma_width;

	pic_data[2] = pic_data[1] + video_frame.chroma_width * video_frame.chroma_height;
	linesizes[2] = video_frame.chroma_width;

	sws_scale(sws_ctx.get(), frame->data, frame->linesize, 0, frame->height, pic_data, linesizes);

	return video_frame;
}
991