]> git.sesse.net Git - nageru/blob - mixer.cpp
Move chroma subsampling into its own function.
[nageru] / mixer.cpp
1 #define WIDTH 1280
2 #define HEIGHT 720
3
4 #undef Success
5
6 #include "mixer.h"
7
8 #include <assert.h>
9 #include <effect.h>
10 #include <effect_chain.h>
11 #include <effect_util.h>
12 #include <epoxy/egl.h>
13 #include <features.h>
14 #include <image_format.h>
15 #include <init.h>
16 #include <overlay_effect.h>
17 #include <padding_effect.h>
18 #include <resample_effect.h>
19 #include <resource_pool.h>
20 #include <saturation_effect.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <sys/time.h>
25 #include <time.h>
26 #include <util.h>
27 #include <white_balance_effect.h>
28 #include <ycbcr.h>
29 #include <ycbcr_input.h>
30 #include <cmath>
31 #include <condition_variable>
32 #include <cstddef>
33 #include <memory>
34 #include <mutex>
35 #include <string>
36 #include <thread>
37 #include <vector>
38
39 #include "bmusb.h"
40 #include "context.h"
41 #include "h264encode.h"
42 #include "pbo_frame_allocator.h"
43 #include "ref_counted_gl_sync.h"
44
45 class QOpenGLContext;
46
47 using namespace movit;
48 using namespace std;
49 using namespace std::placeholders;
50
51 Mixer *global_mixer = nullptr;  // Process-wide Mixer pointer; starts out null and is not assigned anywhere in this file.
52
53 Mixer::Mixer(QSurface *surface1, QSurface *surface2, QSurface *surface3, QSurface *surface4)
54         : surface1(surface1), surface2(surface2), surface3(surface3), surface4(surface4)
55 {
56         CHECK(init_movit(MOVIT_SHADER_DIR, MOVIT_DEBUG_OFF));
57         check_error();
58
59         chain.reset(new EffectChain(WIDTH, HEIGHT));
60         check_error();
61
62         ImageFormat inout_format;
63         inout_format.color_space = COLORSPACE_sRGB;
64         inout_format.gamma_curve = GAMMA_sRGB;
65
66         YCbCrFormat ycbcr_format;
67         ycbcr_format.chroma_subsampling_x = 2;
68         ycbcr_format.chroma_subsampling_y = 1;
69         ycbcr_format.cb_x_position = 0.0;
70         ycbcr_format.cr_x_position = 0.0;
71         ycbcr_format.cb_y_position = 0.5;
72         ycbcr_format.cr_y_position = 0.5;
73         ycbcr_format.luma_coefficients = YCBCR_REC_601;
74         ycbcr_format.full_range = false;
75
76         input[0] = new YCbCrInput(inout_format, ycbcr_format, WIDTH, HEIGHT, YCBCR_INPUT_SPLIT_Y_AND_CBCR);
77         chain->add_input(input[0]);
78         input[1] = new YCbCrInput(inout_format, ycbcr_format, WIDTH, HEIGHT, YCBCR_INPUT_SPLIT_Y_AND_CBCR);
79         chain->add_input(input[1]);
80         resample_effect = chain->add_effect(new ResampleEffect(), input[0]);
81         padding_effect = chain->add_effect(new IntegralPaddingEffect());
82         float border_color[] = { 0.0f, 0.0f, 0.0f, 1.0f };
83         CHECK(padding_effect->set_vec4("border_color", border_color));
84
85         resample2_effect = chain->add_effect(new ResampleEffect(), input[1]);
86         Effect *saturation_effect = chain->add_effect(new SaturationEffect());
87         CHECK(saturation_effect->set_float("saturation", 0.3f));
88         Effect *wb_effect = chain->add_effect(new WhiteBalanceEffect());
89         CHECK(wb_effect->set_float("output_color_temperature", 3500.0));
90         padding2_effect = chain->add_effect(new IntegralPaddingEffect());
91
92         chain->add_effect(new OverlayEffect(), padding_effect, padding2_effect);
93
94         ycbcr_format.chroma_subsampling_x = 1;
95
96         chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
97         chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR);
98         chain->set_dither_bits(8);
99         chain->set_output_origin(OUTPUT_ORIGIN_TOP_LEFT);
100         chain->finalize();
101
102         h264_encoder.reset(new H264Encoder(surface2, WIDTH, HEIGHT, "test.mp4"));
103
104         printf("Configuring first card...\n");
105         cards[0].usb = new BMUSBCapture(0x1edb, 0xbd3b);  // 0xbd4f
106         cards[0].usb->set_frame_callback(std::bind(&Mixer::bm_frame, this, 0, _1, _2, _3, _4, _5, _6, _7));
107         cards[0].frame_allocator.reset(new PBOFrameAllocator(1280 * 750 * 2 + 44));
108         cards[0].usb->set_video_frame_allocator(cards[0].frame_allocator.get());
109         cards[0].usb->configure_card();
110
111         if (NUM_CARDS == 2) {
112                 printf("Configuring second card...\n");
113                 cards[1].usb = new BMUSBCapture(0x1edb, 0xbd4f);
114                 cards[1].usb->set_frame_callback(std::bind(&Mixer::bm_frame, this, 1, _1, _2, _3, _4, _5, _6, _7));
115                 cards[1].frame_allocator.reset(new PBOFrameAllocator(1280 * 750 * 2 + 44));
116                 cards[1].usb->set_video_frame_allocator(cards[1].frame_allocator.get());
117                 cards[1].usb->configure_card();
118         }
119
120         BMUSBCapture::start_bm_thread();
121
122         for (int card_index = 0; card_index < NUM_CARDS; ++card_index) {
123                 cards[card_index].usb->start_bm_capture();
124                 input[card_index]->set_pixel_data(0, nullptr, 0);
125                 input[card_index]->set_pixel_data(1, nullptr, 0);
126         }
127
128         //chain->enable_phase_timing(true);
129
130         // Set up stuff for NV12 conversion.
131         resource_pool = chain->get_resource_pool();
132
133         // Cb/Cr shader.
134         string cbcr_vert_shader = read_file("vs-cbcr.130.vert");
135         string cbcr_frag_shader =
136                 "#version 130 \n"
137                 "in vec2 tc0; \n"
138                 "uniform sampler2D cbcr_tex; \n"
139                 "void main() { \n"
140                 "    gl_FragColor = texture2D(cbcr_tex, tc0); \n"
141                 "} \n";
142         cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader);
143 }
144
145 Mixer::~Mixer()
146 {
147         resource_pool->release_glsl_program(cbcr_program_num);
148         BMUSBCapture::stop_bm_thread();
149 }
150
151 void Mixer::bm_frame(int card_index, uint16_t timecode,
152                      FrameAllocator::Frame video_frame, size_t video_offset, uint16_t video_format,
153                      FrameAllocator::Frame audio_frame, size_t audio_offset, uint16_t audio_format)
154 {
155         CaptureCard *card = &cards[card_index];
156         if (!card->thread_initialized) {
157                 printf("initializing context for bmusb thread %d\n", card_index);
158                 eglBindAPI(EGL_OPENGL_API);
159                 card->context = create_context();
160                 if (!make_current(card->context, card->surface)) {
161                         printf("failed to create bmusb context\n");
162                         exit(1);
163                 }
164                 card->thread_initialized = true;
165         }       
166
167         if (video_frame.len - video_offset != 1280 * 750 * 2) {
168                 printf("dropping frame with wrong length (%ld)\n", video_frame.len - video_offset);
169                 FILE *fp = fopen("frame.raw", "wb");
170                 fwrite(video_frame.data, video_frame.len, 1, fp);
171                 fclose(fp);
172                 //exit(1);
173                 card->usb->get_video_frame_allocator()->release_frame(video_frame);
174                 card->usb->get_audio_frame_allocator()->release_frame(audio_frame);
175                 return;
176         }
177         {
178                 // Wait until the previous frame was consumed.
179                 std::unique_lock<std::mutex> lock(bmusb_mutex);
180                 card->new_data_ready_changed.wait(lock, [card]{ return !card->new_data_ready; });
181         }
182         GLuint pbo = (GLint)(intptr_t)video_frame.userdata;
183         check_error();
184         glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);
185         check_error();
186         glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, video_frame.size);
187         check_error();
188         //glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
189         //check_error();
190         GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);              
191         check_error();
192         assert(fence != nullptr);
193         {
194                 std::unique_lock<std::mutex> lock(bmusb_mutex);
195                 card->new_data_ready = true;
196                 card->new_frame = video_frame;
197                 card->new_data_ready_fence = fence;
198                 card->new_data_ready_changed.notify_all();
199         }
200
201         // Video frame will be released later.
202         card->usb->get_audio_frame_allocator()->release_frame(audio_frame);
203 }
204         
205 void Mixer::place_rectangle(Effect *resample_effect, Effect *padding_effect, float x0, float y0, float x1, float y1)
206 {
207         float srcx0 = 0.0f;
208         float srcx1 = 1.0f;
209         float srcy0 = 0.0f;
210         float srcy1 = 1.0f;
211
212         // Cull.
213         if (x0 > 1280.0 || x1 < 0.0 || y0 > 720.0 || y1 < 0.0) {
214                 CHECK(resample_effect->set_int("width", 1));
215                 CHECK(resample_effect->set_int("height", 1));
216                 CHECK(resample_effect->set_float("zoom_x", 1280.0));
217                 CHECK(resample_effect->set_float("zoom_y", 720.0));
218                 CHECK(padding_effect->set_int("left", 2000));
219                 CHECK(padding_effect->set_int("top", 2000));
220                 return; 
221         }
222
223         // Clip. (TODO: Clip on upper/left sides, too.)
224         if (x1 > 1280.0) {
225                 srcx1 = (1280.0 - x0) / (x1 - x0);
226                 x1 = 1280.0;
227         }
228         if (y1 > 720.0) {
229                 srcy1 = (720.0 - y0) / (y1 - y0);
230                 y1 = 720.0;
231         }
232
233         float x_subpixel_offset = x0 - floor(x0);
234         float y_subpixel_offset = y0 - floor(y0);
235
236         // Resampling must be to an integral number of pixels. Round up,
237         // and then add an extra pixel so we have some leeway for the border.
238         int width = int(ceil(x1 - x0)) + 1;
239         int height = int(ceil(y1 - y0)) + 1;
240         CHECK(resample_effect->set_int("width", width));
241         CHECK(resample_effect->set_int("height", height));
242
243         // Correct the discrepancy with zoom. (This will leave a small
244         // excess edge of pixels and subpixels, which we'll correct for soon.)
245         float zoom_x = (x1 - x0) / (width * (srcx1 - srcx0));
246         float zoom_y = (y1 - y0) / (height * (srcy1 - srcy0));
247         CHECK(resample_effect->set_float("zoom_x", zoom_x));
248         CHECK(resample_effect->set_float("zoom_y", zoom_y));
249         CHECK(resample_effect->set_float("zoom_center_x", 0.0f));
250         CHECK(resample_effect->set_float("zoom_center_y", 0.0f));
251
252         // Padding must also be to a whole-pixel offset.
253         CHECK(padding_effect->set_int("left", floor(x0)));
254         CHECK(padding_effect->set_int("top", floor(y0)));
255
256         // Correct _that_ discrepancy by subpixel offset in the resampling.
257         CHECK(resample_effect->set_float("left", -x_subpixel_offset / zoom_x));
258         CHECK(resample_effect->set_float("top", -y_subpixel_offset / zoom_y));
259
260         // Finally, adjust the border so it is exactly where we want it.
261         CHECK(padding_effect->set_float("border_offset_left", x_subpixel_offset));
262         CHECK(padding_effect->set_float("border_offset_right", x1 - (floor(x0) + width)));
263         CHECK(padding_effect->set_float("border_offset_top", y_subpixel_offset));
264         CHECK(padding_effect->set_float("border_offset_bottom", y1 - (floor(y0) + height)));
265 }
266         
267 void Mixer::thread_func()
268 {
269         cards[0].surface = surface3;
270 #if NUM_CARDS == 2
271         cards[1].surface = surface4;
272 #endif
273
274         eglBindAPI(EGL_OPENGL_API);
275         QOpenGLContext *context = create_context();
276         if (!make_current(context, surface1)) {
277                 printf("oops\n");
278                 exit(1);
279         }
280
281         struct timespec start, now;
282         clock_gettime(CLOCK_MONOTONIC, &start);
283
284         while (!should_quit) {
285                 ++frame;
286
287                 //int width0 = lrintf(848 * (1.0 + 0.2 * sin(frame * 0.02)));
288                 int width0 = 848;
289                 int height0 = lrintf(width0 * 9.0 / 16.0);
290
291                 //float top0 = 96 + 48 * sin(frame * 0.005);
292                 //float left0 = 96 + 48 * cos(frame * 0.006);
293                 float top0 = 48;
294                 float left0 = 16;
295                 float bottom0 = top0 + height0;
296                 float right0 = left0 + width0;
297
298                 int width1 = 384;
299                 int height1 = 216;
300         
301                 float bottom1 = 720 - 48;
302                 float right1 = 1280 - 16;
303                 float top1 = bottom1 - height1;
304                 float left1 = right1 - width1;
305         
306                 if (current_source == SOURCE_INPUT1) {
307                         top0 = 0.0;
308                         bottom0 = HEIGHT;
309                         left0 = 0.0;
310                         right0 = WIDTH;
311
312                         top1 = HEIGHT + 10;
313                         bottom1 = HEIGHT + 20;
314                         left1 = WIDTH + 10;
315                         right1 = WIDTH + 20;
316                 } else if (current_source == SOURCE_INPUT2) {
317                         top1 = 0.0;
318                         bottom1 = HEIGHT;
319                         left1 = 0.0;
320                         right1 = WIDTH;
321
322                         top0 = HEIGHT + 10;
323                         bottom0 = HEIGHT + 20;
324                         left0 = WIDTH + 10;
325                         right0 = WIDTH + 20;
326                 } else {
327                         float t = 0.5 + 0.5 * cos(frame * 0.006);
328                         float scale0 = 1.0 + t * (1280.0 / 848.0 - 1.0);
329                         float tx0 = 0.0 + t * (-16.0 * scale0);
330                         float ty0 = 0.0 + t * (-48.0 * scale0);
331
332                         top0 = top0 * scale0 + ty0;
333                         bottom0 = bottom0 * scale0 + ty0;
334                         left0 = left0 * scale0 + tx0;
335                         right0 = right0 * scale0 + tx0;
336
337                         top1 = top1 * scale0 + ty0;
338                         bottom1 = bottom1 * scale0 + ty0;
339                         left1 = left1 * scale0 + tx0;
340                         right1 = right1 * scale0 + tx0;
341                 }
342
343                 place_rectangle(resample_effect, padding_effect, left0, top0, right0, bottom0);
344                 place_rectangle(resample2_effect, padding2_effect, left1, top1, right1, bottom1);
345
346                 CaptureCard card_copy[NUM_CARDS];
347
348                 {
349                         std::unique_lock<std::mutex> lock(bmusb_mutex);
350
351                         // The first card is the master timer, so wait for it to have a new frame.
352                         // TODO: Make configurable, and with a timeout.
353                         cards[0].new_data_ready_changed.wait(lock, [this]{ return cards[0].new_data_ready; });
354
355                         for (int card_index = 0; card_index < NUM_CARDS; ++card_index) {
356                                 CaptureCard *card = &cards[card_index];
357                                 card_copy[card_index].usb = card->usb;
358                                 card_copy[card_index].new_data_ready = card->new_data_ready;
359                                 card_copy[card_index].new_frame = card->new_frame;
360                                 card_copy[card_index].new_data_ready_fence = card->new_data_ready_fence;
361                                 card->new_data_ready = false;
362                                 card->new_data_ready_changed.notify_all();
363                         }
364                 }
365
366                 vector<FrameAllocator::Frame> input_frames_to_release;
367         
368                 for (int card_index = 0; card_index < NUM_CARDS; ++card_index) {
369                         CaptureCard *card = &card_copy[card_index];
370                         if (!card->new_data_ready)
371                                 continue;
372
373                         // Now we're done with the previous frame, so we can definitely
374                         // release it when this is done rendering. (Actually, we could do
375                         // it one frame earlier, but before we have a new one, there's no
376                         // knowing when the current one is released.)
377                         if (bmusb_current_rendering_frame[card_index].owner != nullptr) {
378                                 input_frames_to_release.push_back(bmusb_current_rendering_frame[card_index]);
379                         }
380                         bmusb_current_rendering_frame[card_index] = card->new_frame;
381                         check_error();
382
383                         // The new texture might still be uploaded,
384                         // tell the GPU to wait until it's there.
385                         if (card->new_data_ready_fence)
386                                 glWaitSync(card->new_data_ready_fence, /*flags=*/0, GL_TIMEOUT_IGNORED);
387                         check_error();
388                         glDeleteSync(card->new_data_ready_fence);
389                         check_error();
390                         GLint input_tex_pbo = (GLint)(intptr_t)card->new_frame.userdata;
391                         input[card_index]->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET((1280 * 750 * 2 + 44) / 2 + 1280 * 25 + 22), input_tex_pbo);
392                         input[card_index]->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(1280 * 25 + 22), input_tex_pbo);
393
394                         if (NUM_CARDS == 1) {
395                                 // Set to the other one, too.
396                                 input[1]->set_pixel_data(0, (unsigned char *)BUFFER_OFFSET((1280 * 750 * 2 + 44) / 2 + 1280 * 25 + 22), input_tex_pbo);
397                                 input[1]->set_pixel_data(1, (unsigned char *)BUFFER_OFFSET(1280 * 25 + 22), input_tex_pbo);
398                         }
399                 }
400
401                 GLuint y_tex, cbcr_tex;
402                 bool got_frame = h264_encoder->begin_frame(&y_tex, &cbcr_tex);
403                 assert(got_frame);
404
405                 // Render chain.
406                 GLuint cbcr_full_tex = resource_pool->create_2d_texture(GL_RG8, WIDTH, HEIGHT);
407                 GLuint rgba_tex = resource_pool->create_2d_texture(GL_RGBA8, WIDTH, HEIGHT);
408                 GLuint fbo = resource_pool->create_fbo(y_tex, cbcr_full_tex, rgba_tex);
409                 chain->render_to_fbo(fbo, WIDTH, HEIGHT);
410                 resource_pool->release_fbo(fbo);
411
412                 subsample_chroma(cbcr_full_tex, cbcr_tex);
413                 resource_pool->release_2d_texture(cbcr_full_tex);
414
415                 RefCountedGLsync fence(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
416                 check_error();
417                 h264_encoder->end_frame(fence, input_frames_to_release);
418
419                 // Store this frame for display. Remove the ready frame if any
420                 // (it was seemingly never used).
421                 {
422                         std::unique_lock<std::mutex> lock(display_frame_mutex);
423                         if (has_ready_display_frame) {
424                                 resource_pool->release_2d_texture(ready_display_frame.texnum);
425                                 ready_display_frame.ready_fence.reset();
426                         }
427                         ready_display_frame.texnum = rgba_tex;
428                         ready_display_frame.ready_fence = fence;
429                         has_ready_display_frame = true;
430                 }
431
432                 if (has_new_frame_ready_callback) {
433                         new_frame_ready_callback();
434                 }
435
436                 clock_gettime(CLOCK_MONOTONIC, &now);
437                 double elapsed = now.tv_sec - start.tv_sec +
438                         1e-9 * (now.tv_nsec - start.tv_nsec);
439                 if (frame % 100 == 0) {
440                         printf("%d frames in %.3f seconds = %.1f fps (%.1f ms/frame)\n",
441                                 frame, elapsed, frame / elapsed,
442                                 1e3 * elapsed / frame);
443                 //      chain->print_phase_timing();
444                 }
445
446                 // Reset every 100 frames, so that local variations in frame times
447                 // (especially for the first few frames, when the shaders are
448                 // compiled etc.) don't make it hard to measure for the entire
449                 // remaining duration of the program.
450                 if (frame == 10000) {
451                         frame = 0;
452                         start = now;
453                 }
454                 check_error();
455         }
456 }
457
458 void Mixer::subsample_chroma(GLuint src_tex, GLuint dst_tex)
459 {
460         GLuint vao;
461         glGenVertexArrays(1, &vao);
462         check_error();
463
464         float vertices[] = {
465                 0.0f, 2.0f,
466                 0.0f, 0.0f,
467                 2.0f, 0.0f
468         };
469
470         glBindVertexArray(vao);
471         check_error();
472
473         // Extract Cb/Cr.
474         GLuint fbo = resource_pool->create_fbo(dst_tex);
475         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
476         glViewport(0, 0, WIDTH/2, HEIGHT/2);
477         check_error();
478
479         glUseProgram(cbcr_program_num);
480         check_error();
481
482         glActiveTexture(GL_TEXTURE0);
483         check_error();
484         glBindTexture(GL_TEXTURE_2D, src_tex);
485         check_error();
486         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
487         check_error();
488         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
489         check_error();
490         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
491         check_error();
492
493         float chroma_offset_0[] = { -0.5f / WIDTH, 0.0f };
494         set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0);
495
496         GLuint position_vbo = fill_vertex_attribute(cbcr_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
497         GLuint texcoord_vbo = fill_vertex_attribute(cbcr_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
498
499         glDrawArrays(GL_TRIANGLES, 0, 3);
500         check_error();
501
502         cleanup_vertex_attribute(cbcr_program_num, "position", position_vbo);
503         cleanup_vertex_attribute(cbcr_program_num, "texcoord", texcoord_vbo);
504
505         glUseProgram(0);
506         check_error();
507
508         resource_pool->release_fbo(fbo);
509         glDeleteVertexArrays(1, &vao);
510 }
511
512 bool Mixer::get_display_frame(DisplayFrame *frame)
513 {
514         std::unique_lock<std::mutex> lock(display_frame_mutex);
515         if (!has_current_display_frame && !has_ready_display_frame) {
516                 return false;
517         }
518
519         if (has_current_display_frame && has_ready_display_frame) {
520                 // We have a new ready frame. Toss the current one.
521                 resource_pool->release_2d_texture(current_display_frame.texnum);
522                 current_display_frame.ready_fence.reset();
523                 has_current_display_frame = false;
524         }
525         if (has_ready_display_frame) {
526                 assert(!has_current_display_frame);
527                 current_display_frame = ready_display_frame;
528                 ready_display_frame.ready_fence.reset();  // Drop the refcount.
529                 has_current_display_frame = true;
530                 has_ready_display_frame = false;
531         }
532
533         *frame = current_display_frame;
534         return true;
535 }
536
537 void Mixer::set_frame_ready_fallback(new_frame_ready_callback_t callback)
538 {
539         new_frame_ready_callback = callback;
540         has_new_frame_ready_callback = true;
541 }
542
543 void Mixer::start()
544 {
545         mixer_thread = std::thread(&Mixer::thread_func, this);
546 }
547
548 void Mixer::quit()
549 {
550         should_quit = true;
551         mixer_thread.join();
552 }
553
554 void Mixer::cut(Source source)
555 {
556         current_source = source;
557 }