]> git.sesse.net Git - nageru/blob - flow_main.cpp
Allow symlinked frame files. Useful for testing.
[nageru] / flow_main.cpp
1 #define NO_SDL_GLEXT 1
2
3 #include "flow.h"
4 #include "gpu_timers.h"
5 #include "util.h"
6
7 #include <SDL2/SDL.h>
8 #include <SDL2/SDL_error.h>
9 #include <SDL2/SDL_events.h>
10 #include <SDL2/SDL_image.h>
11 #include <SDL2/SDL_keyboard.h>
12 #include <SDL2/SDL_mouse.h>
13 #include <SDL2/SDL_video.h>
14 #include <algorithm>
15 #include <assert.h>
16 #include <deque>
17 #include <epoxy/gl.h>
18 #include <getopt.h>
19 #include <map>
20 #include <memory>
21 #include <stack>
22 #include <stdio.h>
23 #include <unistd.h>
24 #include <vector>
25
26 #define BUFFER_OFFSET(i) ((char *)nullptr + (i))
27
28 using namespace std;
29
30 SDL_Window *window;
31
32 bool enable_warmup = false;
33 bool enable_variational_refinement = true;  // Just for debugging.
34 bool enable_interpolation = false;
35
36 extern float vr_alpha, vr_delta, vr_gamma;
37
38 // Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
39 struct ReadInProgress {
40         GLuint pbo;
41         string filename0, filename1;
42         string flow_filename, ppm_filename;  // Either may be empty for no write.
43 };
44 stack<GLuint> spare_pbos;
45 deque<ReadInProgress> reads_in_progress;
46
// Tells load_texture() whether to allocate and generate a full mipmap chain
// (WITH_MIPMAPS) or only the base level (WITHOUT_MIPMAPS).
enum MipmapPolicy {
        WITHOUT_MIPMAPS = 0,
        WITH_MIPMAPS = 1
};
51
52 GLuint load_texture(const char *filename, unsigned *width_ret, unsigned *height_ret, MipmapPolicy mipmaps)
53 {
54         SDL_Surface *surf = IMG_Load(filename);
55         if (surf == nullptr) {
56                 fprintf(stderr, "IMG_Load(%s): %s\n", filename, IMG_GetError());
57                 exit(1);
58         }
59
60         // For whatever reason, SDL doesn't support converting to YUV surfaces
61         // nor grayscale, so we'll do it ourselves.
62         SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA32, /*flags=*/0);
63         if (rgb_surf == nullptr) {
64                 fprintf(stderr, "SDL_ConvertSurfaceFormat(%s): %s\n", filename, SDL_GetError());
65                 exit(1);
66         }
67
68         SDL_FreeSurface(surf);
69
70         unsigned width = rgb_surf->w, height = rgb_surf->h;
71         const uint8_t *sptr = (uint8_t *)rgb_surf->pixels;
72         unique_ptr<uint8_t[]> pix(new uint8_t[width * height * 4]);
73
74         // Extract the Y component, and convert to bottom-left origin.
75         for (unsigned y = 0; y < height; ++y) {
76                 unsigned y2 = height - 1 - y;
77                 memcpy(pix.get() + y * width * 4, sptr + y2 * rgb_surf->pitch, width * 4);
78         }
79         SDL_FreeSurface(rgb_surf);
80
81         int num_levels = (mipmaps == WITH_MIPMAPS) ? find_num_levels(width, height) : 1;
82
83         GLuint tex;
84         glCreateTextures(GL_TEXTURE_2D, 1, &tex);
85         glTextureStorage2D(tex, num_levels, GL_RGBA8, width, height);
86         glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pix.get());
87
88         if (mipmaps == WITH_MIPMAPS) {
89                 glGenerateTextureMipmap(tex);
90         }
91
92         *width_ret = width;
93         *height_ret = height;
94
95         return tex;
96 }
97
// OpenGL uses a bottom-left coordinate system, .flo files use a top-left coordinate system.
// Negates, in place, the second component of each of the width * height
// interleaved two-component vectors in dense_flow.
void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
{
        float *const flow_end = dense_flow + 2 * size_t(width * height);
        for (float *vec = dense_flow; vec != flow_end; vec += 2) {
                vec[1] = -vec[1];
        }
}

// Not relevant for RGB; provided so templated callers can use the same call
// for both pixel types.
void flip_coordinate_system(uint8_t *dense_flow, unsigned width, unsigned height)
{
        // Intentionally empty.
}
110
// Writes a dense flow field to a .flo file.
//
// The file consists of the four bytes "FEIH", the width and height as
// native-endian 32-bit values, and then the rows as interleaved float pairs.
// dense_flow is stored bottom row first, so the rows are written in reverse
// order to put the top row first in the file.
//
// If the file cannot be opened or written, an error is printed and the
// program exits (previously a failed fopen() led to a NULL FILE* being used).
void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
{
        FILE *flowfp = fopen(filename, "wb");
        if (flowfp == nullptr) {
                perror(filename);
                exit(1);
        }

        fprintf(flowfp, "FEIH");
        fwrite(&width, sizeof(width), 1, flowfp);
        fwrite(&height, sizeof(height), 1, flowfp);

        // Do the offset arithmetic in size_t so large images cannot wrap around.
        const size_t floats_per_row = size_t(width) * 2;
        for (unsigned y = 0; y < height; ++y) {
                const size_t yy = size_t(height) - y - 1;
                fwrite(dense_flow + yy * floats_per_row, sizeof(float), floats_per_row, flowfp);
        }

        // Check the sticky error flag once instead of after every fwrite();
        // fclose() can also fail when flushing the last buffer.
        const bool write_failed = ferror(flowfp) != 0;
        if (fclose(flowfp) != 0 || write_failed) {
                fprintf(stderr, "%s: write failed\n", filename);
                exit(1);
        }
}

// Not relevant for RGB; exists only so that templated callers compile for
// both pixel types. Must never actually be called.
void write_flow(const char *filename, const uint8_t *dense_flow, unsigned width, unsigned height)
{
        assert(false);
}
129
130 void write_ppm(const char *filename, const float *dense_flow, unsigned width, unsigned height)
131 {
132         FILE *fp = fopen(filename, "wb");
133         fprintf(fp, "P6\n%d %d\n255\n", width, height);
134         for (unsigned y = 0; y < unsigned(height); ++y) {
135                 int yy = height - y - 1;
136                 for (unsigned x = 0; x < unsigned(width); ++x) {
137                         float du = dense_flow[(yy * width + x) * 2 + 0];
138                         float dv = dense_flow[(yy * width + x) * 2 + 1];
139
140                         uint8_t r, g, b;
141                         flow2rgb(du, dv, &r, &g, &b);
142                         putc(r, fp);
143                         putc(g, fp);
144                         putc(b, fp);
145                 }
146         }
147         fclose(fp);
148 }
149
// Writes an RGBA image as a binary PPM (P6), dropping the alpha channel.
//
// rgba holds width * height tightly packed 4-byte pixels, bottom row first;
// rows are emitted in reverse order to put the top row first in the file.
//
// If the file cannot be opened or written, an error is printed and the
// program exits (previously a failed fopen() led to a NULL FILE* being used).
void write_ppm(const char *filename, const uint8_t *rgba, unsigned width, unsigned height)
{
        FILE *fp = fopen(filename, "wb");
        if (fp == nullptr) {
                perror(filename);
                exit(1);
        }

        const size_t row_bytes = size_t(width) * 3;
        std::unique_ptr<uint8_t[]> rgb_line(new uint8_t[row_bytes]);

        // width and height are unsigned, so print them with %u.
        fprintf(fp, "P6\n%u %u\n255\n", width, height);
        for (unsigned y = 0; y < height; ++y) {
                const uint8_t *src = rgba + (size_t(height) - 1 - y) * width * 4;
                uint8_t *dst = rgb_line.get();
                for (unsigned x = 0; x < width; ++x, src += 4, dst += 3) {
                        // Copy exactly R, G and B, so no padding byte is needed
                        // after the last pixel of the line.
                        dst[0] = src[0];
                        dst[1] = src[1];
                        dst[2] = src[2];
                }
                fwrite(rgb_line.get(), 1, row_bytes, fp);
        }

        const bool write_failed = ferror(fp) != 0;
        if (fclose(fp) != 0 || write_failed) {
                fprintf(stderr, "%s: write failed\n", filename);
                exit(1);
        }
}
165
166 struct FlowType {
167         using type = float;
168         static constexpr GLenum gl_format = GL_RG;
169         static constexpr GLenum gl_type = GL_FLOAT;
170         static constexpr int num_channels = 2;
171 };
172
173 struct RGBAType {
174         using type = uint8_t;
175         static constexpr GLenum gl_format = GL_RGBA;
176         static constexpr GLenum gl_type = GL_UNSIGNED_BYTE;
177         static constexpr int num_channels = 4;
178 };
179
180 template <class Type>
181 void finish_one_read(GLuint width, GLuint height)
182 {
183         using T = typename Type::type;
184         constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
185
186         assert(!reads_in_progress.empty());
187         ReadInProgress read = reads_in_progress.front();
188         reads_in_progress.pop_front();
189
190         unique_ptr<T[]> flow(new typename Type::type[width * height * Type::num_channels]);
191         void *buf = glMapNamedBufferRange(read.pbo, 0, width * height * bytes_per_pixel, GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
192         memcpy(flow.get(), buf, width * height * bytes_per_pixel);  // TODO: Unneeded for RGBType, since flip_coordinate_system() does nothing.:
193         glUnmapNamedBuffer(read.pbo);
194         spare_pbos.push(read.pbo);
195
196         flip_coordinate_system(flow.get(), width, height);
197         if (!read.flow_filename.empty()) {
198                 write_flow(read.flow_filename.c_str(), flow.get(), width, height);
199                 fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
200         }
201         if (!read.ppm_filename.empty()) {
202                 write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
203         }
204 }
205
206 template <class Type>
207 void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
208 {
209         using T = typename Type::type;
210         constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
211
212         if (spare_pbos.empty()) {
213                 finish_one_read<Type>(width, height);
214         }
215         assert(!spare_pbos.empty());
216         reads_in_progress.emplace_back(ReadInProgress{ spare_pbos.top(), filename0, filename1, flow_filename, ppm_filename });
217         glBindBuffer(GL_PIXEL_PACK_BUFFER, spare_pbos.top());
218         spare_pbos.pop();
219         glGetTextureImage(tex, 0, Type::gl_format, Type::gl_type, width * height * bytes_per_pixel, nullptr);
220         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
221 }
222
223 void compute_flow_only(int argc, char **argv, int optind)
224 {
225         const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
226         const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
227         const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
228
229         // Load pictures.
230         unsigned width1, height1, width2, height2;
231         GLuint tex0 = load_texture(filename0, &width1, &height1, WITHOUT_MIPMAPS);
232         GLuint tex1 = load_texture(filename1, &width2, &height2, WITHOUT_MIPMAPS);
233
234         if (width1 != width2 || height1 != height2) {
235                 fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
236                         width1, height1, width2, height2);
237                 exit(1);
238         }
239
240         // Move them into an array texture, since that's how the rest of the code
241         // would like them.
242         GLuint image_tex;
243         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
244         glTextureStorage3D(image_tex, 1, GL_RGBA8, width1, height1, 2);
245         glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
246         glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
247         glDeleteTextures(1, &tex0);
248         glDeleteTextures(1, &tex1);
249
250         // Set up some PBOs to do asynchronous readback.
251         GLuint pbos[5];
252         glCreateBuffers(5, pbos);
253         for (int i = 0; i < 5; ++i) {
254                 glNamedBufferData(pbos[i], width1 * height1 * 2 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
255                 spare_pbos.push(pbos[i]);
256         }
257
258         int levels = find_num_levels(width1, height1);
259
260         GLuint tex_gray;
261         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
262         glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
263
264         OperatingPoint op = operating_point3;
265         if (!enable_variational_refinement) {
266                 op.variational_refinement = false;
267         }
268
269         DISComputeFlow compute_flow(width1, height1, op);  // Must be initialized before gray.
270         GrayscaleConversion gray;
271         gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
272         glGenerateTextureMipmap(tex_gray);
273
274         if (enable_warmup) {
275                 in_warmup = true;
276                 for (int i = 0; i < 10; ++i) {
277                         GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
278                         compute_flow.release_texture(final_tex);
279                 }
280                 in_warmup = false;
281         }
282
283         GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
284         //GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
285
286         schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
287         compute_flow.release_texture(final_tex);
288
289         // See if there are more flows on the command line (ie., more than three arguments),
290         // and if so, process them.
291         int num_flows = (argc - optind) / 3;
292         for (int i = 1; i < num_flows; ++i) {
293                 const char *filename0 = argv[optind + i * 3 + 0];
294                 const char *filename1 = argv[optind + i * 3 + 1];
295                 const char *flow_filename = argv[optind + i * 3 + 2];
296                 GLuint width, height;
297                 GLuint tex0 = load_texture(filename0, &width, &height, WITHOUT_MIPMAPS);
298                 if (width != width1 || height != height1) {
299                         fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
300                                 filename0, width, height, width1, height1);
301                         exit(1);
302                 }
303                 glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
304                 glDeleteTextures(1, &tex0);
305
306                 GLuint tex1 = load_texture(filename1, &width, &height, WITHOUT_MIPMAPS);
307                 if (width != width1 || height != height1) {
308                         fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
309                                 filename1, width, height, width1, height1);
310                         exit(1);
311                 }
312                 glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
313                 glDeleteTextures(1, &tex1);
314
315                 gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
316                 glGenerateTextureMipmap(tex_gray);
317
318                 GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
319
320                 schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "");
321                 compute_flow.release_texture(final_tex);
322         }
323         glDeleteTextures(1, &tex_gray);
324
325         while (!reads_in_progress.empty()) {
326                 finish_one_read<FlowType>(width1, height1);
327         }
328 }
329
330 // Interpolate images based on
331 //
332 //   Herbst, Seitz, Baker: “Occlusion Reasoning for Temporal Interpolation
333 //   Using Optical Flow”
334 //
335 // or at least a reasonable subset thereof. Unfinished.
336 void interpolate_image(int argc, char **argv, int optind)
337 {
338         const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
339         const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
340         //const char *out_filename = argc >= (optind + 3) ? argv[optind + 2] : "interpolated.png";
341
342         // Load pictures.
343         unsigned width1, height1, width2, height2;
344         GLuint tex0 = load_texture(filename0, &width1, &height1, WITH_MIPMAPS);
345         GLuint tex1 = load_texture(filename1, &width2, &height2, WITH_MIPMAPS);
346
347         if (width1 != width2 || height1 != height2) {
348                 fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
349                         width1, height1, width2, height2);
350                 exit(1);
351         }
352
353         // Move them into an array texture, since that's how the rest of the code
354         // would like them.
355         int levels = find_num_levels(width1, height1);
356         GLuint image_tex;
357         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
358         glTextureStorage3D(image_tex, levels, GL_RGBA8, width1, height1, 2);
359         glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
360         glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
361         glDeleteTextures(1, &tex0);
362         glDeleteTextures(1, &tex1);
363         glGenerateTextureMipmap(image_tex);
364
365         // Set up some PBOs to do asynchronous readback.
366         GLuint pbos[5];
367         glCreateBuffers(5, pbos);
368         for (int i = 0; i < 5; ++i) {
369                 glNamedBufferData(pbos[i], width1 * height1 * 4 * sizeof(uint8_t), nullptr, GL_STREAM_READ);
370                 spare_pbos.push(pbos[i]);
371         }
372
373         OperatingPoint op = operating_point3;
374         if (!enable_variational_refinement) {
375                 op.variational_refinement = false;
376         }
377         DISComputeFlow compute_flow(width1, height1, op);
378         GrayscaleConversion gray;
379         Interpolate interpolate(op, /*split_ycbcr_output=*/false);
380
381         GLuint tex_gray;
382         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
383         glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
384         gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
385         glGenerateTextureMipmap(tex_gray);
386
387         if (enable_warmup) {
388                 in_warmup = true;
389                 for (int i = 0; i < 10; ++i) {
390                         GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
391                         GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f).first;
392                         compute_flow.release_texture(bidirectional_flow_tex);
393                         interpolate.release_texture(interpolated_tex);
394                 }
395                 in_warmup = false;
396         }
397
398         GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
399
400         for (int frameno = 1; frameno < 60; ++frameno) {
401                 char ppm_filename[256];
402                 snprintf(ppm_filename, sizeof(ppm_filename), "interp%04d.ppm", frameno);
403
404                 float alpha = frameno / 60.0f;
405                 GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha).first;
406
407                 schedule_read<RGBAType>(interpolated_tex, width1, height1, filename0, filename1, "", ppm_filename);
408                 interpolate.release_texture(interpolated_tex);
409         }
410
411         while (!reads_in_progress.empty()) {
412                 finish_one_read<RGBAType>(width1, height1);
413         }
414 }
415
416 int main(int argc, char **argv)
417 {
418         static const option long_options[] = {
419                 { "smoothness-relative-weight", required_argument, 0, 's' },  // alpha.
420                 { "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
421                 { "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
422                 { "disable-timing", no_argument, 0, 1000 },
423                 { "detailed-timing", no_argument, 0, 1003 },
424                 { "disable-variational-refinement", no_argument, 0, 1001 },
425                 { "interpolate", no_argument, 0, 1002 },
426                 { "warmup", no_argument, 0, 1004 }
427         };
428
429         enable_timing = true;
430
431         for ( ;; ) {
432                 int option_index = 0;
433                 int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
434
435                 if (c == -1) {
436                         break;
437                 }
438                 switch (c) {
439                 case 's':
440                         vr_alpha = atof(optarg);
441                         break;
442                 case 'i':
443                         vr_delta = atof(optarg);
444                         break;
445                 case 'g':
446                         vr_gamma = atof(optarg);
447                         break;
448                 case 1000:
449                         enable_timing = false;
450                         break;
451                 case 1001:
452                         enable_variational_refinement = false;
453                         break;
454                 case 1002:
455                         enable_interpolation = true;
456                         break;
457                 case 1003:
458                         detailed_timing = true;
459                         break;
460                 case 1004:
461                         enable_warmup = true;
462                         break;
463                 default:
464                         fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
465                         exit(1);
466                 };
467         }
468
469         if (SDL_Init(SDL_INIT_EVERYTHING) == -1) {
470                 fprintf(stderr, "SDL_Init failed: %s\n", SDL_GetError());
471                 exit(1);
472         }
473         SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
474         SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 0);
475         SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 0);
476         SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
477
478         SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
479         SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
480         SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
481         // SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
482         window = SDL_CreateWindow("OpenGL window",
483                 SDL_WINDOWPOS_UNDEFINED,
484                 SDL_WINDOWPOS_UNDEFINED,
485                 64, 64,
486                 SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
487         SDL_GLContext context = SDL_GL_CreateContext(window);
488         assert(context != nullptr);
489
490         if (enable_interpolation) {
491                 interpolate_image(argc, argv, optind);
492         } else {
493                 compute_flow_only(argc, argv, optind);
494         }
495 }