]> git.sesse.net Git - nageru/blob - flow_main.cpp
Do deinterleaving on the GPU (subsampling still remains).
[nageru] / flow_main.cpp
1 #define NO_SDL_GLEXT 1
2
3 #include <epoxy/gl.h>
4
5 #include <SDL2/SDL.h>
6 #include <SDL2/SDL_error.h>
7 #include <SDL2/SDL_events.h>
8 #include <SDL2/SDL_image.h>
9 #include <SDL2/SDL_keyboard.h>
10 #include <SDL2/SDL_mouse.h>
11 #include <SDL2/SDL_video.h>
12
13 #include <assert.h>
14 #include <getopt.h>
15 #include <stdio.h>
16 #include <unistd.h>
17
18 #include "flow.h"
19 #include "gpu_timers.h"
20 #include "util.h"
21
22 #include <algorithm>
23 #include <deque>
24 #include <memory>
25 #include <map>
26 #include <stack>
27 #include <vector>
28
29 #define BUFFER_OFFSET(i) ((char *)nullptr + (i))
30
31 using namespace std;
32
33 SDL_Window *window;
34
35 bool enable_warmup = false;
36 bool enable_variational_refinement = true;  // Just for debugging.
37 bool enable_interpolation = false;
38
39 extern float vr_alpha, vr_delta, vr_gamma;
40
41 // Structures for asynchronous readback. We assume everything is the same size (and GL_RG16F).
42 struct ReadInProgress {
43         GLuint pbo;
44         string filename0, filename1;
45         string flow_filename, ppm_filename;  // Either may be empty for no write.
46 };
47 stack<GLuint> spare_pbos;
48 deque<ReadInProgress> reads_in_progress;
49
50 enum MipmapPolicy {
51         WITHOUT_MIPMAPS,
52         WITH_MIPMAPS
53 };
54
55 GLuint load_texture(const char *filename, unsigned *width_ret, unsigned *height_ret, MipmapPolicy mipmaps)
56 {
57         SDL_Surface *surf = IMG_Load(filename);
58         if (surf == nullptr) {
59                 fprintf(stderr, "IMG_Load(%s): %s\n", filename, IMG_GetError());
60                 exit(1);
61         }
62
63         // For whatever reason, SDL doesn't support converting to YUV surfaces
64         // nor grayscale, so we'll do it ourselves.
65         SDL_Surface *rgb_surf = SDL_ConvertSurfaceFormat(surf, SDL_PIXELFORMAT_RGBA32, /*flags=*/0);
66         if (rgb_surf == nullptr) {
67                 fprintf(stderr, "SDL_ConvertSurfaceFormat(%s): %s\n", filename, SDL_GetError());
68                 exit(1);
69         }
70
71         SDL_FreeSurface(surf);
72
73         unsigned width = rgb_surf->w, height = rgb_surf->h;
74         const uint8_t *sptr = (uint8_t *)rgb_surf->pixels;
75         unique_ptr<uint8_t[]> pix(new uint8_t[width * height * 4]);
76
77         // Extract the Y component, and convert to bottom-left origin.
78         for (unsigned y = 0; y < height; ++y) {
79                 unsigned y2 = height - 1 - y;
80                 memcpy(pix.get() + y * width * 4, sptr + y2 * rgb_surf->pitch, width * 4);
81         }
82         SDL_FreeSurface(rgb_surf);
83
84         int num_levels = (mipmaps == WITH_MIPMAPS) ? find_num_levels(width, height) : 1;
85
86         GLuint tex;
87         glCreateTextures(GL_TEXTURE_2D, 1, &tex);
88         glTextureStorage2D(tex, num_levels, GL_RGBA8, width, height);
89         glTextureSubImage2D(tex, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pix.get());
90
91         if (mipmaps == WITH_MIPMAPS) {
92                 glGenerateTextureMipmap(tex);
93         }
94
95         *width_ret = width;
96         *height_ret = height;
97
98         return tex;
99 }
100
101 // OpenGL uses a bottom-left coordinate system, .flo files use a top-left coordinate system.
102 void flip_coordinate_system(float *dense_flow, unsigned width, unsigned height)
103 {
104         for (unsigned i = 0; i < width * height; ++i) {
105                 dense_flow[i * 2 + 1] = -dense_flow[i * 2 + 1];
106         }
107 }
108
109 // Not relevant for RGB.
110 void flip_coordinate_system(uint8_t *dense_flow, unsigned width, unsigned height)
111 {
112 }
113
114 void write_flow(const char *filename, const float *dense_flow, unsigned width, unsigned height)
115 {
116         FILE *flowfp = fopen(filename, "wb");
117         fprintf(flowfp, "FEIH");
118         fwrite(&width, 4, 1, flowfp);
119         fwrite(&height, 4, 1, flowfp);
120         for (unsigned y = 0; y < height; ++y) {
121                 int yy = height - y - 1;
122                 fwrite(&dense_flow[yy * width * 2], width * 2 * sizeof(float), 1, flowfp);
123         }
124         fclose(flowfp);
125 }
126
127 // Not relevant for RGB.
128 void write_flow(const char *filename, const uint8_t *dense_flow, unsigned width, unsigned height)
129 {
130         assert(false);
131 }
132
133 void write_ppm(const char *filename, const float *dense_flow, unsigned width, unsigned height)
134 {
135         FILE *fp = fopen(filename, "wb");
136         fprintf(fp, "P6\n%d %d\n255\n", width, height);
137         for (unsigned y = 0; y < unsigned(height); ++y) {
138                 int yy = height - y - 1;
139                 for (unsigned x = 0; x < unsigned(width); ++x) {
140                         float du = dense_flow[(yy * width + x) * 2 + 0];
141                         float dv = dense_flow[(yy * width + x) * 2 + 1];
142
143                         uint8_t r, g, b;
144                         flow2rgb(du, dv, &r, &g, &b);
145                         putc(r, fp);
146                         putc(g, fp);
147                         putc(b, fp);
148                 }
149         }
150         fclose(fp);
151 }
152
153 void write_ppm(const char *filename, const uint8_t *rgba, unsigned width, unsigned height)
154 {
155         unique_ptr<uint8_t[]> rgb_line(new uint8_t[width * 3 + 1]);
156
157         FILE *fp = fopen(filename, "wb");
158         fprintf(fp, "P6\n%d %d\n255\n", width, height);
159         for (unsigned y = 0; y < height; ++y) {
160                 unsigned y2 = height - 1 - y;
161                 for (size_t x = 0; x < width; ++x) {
162                         memcpy(&rgb_line[x * 3], &rgba[(y2 * width + x) * 4], 4);
163                 }
164                 fwrite(rgb_line.get(), width * 3, 1, fp);
165         }
166         fclose(fp);
167 }
168
169 struct FlowType {
170         using type = float;
171         static constexpr GLenum gl_format = GL_RG;
172         static constexpr GLenum gl_type = GL_FLOAT;
173         static constexpr int num_channels = 2;
174 };
175
176 struct RGBAType {
177         using type = uint8_t;
178         static constexpr GLenum gl_format = GL_RGBA;
179         static constexpr GLenum gl_type = GL_UNSIGNED_BYTE;
180         static constexpr int num_channels = 4;
181 };
182
183 template <class Type>
184 void finish_one_read(GLuint width, GLuint height)
185 {
186         using T = typename Type::type;
187         constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
188
189         assert(!reads_in_progress.empty());
190         ReadInProgress read = reads_in_progress.front();
191         reads_in_progress.pop_front();
192
193         unique_ptr<T[]> flow(new typename Type::type[width * height * Type::num_channels]);
194         void *buf = glMapNamedBufferRange(read.pbo, 0, width * height * bytes_per_pixel, GL_MAP_READ_BIT);  // Blocks if the read isn't done yet.
195         memcpy(flow.get(), buf, width * height * bytes_per_pixel);  // TODO: Unneeded for RGBType, since flip_coordinate_system() does nothing.:
196         glUnmapNamedBuffer(read.pbo);
197         spare_pbos.push(read.pbo);
198
199         flip_coordinate_system(flow.get(), width, height);
200         if (!read.flow_filename.empty()) {
201                 write_flow(read.flow_filename.c_str(), flow.get(), width, height);
202                 fprintf(stderr, "%s %s -> %s\n", read.filename0.c_str(), read.filename1.c_str(), read.flow_filename.c_str());
203         }
204         if (!read.ppm_filename.empty()) {
205                 write_ppm(read.ppm_filename.c_str(), flow.get(), width, height);
206         }
207 }
208
209 template <class Type>
210 void schedule_read(GLuint tex, GLuint width, GLuint height, const char *filename0, const char *filename1, const char *flow_filename, const char *ppm_filename)
211 {
212         using T = typename Type::type;
213         constexpr int bytes_per_pixel = Type::num_channels * sizeof(T);
214
215         if (spare_pbos.empty()) {
216                 finish_one_read<Type>(width, height);
217         }
218         assert(!spare_pbos.empty());
219         reads_in_progress.emplace_back(ReadInProgress{ spare_pbos.top(), filename0, filename1, flow_filename, ppm_filename });
220         glBindBuffer(GL_PIXEL_PACK_BUFFER, spare_pbos.top());
221         spare_pbos.pop();
222         glGetTextureImage(tex, 0, Type::gl_format, Type::gl_type, width * height * bytes_per_pixel, nullptr);
223         glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
224 }
225
226 void compute_flow_only(int argc, char **argv, int optind)
227 {
228         const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
229         const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
230         const char *flow_filename = argc >= (optind + 3) ? argv[optind + 2] : "flow.flo";
231
232         // Load pictures.
233         unsigned width1, height1, width2, height2;
234         GLuint tex0 = load_texture(filename0, &width1, &height1, WITHOUT_MIPMAPS);
235         GLuint tex1 = load_texture(filename1, &width2, &height2, WITHOUT_MIPMAPS);
236
237         if (width1 != width2 || height1 != height2) {
238                 fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
239                         width1, height1, width2, height2);
240                 exit(1);
241         }
242
243         // Move them into an array texture, since that's how the rest of the code
244         // would like them.
245         GLuint image_tex;
246         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
247         glTextureStorage3D(image_tex, 1, GL_RGBA8, width1, height1, 2);
248         glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
249         glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
250         glDeleteTextures(1, &tex0);
251         glDeleteTextures(1, &tex1);
252
253         // Set up some PBOs to do asynchronous readback.
254         GLuint pbos[5];
255         glCreateBuffers(5, pbos);
256         for (int i = 0; i < 5; ++i) {
257                 glNamedBufferData(pbos[i], width1 * height1 * 2 * 2 * sizeof(float), nullptr, GL_STREAM_READ);
258                 spare_pbos.push(pbos[i]);
259         }
260
261         int levels = find_num_levels(width1, height1);
262
263         GLuint tex_gray;
264         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
265         glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
266
267         GrayscaleConversion gray;
268         gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
269         glGenerateTextureMipmap(tex_gray);
270
271         OperatingPoint op = operating_point3;
272         if (!enable_variational_refinement) {
273                 op.variational_refinement = false;
274         }
275         DISComputeFlow compute_flow(width1, height1, op);
276
277         if (enable_warmup) {
278                 in_warmup = true;
279                 for (int i = 0; i < 10; ++i) {
280                         GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
281                         compute_flow.release_texture(final_tex);
282                 }
283                 in_warmup = false;
284         }
285
286         GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
287         //GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
288
289         schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "flow.ppm");
290         compute_flow.release_texture(final_tex);
291
292         // See if there are more flows on the command line (ie., more than three arguments),
293         // and if so, process them.
294         int num_flows = (argc - optind) / 3;
295         for (int i = 1; i < num_flows; ++i) {
296                 const char *filename0 = argv[optind + i * 3 + 0];
297                 const char *filename1 = argv[optind + i * 3 + 1];
298                 const char *flow_filename = argv[optind + i * 3 + 2];
299                 GLuint width, height;
300                 GLuint tex0 = load_texture(filename0, &width, &height, WITHOUT_MIPMAPS);
301                 if (width != width1 || height != height1) {
302                         fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
303                                 filename0, width, height, width1, height1);
304                         exit(1);
305                 }
306                 glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
307                 glDeleteTextures(1, &tex0);
308
309                 GLuint tex1 = load_texture(filename1, &width, &height, WITHOUT_MIPMAPS);
310                 if (width != width1 || height != height1) {
311                         fprintf(stderr, "%s: Image dimensions don't match (%dx%d versus %dx%d)\n",
312                                 filename1, width, height, width1, height1);
313                         exit(1);
314                 }
315                 glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
316                 glDeleteTextures(1, &tex1);
317
318                 gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
319                 glGenerateTextureMipmap(tex_gray);
320
321                 GLuint final_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD, DISComputeFlow::RESIZE_FLOW_TO_FULL_SIZE);
322
323                 schedule_read<FlowType>(final_tex, width1, height1, filename0, filename1, flow_filename, "");
324                 compute_flow.release_texture(final_tex);
325         }
326         glDeleteTextures(1, &tex_gray);
327
328         while (!reads_in_progress.empty()) {
329                 finish_one_read<FlowType>(width1, height1);
330         }
331 }
332
333 // Interpolate images based on
334 //
335 //   Herbst, Seitz, Baker: “Occlusion Reasoning for Temporal Interpolation
336 //   Using Optical Flow”
337 //
338 // or at least a reasonable subset thereof. Unfinished.
339 void interpolate_image(int argc, char **argv, int optind)
340 {
341         const char *filename0 = argc >= (optind + 1) ? argv[optind] : "test1499.png";
342         const char *filename1 = argc >= (optind + 2) ? argv[optind + 1] : "test1500.png";
343         //const char *out_filename = argc >= (optind + 3) ? argv[optind + 2] : "interpolated.png";
344
345         // Load pictures.
346         unsigned width1, height1, width2, height2;
347         GLuint tex0 = load_texture(filename0, &width1, &height1, WITH_MIPMAPS);
348         GLuint tex1 = load_texture(filename1, &width2, &height2, WITH_MIPMAPS);
349
350         if (width1 != width2 || height1 != height2) {
351                 fprintf(stderr, "Image dimensions don't match (%dx%d versus %dx%d)\n",
352                         width1, height1, width2, height2);
353                 exit(1);
354         }
355
356         // Move them into an array texture, since that's how the rest of the code
357         // would like them.
358         int levels = find_num_levels(width1, height1);
359         GLuint image_tex;
360         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &image_tex);
361         glTextureStorage3D(image_tex, levels, GL_RGBA8, width1, height1, 2);
362         glCopyImageSubData(tex0, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, width1, height1, 1);
363         glCopyImageSubData(tex1, GL_TEXTURE_2D, 0, 0, 0, 0, image_tex, GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, width1, height1, 1);
364         glDeleteTextures(1, &tex0);
365         glDeleteTextures(1, &tex1);
366         glGenerateTextureMipmap(image_tex);
367
368         // Set up some PBOs to do asynchronous readback.
369         GLuint pbos[5];
370         glCreateBuffers(5, pbos);
371         for (int i = 0; i < 5; ++i) {
372                 glNamedBufferData(pbos[i], width1 * height1 * 4 * sizeof(uint8_t), nullptr, GL_STREAM_READ);
373                 spare_pbos.push(pbos[i]);
374         }
375
376         OperatingPoint op = operating_point3;
377         if (!enable_variational_refinement) {
378                 op.variational_refinement = false;
379         }
380         DISComputeFlow compute_flow(width1, height1, op);
381         GrayscaleConversion gray;
382         Interpolate interpolate(width1, height1, op, /*split_ycbcr_output=*/false);
383
384         GLuint tex_gray;
385         glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
386         glTextureStorage3D(tex_gray, levels, GL_R8, width1, height1, 2);
387         gray.exec(image_tex, tex_gray, width1, height1, /*num_layers=*/2);
388         glGenerateTextureMipmap(tex_gray);
389
390         if (enable_warmup) {
391                 in_warmup = true;
392                 for (int i = 0; i < 10; ++i) {
393                         GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
394                         GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f).first;
395                         compute_flow.release_texture(bidirectional_flow_tex);
396                         interpolate.release_texture(interpolated_tex);
397                 }
398                 in_warmup = false;
399         }
400
401         GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
402
403         for (int frameno = 1; frameno < 60; ++frameno) {
404                 char ppm_filename[256];
405                 snprintf(ppm_filename, sizeof(ppm_filename), "interp%04d.ppm", frameno);
406
407                 float alpha = frameno / 60.0f;
408                 GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha).first;
409
410                 schedule_read<RGBAType>(interpolated_tex, width1, height1, filename0, filename1, "", ppm_filename);
411                 interpolate.release_texture(interpolated_tex);
412         }
413
414         while (!reads_in_progress.empty()) {
415                 finish_one_read<RGBAType>(width1, height1);
416         }
417 }
418
419 int main(int argc, char **argv)
420 {
421         static const option long_options[] = {
422                 { "smoothness-relative-weight", required_argument, 0, 's' },  // alpha.
423                 { "intensity-relative-weight", required_argument, 0, 'i' },  // delta.
424                 { "gradient-relative-weight", required_argument, 0, 'g' },  // gamma.
425                 { "disable-timing", no_argument, 0, 1000 },
426                 { "detailed-timing", no_argument, 0, 1003 },
427                 { "disable-variational-refinement", no_argument, 0, 1001 },
428                 { "interpolate", no_argument, 0, 1002 },
429                 { "warmup", no_argument, 0, 1004 }
430         };
431
432         enable_timing = true;
433
434         for ( ;; ) {
435                 int option_index = 0;
436                 int c = getopt_long(argc, argv, "s:i:g:", long_options, &option_index);
437
438                 if (c == -1) {
439                         break;
440                 }
441                 switch (c) {
442                 case 's':
443                         vr_alpha = atof(optarg);
444                         break;
445                 case 'i':
446                         vr_delta = atof(optarg);
447                         break;
448                 case 'g':
449                         vr_gamma = atof(optarg);
450                         break;
451                 case 1000:
452                         enable_timing = false;
453                         break;
454                 case 1001:
455                         enable_variational_refinement = false;
456                         break;
457                 case 1002:
458                         enable_interpolation = true;
459                         break;
460                 case 1003:
461                         detailed_timing = true;
462                         break;
463                 case 1004:
464                         enable_warmup = true;
465                         break;
466                 default:
467                         fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
468                         exit(1);
469                 };
470         }
471
472         if (SDL_Init(SDL_INIT_EVERYTHING) == -1) {
473                 fprintf(stderr, "SDL_Init failed: %s\n", SDL_GetError());
474                 exit(1);
475         }
476         SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
477         SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 0);
478         SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 0);
479         SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
480
481         SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
482         SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
483         SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
484         // SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
485         window = SDL_CreateWindow("OpenGL window",
486                 SDL_WINDOWPOS_UNDEFINED,
487                 SDL_WINDOWPOS_UNDEFINED,
488                 64, 64,
489                 SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
490         SDL_GLContext context = SDL_GL_CreateContext(window);
491         assert(context != nullptr);
492
493         if (enable_interpolation) {
494                 interpolate_image(argc, argv, optind);
495         } else {
496                 compute_flow_only(argc, argv, optind);
497         }
498 }