git.sesse.net Git - narabu/blobdiff - narabu-encoder.cpp
Silence some Mesa warnings.
[narabu] / narabu-encoder.cpp
index 2ddd8992fd2fe0da3b56bd2b0ec90598ceb93461..a4e77169b88d2df283f9b08c16fbc12c24f43431 100644 (file)
@@ -332,8 +332,30 @@ int main(int argc, char **argv)
                exit(1);
        }
 
+       // Compile the tally shader.
+       shader_src = ::read_file("tally.shader");
+       shader_num = compile_shader(shader_src, GL_COMPUTE_SHADER);
+       GLuint glsl_tally_program_num = glCreateProgram();
+       glAttachShader(glsl_tally_program_num, shader_num);
+       glLinkProgram(glsl_tally_program_num);
+
+       glGetProgramiv(glsl_tally_program_num, GL_LINK_STATUS, &success);
+       if (success == GL_FALSE) {
+               GLchar error_log[1024] = {0};
+               glGetProgramInfoLog(glsl_tally_program_num, 1024, nullptr, error_log);
+               fprintf(stderr, "Error linking program: %s\n", error_log);
+               exit(1);
+       }
+
        glUseProgram(glsl_program_num);
 
+       // An SSBO for the rANS distributions.
+       GLuint ssbo;
+       glGenBuffers(1, &ssbo);
+       glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
+       glBufferData(GL_SHADER_STORAGE_BUFFER, 256 * 4 * sizeof(uint32_t), nullptr, GL_DYNAMIC_COPY);
+       glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo);
+
        // Upload luma.
        GLuint y_tex;
        glGenTextures(1, &y_tex);
@@ -391,10 +413,19 @@ int main(int argc, char **argv)
        glBindImageTexture(5, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
        check_error();
 
+       glUseProgram(glsl_tally_program_num);
+       glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo);
+
        steady_clock::time_point start = steady_clock::now();
        unsigned num_iterations = 1000;
        for (unsigned i = 0; i < num_iterations; ++i) {
-               glDispatchCompute(WIDTH_BLOCKS, HEIGHT_BLOCKS, 1);
+               glClearNamedBufferSubData(ssbo, GL_R8, 0, 256 * 4 * sizeof(uint32_t), GL_RED, GL_UNSIGNED_BYTE, nullptr);
+               glUseProgram(glsl_program_num);
+               glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1);
+               glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+
+               glUseProgram(glsl_tally_program_num);
+               glDispatchCompute(4, 1, 1);
        }
        check_error();
        glFinish();
@@ -443,7 +474,7 @@ int main(int argc, char **argv)
                }
        }
 
-#if 1
+#if 0
        for (unsigned y = 0; y < HEIGHT; ++y) {
                for (unsigned xb = 0; xb < WIDTH/8; ++xb) {
                        printf("%4d %4d %4d %4d %4d %4d %4d %4d | ",
@@ -575,4 +606,11 @@ int main(int argc, char **argv)
        printf("\n");
        printf("Each iteration took %.3f ms (but note that is DCT only, no rANS).\n", 1e3 * duration<double>(now - start).count() / num_iterations);
 
+#if 1
+       glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
+       const uint32_t *dist = (const uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
+       for (int i = 0; i < 1024; ++i) {
+               printf("%d,%d: %u\n", i / 256, i % 256, dist[i]);
+       }
+#endif
 }