X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=narabu-encoder.cpp;fp=narabu-encoder.cpp;h=a4e77169b88d2df283f9b08c16fbc12c24f43431;hb=fd6116de8d7253bed230222bf277a7c8aaa3b8ff;hp=730135978c34c3bb604bcad6d3b893df0b2e7d16;hpb=57fdfa782c418299639dcc670a194716c0657cab;p=narabu diff --git a/narabu-encoder.cpp b/narabu-encoder.cpp index 7301359..a4e7716 100644 --- a/narabu-encoder.cpp +++ b/narabu-encoder.cpp @@ -332,13 +332,28 @@ int main(int argc, char **argv) exit(1); } + // Compile the tally shader. + shader_src = ::read_file("tally.shader"); + shader_num = compile_shader(shader_src, GL_COMPUTE_SHADER); + GLuint glsl_tally_program_num = glCreateProgram(); + glAttachShader(glsl_tally_program_num, shader_num); + glLinkProgram(glsl_tally_program_num); + + glGetProgramiv(glsl_tally_program_num, GL_LINK_STATUS, &success); + if (success == GL_FALSE) { + GLchar error_log[1024] = {0}; + glGetProgramInfoLog(glsl_tally_program_num, 1024, nullptr, error_log); + fprintf(stderr, "Error linking program: %s\n", error_log); + exit(1); + } + glUseProgram(glsl_program_num); // An SSBO for the rANS distributions. GLuint ssbo; glGenBuffers(1, &ssbo); glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); - glBufferData(GL_SHADER_STORAGE_BUFFER, 65536 * 4 * sizeof(uint32_t), nullptr, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, 256 * 4 * sizeof(uint32_t), nullptr, GL_DYNAMIC_COPY); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo); // Upload luma. @@ -398,10 +413,19 @@ int main(int argc, char **argv) glBindImageTexture(5, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI); check_error(); + glUseProgram(glsl_tally_program_num); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo); + steady_clock::time_point start = steady_clock::now(); - unsigned num_iterations = 100; + unsigned num_iterations = 1000; for (unsigned i = 0; i < num_iterations; ++i) { + glClearNamedBufferSubData(ssbo, GL_R8, 0, 256 * 4 * sizeof(uint32_t), GL_RED, GL_UNSIGNED_BYTE, nullptr); + glUseProgram(glsl_program_num); glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + glUseProgram(glsl_tally_program_num); + glDispatchCompute(4, 1, 1); } check_error(); glFinish(); @@ -450,7 +474,7 @@ int main(int argc, char **argv) } } -#if 1 +#if 0 for (unsigned y = 0; y < HEIGHT; ++y) { for (unsigned xb = 0; xb < WIDTH/8; ++xb) { printf("%4d %4d %4d %4d %4d %4d %4d %4d | ", @@ -586,7 +610,7 @@ int main(int argc, char **argv) glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); const uint32_t *dist = (const uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY); for (int i = 0; i < 1024; ++i) { - printf("%d,%d: %u\n", i / 256, i % 256, dist[i] / num_iterations); + printf("%d,%d: %u\n", i / 256, i % 256, dist[i]); } #endif }