exit(1);
}
+ // Compile the tally shader.
+ shader_src = ::read_file("tally.shader");
+ shader_num = compile_shader(shader_src, GL_COMPUTE_SHADER);
+ GLuint glsl_tally_program_num = glCreateProgram();
+ glAttachShader(glsl_tally_program_num, shader_num);
+ glLinkProgram(glsl_tally_program_num);
+
+ glGetProgramiv(glsl_tally_program_num, GL_LINK_STATUS, &success);
+ if (success == GL_FALSE) {
+ GLchar error_log[1024] = {0};
+ glGetProgramInfoLog(glsl_tally_program_num, 1024, nullptr, error_log);
+ fprintf(stderr, "Error linking program: %s\n", error_log);
+ exit(1);
+ }
+
glUseProgram(glsl_program_num);
// An SSBO for the rANS distributions.
GLuint ssbo;
glGenBuffers(1, &ssbo);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
- glBufferData(GL_SHADER_STORAGE_BUFFER, 65536 * 4 * sizeof(uint32_t), nullptr, GL_DYNAMIC_COPY);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 256 * 4 * sizeof(uint32_t), nullptr, GL_DYNAMIC_COPY);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo);
// Upload luma.
glBindImageTexture(5, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
check_error();
+ glUseProgram(glsl_tally_program_num);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo);
+
steady_clock::time_point start = steady_clock::now();
- unsigned num_iterations = 100;
+ unsigned num_iterations = 1000;
for (unsigned i = 0; i < num_iterations; ++i) {
- glDispatchCompute(WIDTH_BLOCKS, HEIGHT_BLOCKS, 1);
+ glClearNamedBufferSubData(ssbo, GL_R8, 0, 256 * 4 * sizeof(uint32_t), GL_RED, GL_UNSIGNED_BYTE, nullptr);
+ glUseProgram(glsl_program_num);
+ glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1);
+ glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+
+ glUseProgram(glsl_tally_program_num);
+ glDispatchCompute(4, 1, 1);
}
check_error();
glFinish();
}
}
-#if 1
+#if 0
for (unsigned y = 0; y < HEIGHT; ++y) {
for (unsigned xb = 0; xb < WIDTH/8; ++xb) {
printf("%4d %4d %4d %4d %4d %4d %4d %4d | ",
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
const uint32_t *dist = (const uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
for (int i = 0; i < 1024; ++i) {
- printf("%d,%d: %u\n", i / 256, i % 256, dist[i] / num_iterations);
+ printf("%d,%d: %u\n", i / 256, i % 256, dist[i]);
}
#endif
}