glUseProgram(glsl_program_num);
+ // An SSBO for the rANS distributions.
+ GLuint ssbo;
+ glGenBuffers(1, &ssbo);
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 65536 * 4 * sizeof(uint32_t), nullptr, GL_DYNAMIC_COPY);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, ssbo);
+
// Upload luma.
GLuint y_tex;
glGenTextures(1, &y_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_R8I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, pix_y);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_R8UI, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, pix_y);
check_error();
// Make destination textures.
glBindImageTexture(2, ac2_ac5_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16UI);
glBindImageTexture(3, ac3_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8I);
glBindImageTexture(4, ac4_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8I);
- glBindImageTexture(5, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8I);
+ glBindImageTexture(5, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
check_error();
steady_clock::time_point start = steady_clock::now();
- unsigned num_iterations = 1000;
+ unsigned num_iterations = 100;
for (unsigned i = 0; i < num_iterations; ++i) {
- glDispatchCompute(WIDTH_BLOCKS, HEIGHT_BLOCKS, 1);
+ glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1);
}
check_error();
glFinish();
coeff_y[y * WIDTH + xb*8 + 5],
coeff_y[y * WIDTH + xb*8 + 6],
coeff_y[y * WIDTH + xb*8 + 7]);
+ printf("%4d %4d %4d %4d %4d %4d %4d %4d || ",
+ pix_y[y * WIDTH + xb*8 + 0],
+ pix_y[y * WIDTH + xb*8 + 1],
+ pix_y[y * WIDTH + xb*8 + 2],
+ pix_y[y * WIDTH + xb*8 + 3],
+ pix_y[y * WIDTH + xb*8 + 4],
+ pix_y[y * WIDTH + xb*8 + 5],
+ pix_y[y * WIDTH + xb*8 + 6],
+ pix_y[y * WIDTH + xb*8 + 7]);
}
printf("\n");
}
printf("\n");
printf("Each iteration took %.3f ms (but note that is DCT only, no rANS).\n", 1e3 * duration<double>(now - start).count() / num_iterations);
+#if 1
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
+ const uint32_t *dist = (const uint32_t *)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
+ for (int i = 0; i < 1024; ++i) {
+ printf("%d,%d: %u\n", i / 256, i % 256, dist[i] / num_iterations);
+ }
+#endif
}