// Image uniforms visible in this fragment: dsyms_tex is read-only (rg16ui);
// out_tex (r8) and coeff_tex (r16i) are write-only.
layout(rg16ui) uniform restrict readonly uimage2D dsyms_tex;
layout(r8) uniform restrict writeonly image2D out_tex;
layout(r16i) uniform restrict writeonly iimage2D coeff_tex;
// Block count, supplied as a uniform; it replaces the hard-coded
// `const uint num_blocks = 720 / 16` removed further down.
// NOTE(review): the replacement is `int` while the removed constant was `uint`.
+uniform int num_blocks;
// prob_scale = 2^prob_bits = 4096.
const uint prob_bits = 12;
const uint prob_scale = 1 << prob_bits;
// Split the flat invocation index gl_LocalInvocationID.x into coordinates:
// local_y = (index / 8) mod 8, i.e. a value in 0..7; local_z = index / 64.
const uint local_y = (gl_LocalInvocationID.x / 8) % 8;
const uint local_z = gl_LocalInvocationID.x / 64;
- const uint num_blocks = 720 / 16; // FIXME: make a uniform
// slice_num is an alias for local_z.
const uint slice_num = local_z;
// thread_num = local_y * 8 + local_x. local_x is defined outside this
// fragment -- presumably gl_LocalInvocationID.x % 8; TODO confirm.
const uint thread_num = local_y * 8 + local_x;
// Look up uniform locations by name in the program object glsl_program_num.
GLint out_tex_pos = glGetUniformLocation(glsl_program_num, "out_tex");
GLint coeff_tex_pos = glGetUniformLocation(glsl_program_num, "coeff_tex");
GLint sign_bias_pos = glGetUniformLocation(glsl_program_num, "sign_bias_per_model");
+ GLint num_blocks_pos = glGetUniformLocation(glsl_program_num, "num_blocks");
// Debug trace: source line, current GL error code, and four of the locations.
// NOTE(review): coeff_tex_pos and num_blocks_pos are not part of this trace.
printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos);
// Block count is HEIGHT / 16. The declaration moves up here (it is removed
// from just before the decode loop below) so the value exists before the
// uniform upload.
+ unsigned num_blocks = (HEIGHT / 16);
+
// Bind the textures.
// Each image uniform is given the index of the image unit it reads from (0..3).
glUniform1i(cum2sym_tex_pos, 0);
glUniform1i(dsyms_tex_pos, 1);
glUniform1i(out_tex_pos, 2);
glUniform1i(coeff_tex_pos, 3);
// Upload 16 unsigned values from sign_bias to the sign_bias_per_model uniform.
glUniform1uiv(sign_bias_pos, 16, sign_bias);
// The signed setter is used because the shader-side line in this patch declares
// `uniform int num_blocks`; the unsigned host value is converted implicitly.
+ glUniform1i(num_blocks_pos, num_blocks);
// Attach mip level 0 of each texture (non-layered) to image units 0..2 with the
// access mode and format given per call.
// NOTE(review): no glBindImageTexture call for unit 3 (coeff_tex) is visible in
// this fragment -- confirm it is bound elsewhere.
glBindImageTexture(0, cum2sym_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
glBindImageTexture(1, dsyms_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG16UI);
glBindImageTexture(2, out_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8);
// Debug trace: source line and current GL error code after the binding calls.
printf("%d err=0x%x\n", __LINE__, glGetError());
// Decode all luma blocks.
- unsigned num_blocks = (HEIGHT / 16);
for (unsigned y = 0; y < 8; ++y) {
for (unsigned x = 0; x < 8; ++x) {
unsigned coeff_num = y * 8 + x;