git.sesse.net Git - narabu/blobdiff - narabu.cpp
Make blocks per stream a named constant.
[narabu] / narabu.cpp
index 49afa9ad9d502703281820f3ba5880d848745cf1..898f7efeec4b343a7b011bcea85b8376a223873a 100644 (file)
@@ -24,6 +24,7 @@ const unsigned prob_bits = 12;
 const unsigned prob_scale = 1 << prob_bits;
 const unsigned NUM_SYMS = 256;
 const unsigned NUM_TABLES = 8;
+const unsigned BLOCKS_PER_STREAM = 320;
 
 struct RansDecSymbol {
         unsigned sym_start;
@@ -198,14 +199,18 @@ int main(int argc, char **argv)
        GLint out_tex_pos = glGetUniformLocation(glsl_program_num, "out_tex");
        GLint coeff_tex_pos = glGetUniformLocation(glsl_program_num, "coeff_tex");
        GLint sign_bias_pos = glGetUniformLocation(glsl_program_num, "sign_bias_per_model");
+       GLint num_blocks_pos = glGetUniformLocation(glsl_program_num, "num_blocks");
        printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos);
 
+       unsigned num_blocks = (HEIGHT / 16);
+
        // Bind the textures.
        glUniform1i(cum2sym_tex_pos, 0);
        glUniform1i(dsyms_tex_pos, 1);
        glUniform1i(out_tex_pos, 2);
        glUniform1i(coeff_tex_pos, 3);
        glUniform1uiv(sign_bias_pos, 16, sign_bias);
+       glUniform1i(num_blocks_pos, num_blocks);
         glBindImageTexture(0, cum2sym_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
         glBindImageTexture(1, dsyms_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG16UI);
         glBindImageTexture(2, out_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8);
@@ -213,7 +218,6 @@ int main(int argc, char **argv)
        printf("%d err=0x%x\n", __LINE__, glGetError());
 
        // Decode all luma blocks.
-       unsigned num_blocks = (HEIGHT / 16);
        for (unsigned y = 0; y < 8; ++y) {
                 for (unsigned x = 0; x < 8; ++x) {
                        unsigned coeff_num = y * 8 + x;
@@ -263,7 +267,7 @@ int main(int argc, char **argv)
 #define PARALLEL_SLICES 1
        steady_clock::time_point start = steady_clock::now();
        for (int i = 0; i < 1000; ++i) {
-               unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/320;
+               unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/BLOCKS_PER_STREAM;
                glDispatchCompute(1, (num_slices+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);
        }
        check_error();