// Table-size parameters. This span is a patch hunk: a leading '-' marks the
// old text of a line and a leading '+' marks its replacement.
// prob_bits is the shift used to derive prob_scale on the next line.
const unsigned prob_bits = 12;
// prob_scale = 2^prob_bits (4096); cumulative frequencies are compared
// against it before the cum2sym table is written.
const unsigned prob_scale = 1 << prob_bits;
// Symbol count. Later code masks with (NUM_SYMS - 1), which only behaves as
// a modulo while this value stays a power of two.
const unsigned NUM_SYMS = 256;
// Presumably the number of entries in decode_tables -- TODO confirm against
// its declaration. The patch lowers the value from 16 to 8.
-const unsigned NUM_TABLES = 16;
+const unsigned NUM_TABLES = 8;
struct RansDecSymbol {
unsigned sym_start;
exit(1);
}
// Record the start (current cum_freq) and the frequency of this symbol.
// The entry is stored at index (sym + 1) wrapped to the table size; the patch
// replaces the literal mask 255 with NUM_SYMS - 1.
// NOTE(review): the frequency is written as `*freq` here but the loop bound
// below uses plain `freq` -- confirm which spelling is intended.
- decode_tables[table].dsyms[(sym + 1) & 255].sym_start = cum_freq;
- decode_tables[table].dsyms[(sym + 1) & 255].sym_freq = *freq;
+ decode_tables[table].dsyms[(sym + 1) & (NUM_SYMS - 1)].sym_start = cum_freq;
+ decode_tables[table].dsyms[(sym + 1) & (NUM_SYMS - 1)].sym_freq = *freq;
// Walk the `freq` cumulative-frequency slots of this symbol and map each one
// back to the same wrapped symbol index.
for (uint32_t i = 0; i < freq; ++i) {
// Slots at or beyond prob_scale are not written; cum_freq still advances.
if (cum_freq < prob_scale)
- decode_tables[table].cum2sym[cum_freq] = (sym + 1) & 255;
+ decode_tables[table].cum2sym[cum_freq] = (sym + 1) & (NUM_SYMS - 1);
++cum_freq;
}
}
check_error();
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
check_error();
- glTexImage2D(GL_TEXTURE_2D, 0, GL_R16I, 1280, 720, 0, GL_RED_INTEGER, GL_SHORT, nullptr);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_R16I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_SHORT, nullptr);
check_error();
GLuint out_tex;
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, 1280, 720, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
- //glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, 1280, 720, 0, GL_RED, GL_FLOAT, nullptr);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, WIDTH, HEIGHT, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
+ //glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, WIDTH, HEIGHT, 0, GL_RED, GL_FLOAT, nullptr);
check_error();
// Look up the uniform locations used by the program glsl_program_num.
GLint cum2sym_tex_pos = glGetUniformLocation(glsl_program_num, "cum2sym_tex");
GLint out_tex_pos = glGetUniformLocation(glsl_program_num, "out_tex");
GLint coeff_tex_pos = glGetUniformLocation(glsl_program_num, "coeff_tex");
GLint sign_bias_pos = glGetUniformLocation(glsl_program_num, "sign_bias_per_model");
// Added by the patch: location of the "num_blocks" uniform.
+ GLint num_blocks_pos = glGetUniformLocation(glsl_program_num, "num_blocks");
// Debug print of the pending GL error and four of the locations; coeff_tex_pos
// and num_blocks_pos are not included in the output.
printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos);
// The patch moves this declaration up (it used to sit just before the decode
// loop) so the value can be uploaded as a uniform below.
+ unsigned num_blocks = (HEIGHT / 16);
+
// Bind the textures.
// The integer values 0..3 assigned here are the unit indices; units 0, 1 and 2
// are the ones passed to glBindImageTexture below.
glUniform1i(cum2sym_tex_pos, 0);
glUniform1i(dsyms_tex_pos, 1);
glUniform1i(out_tex_pos, 2);
glUniform1i(coeff_tex_pos, 3);
// NOTE(review): the element count 16 equals the old NUM_TABLES; this patch
// lowers NUM_TABLES to 8. Confirm sign_bias still holds 16 elements, otherwise
// this reads past the array -- the count probably wants to be NUM_TABLES.
glUniform1uiv(sign_bias_pos, 16, sign_bias);
// NOTE(review): num_blocks is unsigned but is uploaded with the signed-int
// entry point. That is only valid if the shader declares the uniform as int;
// a uint uniform needs glUniform1ui -- verify against the shader source.
+ glUniform1i(num_blocks_pos, num_blocks);
// Image bindings: the two lookup tables are read-only, the output is
// write-only. No image binding for unit 3 (coeff_tex) appears in this span.
glBindImageTexture(0, cum2sym_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
glBindImageTexture(1, dsyms_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG16UI);
glBindImageTexture(2, out_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8);
printf("%d err=0x%x\n", __LINE__, glGetError());
// Decode all luma blocks.
- unsigned num_blocks = (HEIGHT / 16);
for (unsigned y = 0; y < 8; ++y) {
for (unsigned x = 0; x < 8; ++x) {
unsigned coeff_num = y * 8 + x;
// Divisor applied (rounding up) to the slice count to get the Y work-group
// count of each dispatch.
#define PARALLEL_SLICES 1
// Time 1000 back-to-back dispatches; glFinish() is called before the end
// timestamp is taken.
steady_clock::time_point start = steady_clock::now();
- for (int i = 0; i < 1000; ++i)
- glDispatchCompute(1, (45+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);
+ for (int i = 0; i < 1000; ++i) {
// Slice count = number of 8x8 blocks in the frame divided by 320 (integer
// division, so any remainder is dropped). For the 1280x720 literals this
// patch replaces elsewhere, that is 160 * 90 / 320 = 45, the old constant.
// NOTE(review): the value does not depend on i and could be computed once
// outside the loop.
+ unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/320;
+ glDispatchCompute(1, (num_slices+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);
+ }
check_error();
glFinish();
steady_clock::time_point now = steady_clock::now();
}
printf("\n");
- unsigned char *data = new unsigned char[1280 * 720];
+ unsigned char *data = new unsigned char[WIDTH * HEIGHT];
glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, data);
check_error();
printf("%d err=0x%x bufsize=%zu\n", __LINE__, glGetError(), coded.size());
for (int k = 0; k < 4; ++k) {
for (int y = 0; y < 8; ++y) {
for (int x = 0; x < 8; ++x) {
- printf("%3d ", data[y * 1280 + x + k*8]);
+ printf("%3d ", data[y * WIDTH + x + k*8]);
}
printf("\n");
}
for (int k = 0; k < 4; ++k) {
for (int y = 0; y < 8; ++y) {
for (int x = 0; x < 8; ++x) {
- //printf("%5.2f ", data[(y+8) * 1280 + x + (1272-k*8)]);
- printf("%3d ", data[y * 1280 + x + k*8]);
+ //printf("%5.2f ", data[(y+8) * WIDTH + x + (1272-k*8)]);
+ printf("%3d ", data[y * WIDTH + x + k*8]);
}
printf("\n");
}
#endif
// Dump `data` to narabu.pgm as a binary PGM: a "P5" header with width, height
// and max value 255, followed by one byte per pixel in row-major order.
// NOTE(review): the fopen() result is used without a null check.
FILE *fp = fopen("narabu.pgm", "wb");
- fprintf(fp, "P5\n1280 720\n255\n");
- for (int y = 0; y < 720; ++y) {
- for (int x = 0; x < 1280; ++x) {
- int k = lrintf(data[y * 1280 + x]);
// NOTE(review): "%d" requires WIDTH and HEIGHT to be of type int; their
// declarations are not visible here -- confirm.
+ fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
+ for (int y = 0; y < HEIGHT; ++y) {
+ for (int x = 0; x < WIDTH; ++x) {
+ int k = lrintf(data[y * WIDTH + x]);
// Clamp to the 8-bit range before writing. If `data` is the unsigned char
// buffer allocated earlier, neither clamp can trigger; presumably they are
// kept for the commented-out float (GL_R32F) readback path -- TODO confirm.
if (k < 0) k = 0;
if (k > 255) k = 255;
putc(k, fp);
}
fclose(fp);
- int16_t *coeff_data = new int16_t[1280 * 720];
+ int16_t *coeff_data = new int16_t[WIDTH * HEIGHT];
glBindTexture(GL_TEXTURE_2D, coeff_tex);
check_error();
glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_SHORT, coeff_data);
for (int k = 0; k < 4; ++k) {
for (int y = 0; y < 8; ++y) {
for (int x = 0; x < 8; ++x) {
- printf("%3d ", coeff_data[y * 1280 + x + k*8]);
+ printf("%3d ", coeff_data[y * WIDTH + x + k*8]);
}
printf("\n");
}