// Frame dimensions used to size the textures allocated in main().
#define WIDTH 1280
#define HEIGHT 720
// Frame size expressed in blocks. The divisor 8 is presumably the block edge
// length in pixels -- TODO confirm against the shader.
+#define WIDTH_BLOCKS (WIDTH/8)
// Chroma block count per row is WIDTH/16, i.e. half of WIDTH_BLOCKS.
+#define WIDTH_BLOCKS_CHROMA (WIDTH/16)
+#define HEIGHT_BLOCKS (HEIGHT/8)
// Total block counts; note that the chroma count reuses HEIGHT_BLOCKS, so only
// the horizontal block count differs from luma.
+#define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS)
+#define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS)
// prob_scale is 2^prob_bits (4096). Presumably the fixed-point total of the
// rANS probability model -- confirm against the encoder.
const unsigned prob_bits = 12;
const unsigned prob_scale = 1 << prob_bits;
const unsigned NUM_SYMS = 256;
// Also used as the element count of the sign_bias array in main().
const unsigned NUM_TABLES = 8;
// Step, in blocks, between consecutive coded streams read in main(): one
// length-prefixed stream is parsed per BLOCKS_PER_STREAM blocks.
+const unsigned BLOCKS_PER_STREAM = 320;
struct RansDecSymbol {
unsigned sym_start;
// Reads one length field from the coded input and advances the cursor.
//
// Despite the historical name, the field is no longer a variable-length
// integer: it is a fixed four-byte value copied verbatim with memcpy, so it
// is interpreted in the byte order of the host.
//
// ptr: in/out cursor into the coded buffer; advanced by four bytes on success
//      and left untouched on failure.
// end: one past the last valid byte of the coded buffer.
//
// Returns the decoded value, or nullopt if fewer than four bytes remain
// before `end` (truncated input), so the caller's error path can fire instead
// of the function reading past the end of the buffer.
std::optional<uint32_t> read_varint(const char **ptr, const char *end)
{
	const int length_bytes = sizeof(uint32_t);

	// A cursor already past `end` yields a negative difference and is
	// rejected by the same comparison.
	if (end - *ptr < length_bytes) {
		return std::nullopt;  // Error: EOF.
	}

	uint32_t x = 0;
	// memcpy rather than a pointer cast: the cursor is not guaranteed to be
	// four-byte aligned.
	memcpy(&x, *ptr, length_bytes);
	*ptr += length_bytes;
	return x;
}
// Number of coded streams per coefficient: NUM_BLOCKS divided by
// BLOCKS_PER_STREAM, rounded up. With the current 1280x720 frame this is
// 14400 / 320 = 45, the same value as the literal it replaces below.
+const unsigned num_blocks = ((NUM_BLOCKS + BLOCKS_PER_STREAM - 1) / BLOCKS_PER_STREAM);
+
// Location of one coded stream inside the input file: main() stores the byte
// offset from the start of the coded buffer in src_offset and the stream's
// byte length in src_len.
struct CoeffStream {
uint src_offset, src_len;
};
// One entry per (coefficient, stream) pair, indexed in main() as
// coeff_num * num_blocks + stream index. The factor 64 presumably covers the
// 8x8 coefficient positions -- confirm against the loop bounds in main().
-CoeffStream streams[45 * 64]; // HACK
+CoeffStream streams[num_blocks * 64];
// Entry point of the decoder test harness. From what is visible here it:
// loads the coded file (argv[1], defaulting to "coded.dat"), creates the GL
// textures and binds them as shader images, records the offset/length of every
// coded stream, dispatches the compute shader num_iterations times and prints
// the average time per iteration.
// NOTE(review): only fragments of this function are visible; several names
// used below (y, fp, now, cum2sym_tex_pos, ...) are declared in parts that are
// not shown.
int main(int argc, char **argv)
{
string coded = ::read_file(argc >= 2 ? argv[1] : "coded.dat");
const char *ptr = &coded[0];
+ //assert((intptr_t)ptr % 4 == 0);
const char *end = ptr + coded.size();
// NOTE(review): this array has NUM_TABLES (8) elements, but the
// glUniform1uiv() call further down uploads 16 values from it.
GLuint sign_bias[NUM_TABLES];
check_error();
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
check_error();
// Storage for the currently bound texture (presumably coeff_tex -- its
// creation is not visible here): widened from 16-bit to 32-bit signed
// integer texels, with no initial data uploaded.
- glTexImage2D(GL_TEXTURE_2D, 0, GL_R16I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_SHORT, nullptr);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_R32I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_INT, nullptr);
+ check_error();
+
// Second WIDTH x HEIGHT R32I texture, configured with nearest filtering and
// repeat wrapping, and bound as image unit 4 below.
+ GLuint coeff2_tex;
+ glGenTextures(1, &coeff2_tex);
+ glBindTexture(GL_TEXTURE_2D, coeff2_tex);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
+ check_error();
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_R32I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_INT, nullptr);
check_error();
GLuint out_tex;
// Look up the shader uniforms by name.
GLint dsyms_tex_pos = glGetUniformLocation(glsl_program_num, "dsyms_tex");
GLint out_tex_pos = glGetUniformLocation(glsl_program_num, "out_tex");
GLint coeff_tex_pos = glGetUniformLocation(glsl_program_num, "coeff_tex");
+ GLint coeff2_tex_pos = glGetUniformLocation(glsl_program_num, "coeff2_tex");
GLint sign_bias_pos = glGetUniformLocation(glsl_program_num, "sign_bias_per_model");
GLint num_blocks_pos = glGetUniformLocation(glsl_program_num, "num_blocks");
// Debug trace: source line, current GL error and four of the uniform
// locations (the coeff/coeff2/num_blocks locations are not printed).
printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos);
// The local num_blocks is removed; the uses below now refer to the
// file-scope constant of the same name.
- unsigned num_blocks = (HEIGHT / 16);
-
// Bind the textures.
// Each sampler/image uniform is set to the image unit index used in the
// matching glBindImageTexture() call below.
glUniform1i(cum2sym_tex_pos, 0);
glUniform1i(dsyms_tex_pos, 1);
glUniform1i(out_tex_pos, 2);
glUniform1i(coeff_tex_pos, 3);
+ glUniform1i(coeff2_tex_pos, 4);
// NOTE(review): count is 16 but sign_bias holds NUM_TABLES (8) values, so this
// reads past the end of the array as declared above -- confirm the intended
// size (the uniform may be declared with 16 entries in the shader).
glUniform1uiv(sign_bias_pos, 16, sign_bias);
glUniform1i(num_blocks_pos, num_blocks);
glBindImageTexture(0, cum2sym_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
glBindImageTexture(1, dsyms_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG16UI);
glBindImageTexture(2, out_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8);
// The image format must match the R32I storage allocated above.
- glBindImageTexture(3, coeff_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16I);
+ glBindImageTexture(3, coeff_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32I);
+ glBindImageTexture(4, coeff2_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32I);
printf("%d err=0x%x\n", __LINE__, glGetError());
// Decode all luma blocks.
// For every coefficient position, walk the frame in groups of
// BLOCKS_PER_STREAM blocks; each group has its own length-prefixed stream in
// the coded file. (The enclosing loop over y is not visible here.)
for (unsigned x = 0; x < 8; ++x) {
unsigned coeff_num = y * 8 + x;
- for (unsigned yb = 0; yb < HEIGHT; yb += 16) {
+ for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += BLOCKS_PER_STREAM) {
optional<uint32_t> num_rans_bytes = read_varint(&ptr, end);
if (!num_rans_bytes) {
- fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", yb);
+ fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", block_idx);
exit(1);
}
// Remember where this stream's payload starts and how long it is; the payload
// itself is skipped, not decoded, on the CPU side.
- CoeffStream *stream = &streams[coeff_num * num_blocks + (yb/16)];
+ CoeffStream *stream = &streams[coeff_num * num_blocks + block_idx / BLOCKS_PER_STREAM];
stream->src_offset = ptr - coded.data();
stream->src_len = *num_rans_bytes;
+ //assert(stream->src_offset % 4 == 0);
// TODO: check len
// NOTE(review): the length comes straight from the file and is not compared
// against `end`, so a corrupt length moves ptr outside the buffer.
ptr += *num_rans_bytes;
#define PARALLEL_SLICES 1
// Timing loop: dispatch the compute shader num_iterations times.
steady_clock::time_point start = steady_clock::now();
- for (int i = 0; i < 1000; ++i) {
- unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/320;
+ unsigned num_iterations = 1000;
+ for (unsigned i = 0; i < num_iterations; ++i) {
// NOTE(review): this division rounds down, whereas the file-scope num_blocks
// rounds up; both give 45 for 1280x720 but would differ for frame sizes whose
// block count is not a multiple of BLOCKS_PER_STREAM.
+ unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/BLOCKS_PER_STREAM;
glDispatchCompute(1, (num_slices+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);
}
check_error();
}
fclose(fp);
// The coefficient dump below is compiled out. When enabled it reads both
// coefficient textures back and prints the first 320 texels of each as hex
// pairs (coeff2 first, then coeff).
// NOTE(review): the buffers are uint32_t although the data is requested as
// GL_INT, the arrays are never deleted, and 320 is hard-coded rather than
// BLOCKS_PER_STREAM.
- int16_t *coeff_data = new int16_t[WIDTH * HEIGHT];
+#if 0
+ uint32_t *coeff_data = new uint32_t[WIDTH * HEIGHT];
glBindTexture(GL_TEXTURE_2D, coeff_tex);
check_error();
- glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_SHORT, coeff_data);
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_INT, coeff_data);
check_error();
- for (int k = 0; k < 4; ++k) {
- for (int y = 0; y < 8; ++y) {
- for (int x = 0; x < 8; ++x) {
- printf("%3d ", coeff_data[y * WIDTH + x + k*8]);
- }
- printf("\n");
- }
- printf("\n");
+ uint32_t *coeff2_data = new uint32_t[WIDTH * HEIGHT];
+ glBindTexture(GL_TEXTURE_2D, coeff2_tex);
+ check_error();
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_INT, coeff2_data);
+ check_error();
+ for (int x = 0; x < 320; ++x) {
+ printf("%08x.%08x ", coeff2_data[x], coeff_data[x]);
}
printf("\n");
-
+#endif
check_error();
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // unbind
printf("foo = 0x%x\n", glGetError());
// Average wall-clock time per dispatch in milliseconds; the divisor now
// follows the loop count instead of repeating the literal 1000.
- printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / 1000);
+ printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / num_iterations);
}