X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=narabu.cpp;h=81b0726cfd4aed5ae27f956ffd75f1d035796113;hb=3fb87c6b953be3382cd216c74ff6aa025c8eaa2a;hp=49afa9ad9d502703281820f3ba5880d848745cf1;hpb=704e9573dc4a4480edbf855ecd5ab7838398843d;p=narabu diff --git a/narabu.cpp b/narabu.cpp index 49afa9a..81b0726 100644 --- a/narabu.cpp +++ b/narabu.cpp @@ -19,11 +19,17 @@ using namespace std::chrono; #define WIDTH 1280 #define HEIGHT 720 +#define WIDTH_BLOCKS (WIDTH/8) +#define WIDTH_BLOCKS_CHROMA (WIDTH/16) +#define HEIGHT_BLOCKS (HEIGHT/8) +#define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS) +#define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS) const unsigned prob_bits = 12; const unsigned prob_scale = 1 << prob_bits; const unsigned NUM_SYMS = 256; const unsigned NUM_TABLES = 8; +const unsigned BLOCKS_PER_STREAM = 320; struct RansDecSymbol { unsigned sym_start; @@ -38,25 +44,17 @@ RansDecodeTable decode_tables[NUM_TABLES]; optional read_varint(const char **ptr, const char *end) { uint32_t x = 0; - int shift = 0; - while (*ptr < end) { - int ch = **ptr; - ++(*ptr); - - x |= (ch & 0x7f) << shift; - if ((ch & 0x80) == 0) return x; - shift += 7; - if (shift >= 32) { - return nullopt; // Error: Overlong int. - } - } - return nullopt; // Error: EOF. + memcpy(&x, *ptr, 4); + *ptr += 4; + return x; } +const unsigned num_blocks = ((NUM_BLOCKS + BLOCKS_PER_STREAM - 1) / BLOCKS_PER_STREAM); + struct CoeffStream { uint src_offset, src_len; }; -CoeffStream streams[45 * 64]; // HACK +CoeffStream streams[num_blocks * 64]; int main(int argc, char **argv) { @@ -105,6 +103,7 @@ int main(int argc, char **argv) string coded = ::read_file(argc >= 2 ? argv[1] : "coded.dat"); const char *ptr = &coded[0]; + //assert((intptr_t)ptr % 4 == 0); const char *end = ptr + coded.size(); GLuint sign_bias[NUM_TABLES]; @@ -179,7 +178,21 @@ int main(int argc, char **argv) check_error(); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); check_error(); - glTexImage2D(GL_TEXTURE_2D, 0, GL_R16I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_SHORT, nullptr); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_INT, nullptr); + check_error(); + + GLuint coeff2_tex; + glGenTextures(1, &coeff2_tex); + glBindTexture(GL_TEXTURE_2D, coeff2_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + check_error(); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32I, WIDTH, HEIGHT, 0, GL_RED_INTEGER, GL_INT, nullptr); check_error(); GLuint out_tex; @@ -197,7 +210,9 @@ int main(int argc, char **argv) GLint dsyms_tex_pos = glGetUniformLocation(glsl_program_num, "dsyms_tex"); GLint out_tex_pos = glGetUniformLocation(glsl_program_num, "out_tex"); GLint coeff_tex_pos = glGetUniformLocation(glsl_program_num, "coeff_tex"); + GLint coeff2_tex_pos = glGetUniformLocation(glsl_program_num, "coeff2_tex"); GLint sign_bias_pos = glGetUniformLocation(glsl_program_num, "sign_bias_per_model"); + GLint num_blocks_pos = glGetUniformLocation(glsl_program_num, "num_blocks"); printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos); // Bind the textures. @@ -205,29 +220,32 @@ int main(int argc, char **argv) glUniform1i(dsyms_tex_pos, 1); glUniform1i(out_tex_pos, 2); glUniform1i(coeff_tex_pos, 3); + glUniform1i(coeff2_tex_pos, 4); glUniform1uiv(sign_bias_pos, 16, sign_bias); + glUniform1i(num_blocks_pos, num_blocks); glBindImageTexture(0, cum2sym_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI); glBindImageTexture(1, dsyms_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG16UI); glBindImageTexture(2, out_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8); - glBindImageTexture(3, coeff_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16I); + glBindImageTexture(3, coeff_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32I); + glBindImageTexture(4, coeff2_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32I); printf("%d err=0x%x\n", __LINE__, glGetError()); // Decode all luma blocks. - unsigned num_blocks = (HEIGHT / 16); for (unsigned y = 0; y < 8; ++y) { for (unsigned x = 0; x < 8; ++x) { unsigned coeff_num = y * 8 + x; - for (unsigned yb = 0; yb < HEIGHT; yb += 16) { + for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += BLOCKS_PER_STREAM) { optional num_rans_bytes = read_varint(&ptr, end); if (!num_rans_bytes) { - fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", yb); + fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", block_idx); exit(1); } - CoeffStream *stream = &streams[coeff_num * num_blocks + (yb/16)]; + CoeffStream *stream = &streams[coeff_num * num_blocks + block_idx / BLOCKS_PER_STREAM]; stream->src_offset = ptr - coded.data(); stream->src_len = *num_rans_bytes; + //assert(stream->src_offset % 4 == 0); // TODO: check len ptr += *num_rans_bytes; @@ -262,8 +280,9 @@ int main(int argc, char **argv) #define PARALLEL_SLICES 1 steady_clock::time_point start = steady_clock::now(); - for (int i = 0; i < 1000; ++i) { - unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/320; + unsigned num_iterations = 1000; + for (unsigned i = 0; i < num_iterations; ++i) { + unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/BLOCKS_PER_STREAM; glDispatchCompute(1, (num_slices+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1); } check_error(); @@ -343,26 +362,26 @@ int main(int argc, char **argv) } fclose(fp); - int16_t *coeff_data = new int16_t[WIDTH * HEIGHT]; +#if 0 + uint32_t *coeff_data = new uint32_t[WIDTH * HEIGHT]; glBindTexture(GL_TEXTURE_2D, coeff_tex); check_error(); - glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_SHORT, coeff_data); + glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_INT, coeff_data); check_error(); - for (int k = 0; k < 4; ++k) { - for (int y = 0; y < 8; ++y) { - for (int x = 0; x < 8; ++x) { - printf("%3d ", coeff_data[y * WIDTH + x + k*8]); - } - printf("\n"); - } - printf("\n"); + uint32_t *coeff2_data = new uint32_t[WIDTH * HEIGHT]; + glBindTexture(GL_TEXTURE_2D, coeff2_tex); + check_error(); + glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_INT, coeff2_data); + check_error(); + for (int x = 0; x < 320; ++x) { + printf("%08x.%08x ", coeff2_data[x], coeff_data[x]); } printf("\n"); - +#endif check_error(); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // unbind printf("foo = 0x%x\n", glGetError()); - printf("Each iteration took %.3f ms.\n", 1e3 * duration(now - start).count() / 1000); + printf("Each iteration took %.3f ms.\n", 1e3 * duration(now - start).count() / num_iterations); }