From: Steinar H. Gunderson
Date: Sun, 24 Sep 2017 17:45:40 +0000 (+0200)
Subject: Make the GPU decoder (finally) work with any resolution.
X-Git-Url: https://git.sesse.net/?p=narabu;a=commitdiff_plain;h=de545fc4a6740687509e9ffd871e808a48281fb8

Make the GPU decoder (finally) work with any resolution.
---
diff --git a/narabu.cpp b/narabu.cpp
index 898f7ef..a0e5871 100644
--- a/narabu.cpp
+++ b/narabu.cpp
@@ -19,6 +19,11 @@ using namespace std::chrono;
 #define WIDTH 1280
 #define HEIGHT 720
+#define WIDTH_BLOCKS (WIDTH/8)
+#define WIDTH_BLOCKS_CHROMA (WIDTH/16)
+#define HEIGHT_BLOCKS (HEIGHT/8)
+#define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS)
+#define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS)
 const unsigned prob_bits = 12;
 const unsigned prob_scale = 1 << prob_bits;
@@ -54,10 +59,12 @@ optional read_varint(const char **ptr, const char *end)
 return nullopt; // Error: EOF.
 }
+const unsigned num_blocks = ((NUM_BLOCKS + BLOCKS_PER_STREAM - 1) / BLOCKS_PER_STREAM);
+
 struct CoeffStream {
 uint src_offset, src_len;
 };
-CoeffStream streams[45 * 64]; // HACK
+CoeffStream streams[num_blocks * 64];
 int main(int argc, char **argv)
 {
@@ -202,8 +209,6 @@ int main(int argc, char **argv)
 GLint num_blocks_pos = glGetUniformLocation(glsl_program_num, "num_blocks");
 printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos);
- unsigned num_blocks = (HEIGHT / 16);
-
 // Bind the textures.
 glUniform1i(cum2sym_tex_pos, 0);
 glUniform1i(dsyms_tex_pos, 1);
@@ -222,14 +227,14 @@ int main(int argc, char **argv)
 for (unsigned x = 0; x < 8; ++x) {
 unsigned coeff_num = y * 8 + x;
- for (unsigned yb = 0; yb < HEIGHT; yb += 16) {
+ for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += BLOCKS_PER_STREAM) {
 optional num_rans_bytes = read_varint(&ptr, end);
 if (!num_rans_bytes) {
- fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", yb);
+ fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", block_idx);
 exit(1);
 }
- CoeffStream *stream = &streams[coeff_num * num_blocks + (yb/16)];
+ CoeffStream *stream = &streams[coeff_num * num_blocks + block_idx / BLOCKS_PER_STREAM];
 stream->src_offset = ptr - coded.data();
 stream->src_len = *num_rans_bytes;