From c6928acb2405c30fdaf1a5d9efeb2902d7c544aa Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 12 Nov 2017 11:30:20 +0100 Subject: [PATCH] More fixes of hard-coded values. --- narabu-encoder.cpp | 14 ++++++++------ rans.shader | 9 +++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/narabu-encoder.cpp b/narabu-encoder.cpp index b1d0d1f..f2dd7c2 100644 --- a/narabu-encoder.cpp +++ b/narabu-encoder.cpp @@ -34,6 +34,8 @@ #define BLOCKS_PER_STREAM 320 #define STREAM_BUF_SIZE 1024 // In bytes. +#define NUM_STREAMS ((NUM_BLOCKS + BLOCKS_PER_STREAM - 1) / BLOCKS_PER_STREAM) + static constexpr uint32_t prob_bits = 12; static constexpr uint32_t prob_scale = 1 << prob_bits; @@ -225,13 +227,13 @@ int main(int argc, char **argv) GLuint output_ssbo; glGenBuffers(1, &output_ssbo); glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_ssbo); - glNamedBufferStorage(output_ssbo, HEIGHT_BLOCKS * WIDTH_BLOCKS * STREAM_BUF_SIZE, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + glNamedBufferStorage(output_ssbo, 64 * NUM_STREAMS * STREAM_BUF_SIZE, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); check_error(); GLuint bytes_written_ssbo; glGenBuffers(1, &bytes_written_ssbo); glBindBuffer(GL_SHADER_STORAGE_BUFFER, bytes_written_ssbo); - glNamedBufferStorage(bytes_written_ssbo, HEIGHT_BLOCKS * WIDTH_BLOCKS * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + glNamedBufferStorage(bytes_written_ssbo, 64 * NUM_STREAMS * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); check_error(); // Bind SSBOs. @@ -338,7 +340,7 @@ int main(int argc, char **argv) glMemoryBarrier(GL_UNIFORM_BARRIER_BIT); glUseProgram(glsl_rans_program_num); - glDispatchCompute(NUM_BLOCKS / BLOCKS_PER_STREAM, 8, 5); + glDispatchCompute(NUM_STREAMS, 8, 5); } check_error(); glFinish(); @@ -385,19 +387,19 @@ int main(int argc, char **argv) // Write out the actual data. - const uint32_t *bytes_written = (const uint32_t *)glMapNamedBufferRange(bytes_written_ssbo, 0, HEIGHT_BLOCKS * WIDTH_BLOCKS * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + const uint32_t *bytes_written = (const uint32_t *)glMapNamedBufferRange(bytes_written_ssbo, 0, 64 * NUM_STREAMS * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); #if 0 for (int i = 0; i < HEIGHT_BLOCKS*64; ++i) { printf("%d,%d,%d: %u\n", i / 64, (i / 8) % 8, i % 8, 1024 * (i + 1) - offsets[i]); } #endif - const uint8_t *data = (const uint8_t *)glMapNamedBufferRange(output_ssbo, 0, HEIGHT_BLOCKS * WIDTH_BLOCKS * STREAM_BUF_SIZE, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + const uint8_t *data = (const uint8_t *)glMapNamedBufferRange(output_ssbo, 0, 64 * NUM_STREAMS * STREAM_BUF_SIZE, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); string last_block; for (unsigned y = 0; y < 8; ++y) { for (unsigned x = 0; x < 8; ++x) { - for (unsigned int stream_idx = 0; stream_idx < HEIGHT_BLOCKS; ++stream_idx) { + for (unsigned int stream_idx = 0; stream_idx < NUM_STREAMS; ++stream_idx) { const uint8_t *out_end = data + (stream_idx * 64 + y * 8 + x + 1) * STREAM_BUF_SIZE; uint32_t num_rans_bytes = bytes_written[stream_idx * 64 + y * 8 + x]; const uint8_t *ptr = out_end - num_rans_bytes; diff --git a/rans.shader b/rans.shader index 3e19c83..47d0c8b 100644 --- a/rans.shader +++ b/rans.shader @@ -9,6 +9,7 @@ const uint BLOCKS_PER_STREAM = 320; const uint STREAM_BUF_SIZE = 256; // In uint32s. 1 kB per stream ought to be enough for everyone :-) const uint NUM_SYMS = 256; const uint ESCAPE_LIMIT = NUM_SYMS - 1; +const uint WIDTH_BLOCKS = 160; // TODO: send in as a uniform. #define MAPPING(s0, s1, s2, s3, s4, s5, s6, s7) ((s0) | (s1 << 2) | (s2 << 4) | (s3 << 6) | (s4 << 8) | (s5 << 10) | (s6 << 12) | (s7 << 14)) @@ -167,8 +168,8 @@ void encode_9_7(uint streamgroup_num, uint coeff_row, layout(r16ui) restrict rea for (uint subblock_idx = 0; subblock_idx < BLOCKS_PER_STREAM; ++subblock_idx) { // TODO: Use SSBOs instead of a texture? - uint x = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) % 160; - uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / 160; + uint x = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) % WIDTH_BLOCKS; + uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / WIDTH_BLOCKS; uint f = imageLoad(tex, ivec2(x, y * 8 + coeff_row)).x; encode_coeff(sign_extend(f & 0x1ffu, 9), enc1); @@ -186,8 +187,8 @@ void encode_8(uint streamgroup_num, uint coeff_row, layout(r8i) restrict readonl for (uint subblock_idx = 0; subblock_idx < BLOCKS_PER_STREAM; ++subblock_idx) { // TODO: Use SSBOs instead of a texture? - uint x = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) % 160; - uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / 160; + uint x = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) % WIDTH_BLOCKS; + uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / WIDTH_BLOCKS; int f = imageLoad(tex, ivec2(x, y * 8 + coeff_row)).x; encode_coeff(f, enc); -- 2.39.2