#define NUM_SYMS 256
#define ESCAPE_LIMIT (NUM_SYMS - 1)
#define BLOCKS_PER_STREAM 320
+#define STREAM_BUF_SIZE 1024 // In bytes; size of each (stream_idx, y, x) slot in the rANS output buffer.
+
+#define NUM_STREAMS ((NUM_BLOCKS + BLOCKS_PER_STREAM - 1) / BLOCKS_PER_STREAM) // NUM_BLOCKS / BLOCKS_PER_STREAM, rounded up.
static constexpr uint32_t prob_bits = 12;
static constexpr uint32_t prob_scale = 1 << prob_bits;
GLuint output_ssbo;
glGenBuffers(1, &output_ssbo);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_ssbo);
- glNamedBufferStorage(output_ssbo, 45 * 64 * 1024, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ glNamedBufferStorage(output_ssbo, 64 * NUM_STREAMS * STREAM_BUF_SIZE, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
check_error();
- GLuint output_offset_ssbo;
- glGenBuffers(1, &output_offset_ssbo);
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_offset_ssbo);
- glNamedBufferStorage(output_offset_ssbo, 45 * 64 * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ GLuint bytes_written_ssbo;
+ glGenBuffers(1, &bytes_written_ssbo);
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, bytes_written_ssbo);
+ glNamedBufferStorage(bytes_written_ssbo, 64 * NUM_STREAMS * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
check_error();
// Bind SSBOs.
glUseProgram(glsl_rans_program_num);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, output_ssbo);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, output_offset_ssbo);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, bytes_written_ssbo);
glBindBufferBase(GL_UNIFORM_BUFFER, 13, dist_ubo);
glUseProgram(glsl_program_num);
glMemoryBarrier(GL_UNIFORM_BARRIER_BIT);
glUseProgram(glsl_rans_program_num);
- glDispatchCompute(NUM_BLOCKS / BLOCKS_PER_STREAM, 8, 5);
+ glDispatchCompute(NUM_STREAMS, 8, 5);
}
check_error();
glFinish();
}
// Write out the actual data.
- // TODO: Do the deduplication.
- const uint32_t *offsets = (const uint32_t *)glMapNamedBufferRange(output_offset_ssbo, 0, 45 * 64 * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ const uint32_t *bytes_written = (const uint32_t *)glMapNamedBufferRange(bytes_written_ssbo, 0, 64 * NUM_STREAMS * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
#if 0
- for (int i = 0; i < 45*64; ++i) {
- printf("%d,%d,%d: %u\n", i / 64, (i / 8) % 8, i % 8, 1024 * (i + 1) - offsets[i]);
+ // Debug dump: bytes written per slot, printed as stream_idx,y,x (slot index = stream_idx * 64 + y * 8 + x).
+ // bytes_written holds 64 * NUM_STREAMS entries, so iterate over exactly that many.
+ for (int i = 0; i < 64 * NUM_STREAMS; ++i) {
+ printf("%d,%d,%d: %u\n", i / 64, (i / 8) % 8, i % 8, bytes_written[i]);
}
#endif
- const uint8_t *data = (const uint8_t *)glMapNamedBufferRange(output_ssbo, 0, 45 * 64 * 1024, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ const uint8_t *data = (const uint8_t *)glMapNamedBufferRange(output_ssbo, 0, 64 * NUM_STREAMS * STREAM_BUF_SIZE, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ string last_block; // Most recently written payload; compared against each new block to detect repeats.
for (unsigned y = 0; y < 8; ++y) {
for (unsigned x = 0; x < 8; ++x) {
- for (unsigned int stream_idx = 0; stream_idx < 45; ++stream_idx) {
- const uint8_t *out_end = data + (stream_idx * 64 + y * 8 + x + 1) * 1024;
- const uint8_t *ptr = data + offsets[stream_idx * 64 + y * 8 + x];
- uint32_t num_rans_bytes = out_end - ptr;
-#if 0
+ for (unsigned int stream_idx = 0; stream_idx < NUM_STREAMS; ++stream_idx) {
+ const uint8_t *out_end = data + (stream_idx * 64 + y * 8 + x + 1) * STREAM_BUF_SIZE; // One past the end of this slot.
+ uint32_t num_rans_bytes = bytes_written[stream_idx * 64 + y * 8 + x];
+ const uint8_t *ptr = out_end - num_rans_bytes; // The payload is read from the last num_rans_bytes bytes of the slot.
+ assert(num_rans_bytes <= STREAM_BUF_SIZE); // A payload larger than the slot would start before the slot.
+
if (num_rans_bytes == last_block.size() &&
memcmp(last_block.data(), ptr, last_block.size()) == 0) {
write_varint(0, codedfp);
- clear();
- return 1;
} else {
last_block = string((const char *)ptr, num_rans_bytes);
+ write_varint(num_rans_bytes, codedfp); // Length prefix. NOTE(review): a length of 0 would be indistinguishable from the duplicate marker written above; confirm num_rans_bytes is never 0 here.
+ fwrite(ptr, 1, num_rans_bytes, codedfp); // Payload bytes follow the length prefix.
}
-#endif
-
- write_varint(num_rans_bytes, codedfp);
- fwrite(ptr, 1, num_rans_bytes, codedfp);
}
}
}