// CPU-side view of the shader storage buffer used for the rANS distribution
// tables. The buffer is created with sizeof(RansDistSSBO) bytes and the mapped
// pointer is cast to `const RansDistSSBO *`, so this layout presumably has to
// match the buffer block declared in the compute shader (TODO confirm).
// The readback code indexes ransfreq and ransdist as [table * 256 + symbol],
// with four tables.
struct RansDistSSBO {
unsigned dist[4 * 256];
- std::pair<unsigned, unsigned> ransdist[4 * 256];
+ unsigned ransfreq[4 * 256];  // Printed as "freq"; this is the value passed to write_varint().
+ struct {  // Per-symbol encoder parameters; only printed (not written out) by the visible readback code.
+ uint32_t x_max, rcp_freq, bias, rcp_shift_and_cmpl_freq;  // Last field is packed: low 16 bits are printed as rcp_shift, high 16 bits as cmpl_freq.
+ } ransdist[4 * 256];
+ unsigned sign_biases[4];  // Not read by the visible host code; presumably one value per table (TODO confirm).
};
using namespace std;
// Create the shader storage buffer and give it immutable storage large enough
// for one RansDistSSBO. No initial data is supplied (nullptr), so the contents
// are not defined until the buffer is cleared or written. The storage flags
// allow the buffer to be mapped for CPU reads later.
GLuint ssbo;
glGenBuffers(1, &ssbo);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
- glNamedBufferStorage(ssbo, 256 * 16 * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ glNamedBufferStorage(ssbo, sizeof(RansDistSSBO), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);  // Size follows the struct so the two cannot drift apart.
check_error();
// SSBOs for the rANS output (data and offsets).
// Record a start timestamp, then run the compute program num_iterations times.
// Every pass begins from a zero-filled buffer: a null data pointer makes
// glClearNamedBufferSubData fill the range with zeros.
steady_clock::time_point start = steady_clock::now();
unsigned num_iterations = 100;
for (unsigned i = 0; i < num_iterations; ++i) {
- glClearNamedBufferSubData(ssbo, GL_R8, 0, 256 * 16 * sizeof(uint32_t), GL_RED, GL_UNSIGNED_BYTE, nullptr);
+ glClearNamedBufferSubData(ssbo, GL_R8, 0, sizeof(RansDistSSBO), GL_RED, GL_UNSIGNED_BYTE, nullptr);  // Clear the whole struct-sized buffer.
glUseProgram(glsl_program_num);
// Work-group grid: WIDTH_BLOCKS / 16 groups in x, HEIGHT_BLOCKS groups in y.
glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1);
// NOTE(review): the shader output is later read by the CPU through a mapped
// pointer and cleared again on the next pass; confirm whether
// GL_BUFFER_UPDATE_BARRIER_BIT is needed in addition to (or instead of)
// GL_SHADER_STORAGE_BARRIER_BIT for those accesses.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
// Write out the distributions.
// The whole buffer is mapped for reading and viewed as a RansDistSSBO; each
// symbol's values are printed for debugging and its frequency is appended to
// codedfp as a varint.
// NOTE(review): the pointer returned by glMapNamedBufferRange is not checked
// for null before it is dereferenced below.
- const RansDistSSBO *rans_dist = (const RansDistSSBO *)glMapNamedBufferRange(ssbo, 0, 256 * 16 * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ const RansDistSSBO *rans_dist = (const RansDistSSBO *)glMapNamedBufferRange(ssbo, 0, sizeof(RansDistSSBO), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
// The same four tables are emitted twice; the second copy stands in for
// chroma tables.
for (unsigned r = 0; r < 2; ++r) { // Hack to write fake chroma tables.
// TODO: rather gamma-k or something
for (unsigned i = 0; i < 4; ++i) {
// NOTE(review): i, j and the table fields are unsigned but are printed with
// %d; %u would match the argument types.
printf("writing table %d\n", i);
for (unsigned j = 0; j < NUM_SYMS; ++j) {
- printf("%d,%d: start=%d freq=%d\n", i, j, rans_dist->ransdist[i * 256 + j].first, rans_dist->ransdist[i * 256 + j].second);
- write_varint(rans_dist->ransdist[i * 256 + j].second, codedfp);
+ printf("%d,%d: freq=%d x_max=%d, rcp_freq=%08x, bias=%d, rcp_shift=%d, cmpl_freq=%d\n",
+ i, j, rans_dist->ransfreq[i * 256 + j],
+ rans_dist->ransdist[i * 256 + j].x_max,
+ rans_dist->ransdist[i * 256 + j].rcp_freq,
+ rans_dist->ransdist[i * 256 + j].bias,
+ rans_dist->ransdist[i * 256 + j].rcp_shift_and_cmpl_freq & 0xffff,  // Low 16 bits: rcp_shift.
+ rans_dist->ransdist[i * 256 + j].rcp_shift_and_cmpl_freq >> 16);  // High 16 bits: cmpl_freq.
+ write_varint(rans_dist->ransfreq[i * 256 + j], codedfp);  // Only the frequency goes into the coded stream.
}
}
}