X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=narabu-encoder.cpp;h=ca285282110480de405eadd04acf495898d2be6c;hb=f3eac72e679d3e0ec7ae1d4484736cd552c344dd;hp=b1b86d847a3f2cc128430c22a975a36adc986618;hpb=4e20a14f8ca0bc3259fa2be5bbbd4057080ce62c;p=narabu diff --git a/narabu-encoder.cpp b/narabu-encoder.cpp index b1b86d8..ca28528 100644 --- a/narabu-encoder.cpp +++ b/narabu-encoder.cpp @@ -19,8 +19,6 @@ #include -#include "ryg_rans/rans_byte.h" -#include "ryg_rans/renormalize.h" #include "util.h" #define WIDTH 1280 @@ -45,7 +43,11 @@ unsigned char pix_cr[(WIDTH/2) * HEIGHT]; struct RansDistSSBO { unsigned dist[4 * 256]; - std::pair ransdist[4 * 256]; + unsigned ransfreq[4 * 256]; + struct { + uint32_t x_max, rcp_freq, bias, rcp_shift_and_cmpl_freq; + } ransdist[4 * 256]; + unsigned sign_biases[4]; }; using namespace std; @@ -196,7 +198,7 @@ int main(int argc, char **argv) GLuint ssbo; glGenBuffers(1, &ssbo); glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo); - glNamedBufferStorage(ssbo, 256 * 16 * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + glNamedBufferStorage(ssbo, sizeof(RansDistSSBO), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); check_error(); // SSBOs for the rANS output (data and offsets). @@ -263,11 +265,11 @@ int main(int argc, char **argv) check_error(); } - glBindImageTexture(0, dc_ac7_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16UI); - glBindImageTexture(1, ac1_ac6_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16UI); - glBindImageTexture(2, ac2_ac5_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16UI); - glBindImageTexture(3, ac3_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8I); - glBindImageTexture(4, ac4_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8I); + glBindImageTexture(0, dc_ac7_tex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); + glBindImageTexture(1, ac1_ac6_tex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); + glBindImageTexture(2, ac2_ac5_tex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); + glBindImageTexture(3, ac3_tex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R8I); + glBindImageTexture(4, ac4_tex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R8I); glBindImageTexture(5, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI); check_error(); @@ -302,7 +304,7 @@ int main(int argc, char **argv) steady_clock::time_point start = steady_clock::now(); unsigned num_iterations = 100; for (unsigned i = 0; i < num_iterations; ++i) { - glClearNamedBufferSubData(ssbo, GL_R8, 0, 256 * 16 * sizeof(uint32_t), GL_RED, GL_UNSIGNED_BYTE, nullptr); + glClearNamedBufferSubData(ssbo, GL_R8, 0, sizeof(RansDistSSBO), GL_RED, GL_UNSIGNED_BYTE, nullptr); glUseProgram(glsl_program_num); glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); @@ -338,14 +340,20 @@ int main(int argc, char **argv) } // Write out the distributions. - const RansDistSSBO *rans_dist = (const RansDistSSBO *)glMapNamedBufferRange(ssbo, 0, 256 * 16 * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + const RansDistSSBO *rans_dist = (const RansDistSSBO *)glMapNamedBufferRange(ssbo, 0, sizeof(RansDistSSBO), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); for (unsigned r = 0; r < 2; ++r) { // Hack to write fake chroma tables. // TODO: rather gamma-k or something for (unsigned i = 0; i < 4; ++i) { printf("writing table %d\n", i); for (unsigned j = 0; j < NUM_SYMS; ++j) { - printf("%d,%d: start=%d freq=%d\n", i, j, rans_dist->ransdist[i * 256 + j].first, rans_dist->ransdist[i * 256 + j].second); - write_varint(rans_dist->ransdist[i * 256 + j].second, codedfp); + printf("%d,%d: freq=%d x_max=%d, rcp_freq=%08x, bias=%d, rcp_shift=%d, cmpl_freq=%d\n", + i, j, rans_dist->ransfreq[i * 256 + j], + rans_dist->ransdist[i * 256 + j].x_max, + rans_dist->ransdist[i * 256 + j].rcp_freq, + rans_dist->ransdist[i * 256 + j].bias, + rans_dist->ransdist[i * 256 + j].rcp_shift_and_cmpl_freq & 0xffff, + rans_dist->ransdist[i * 256 + j].rcp_shift_and_cmpl_freq >> 16); + write_varint(rans_dist->ransfreq[i * 256 + j], codedfp); } } }