git.sesse.net Git - narabu/blobdiff - narabu-encoder.cpp
Kill the division in the rANS GPU encoder.
[narabu] / narabu-encoder.cpp
index d13f0446516a73d3a68b333c35074d00ed032b19..ca285282110480de405eadd04acf495898d2be6c 100644 (file)
@@ -43,7 +43,11 @@ unsigned char pix_cr[(WIDTH/2) * HEIGHT];
 
 struct RansDistSSBO {
        unsigned dist[4 * 256];
-       std::pair<unsigned, unsigned> ransdist[4 * 256];
+       unsigned ransfreq[4 * 256];
+       struct {
+               uint32_t x_max, rcp_freq, bias, rcp_shift_and_cmpl_freq;
+       } ransdist[4 * 256];
+       unsigned sign_biases[4];
 };
 
 using namespace std;
@@ -194,7 +198,7 @@ int main(int argc, char **argv)
        GLuint ssbo;
        glGenBuffers(1, &ssbo);
        glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
-       glNamedBufferStorage(ssbo, 256 * 16 * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+       glNamedBufferStorage(ssbo, sizeof(RansDistSSBO), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
        check_error();
 
        // SSBOs for the rANS output (data and offsets).
@@ -300,7 +304,7 @@ int main(int argc, char **argv)
        steady_clock::time_point start = steady_clock::now();
        unsigned num_iterations = 100;
        for (unsigned i = 0; i < num_iterations; ++i) {
-               glClearNamedBufferSubData(ssbo, GL_R8, 0, 256 * 16 * sizeof(uint32_t), GL_RED, GL_UNSIGNED_BYTE, nullptr);
+               glClearNamedBufferSubData(ssbo, GL_R8, 0, sizeof(RansDistSSBO), GL_RED, GL_UNSIGNED_BYTE, nullptr);
                glUseProgram(glsl_program_num);
                glDispatchCompute(WIDTH_BLOCKS / 16, HEIGHT_BLOCKS, 1);
                glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
@@ -336,14 +340,20 @@ int main(int argc, char **argv)
        }
 
        // Write out the distributions.
-       const RansDistSSBO *rans_dist = (const RansDistSSBO *)glMapNamedBufferRange(ssbo, 0, 256 * 16 * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+       const RansDistSSBO *rans_dist = (const RansDistSSBO *)glMapNamedBufferRange(ssbo, 0, sizeof(RansDistSSBO), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
        for (unsigned r = 0; r < 2; ++r) {  // Hack to write fake chroma tables.
                // TODO: rather gamma-k or something
                for (unsigned i = 0; i < 4; ++i) {
                        printf("writing table %d\n", i);
                        for (unsigned j = 0; j < NUM_SYMS; ++j) {
-                               printf("%d,%d: start=%d freq=%d\n", i, j, rans_dist->ransdist[i * 256 + j].first, rans_dist->ransdist[i * 256 + j].second);
-                               write_varint(rans_dist->ransdist[i * 256 + j].second, codedfp);
+                               printf("%d,%d: freq=%d  x_max=%d, rcp_freq=%08x, bias=%d, rcp_shift=%d, cmpl_freq=%d\n",
+                                       i, j, rans_dist->ransfreq[i * 256 + j],
+                                       rans_dist->ransdist[i * 256 + j].x_max,
+                                       rans_dist->ransdist[i * 256 + j].rcp_freq,
+                                       rans_dist->ransdist[i * 256 + j].bias,
+                                       rans_dist->ransdist[i * 256 + j].rcp_shift_and_cmpl_freq & 0xffff,
+                                       rans_dist->ransdist[i * 256 + j].rcp_shift_and_cmpl_freq >> 16);
+                               write_varint(rans_dist->ransfreq[i * 256 + j], codedfp);
                        }
                }
        }