git.sesse.net Git - narabu/blobdiff - narabu-encoder.cpp
Make rans.shader write uint32s, shedding the GL_NV_gpu_shader5 demand.
[narabu] / narabu-encoder.cpp
index 36804d616f70681f411af412961a23ac87a542b4..fe270641634652debd37edba61470d585da5fbf8 100644 (file)
@@ -227,10 +227,10 @@ int main(int argc, char **argv)
        glNamedBufferStorage(output_ssbo, 45 * 64 * 1024, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
        check_error();
 
-       GLuint output_offset_ssbo;
-       glGenBuffers(1, &output_offset_ssbo);
-       glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_offset_ssbo);
-       glNamedBufferStorage(output_offset_ssbo, 45 * 64 * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+       GLuint bytes_written_ssbo;
+       glGenBuffers(1, &bytes_written_ssbo);
+       glBindBuffer(GL_SHADER_STORAGE_BUFFER, bytes_written_ssbo);
+       glNamedBufferStorage(bytes_written_ssbo, 45 * 64 * sizeof(uint32_t), nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
        check_error();
 
        // Bind SSBOs.
@@ -243,7 +243,7 @@ int main(int argc, char **argv)
 
        glUseProgram(glsl_rans_program_num);
        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, output_ssbo);
-       glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, output_offset_ssbo);
+       glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, bytes_written_ssbo);
        glBindBufferBase(GL_UNIFORM_BUFFER, 13, dist_ubo);
 
        glUseProgram(glsl_program_num);
@@ -383,9 +383,8 @@ int main(int argc, char **argv)
        }
 
        // Write out the actual data.
-       // TODO: Do the deduplication.
 
-       const uint32_t *offsets = (const uint32_t *)glMapNamedBufferRange(output_offset_ssbo, 0, 45 * 64 * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+       const uint32_t *bytes_written = (const uint32_t *)glMapNamedBufferRange(bytes_written_ssbo, 0, 45 * 64 * sizeof(uint32_t), GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
 #if 0
        for (int i = 0; i < 45*64; ++i) {
                printf("%d,%d,%d: %u\n", i / 64, (i / 8) % 8, i % 8, 1024 * (i + 1) - offsets[i]);
@@ -399,8 +398,8 @@ int main(int argc, char **argv)
                for (unsigned x = 0; x < 8; ++x) {
                        for (unsigned int stream_idx = 0; stream_idx < 45; ++stream_idx) {
                                const uint8_t *out_end = data + (stream_idx * 64 + y * 8 + x + 1) * 1024;
-                               const uint8_t *ptr = data + offsets[stream_idx * 64 + y * 8 + x];
-                               uint32_t num_rans_bytes = out_end - ptr;
+                               uint32_t num_rans_bytes = bytes_written[stream_idx * 64 + y * 8 + x];
+                               const uint8_t *ptr = out_end - num_rans_bytes;
                                assert(num_rans_bytes <= 1024);
 
                                if (num_rans_bytes == last_block.size() &&