X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=narabu.cpp;h=0ac4f71d75ab58794da432e64c22ea0811cdb437;hb=daf421e32981645e551621551c6b82697ad078de;hp=a0e58717e6b32da507d8621d4b73e908769e4806;hpb=de545fc4a6740687509e9ffd871e808a48281fb8;p=narabu

Let zero-length coefficient streams (*num_rans_bytes == 0) reuse the source
offset and length of the last non-empty stream, and keep the benchmark
iteration count in a single variable that is used both by the dispatch loop
and by the per-iteration timing printout.

diff --git a/narabu.cpp b/narabu.cpp
index a0e5871..0ac4f71 100644
--- a/narabu.cpp
+++ b/narabu.cpp
@@ -223,6 +223,7 @@ int main(int argc, char **argv)
 	printf("%d err=0x%x\n", __LINE__, glGetError());
 
 	// Decode all luma blocks.
+	size_t last_src_offset = 0, last_src_len = 0;
 	for (unsigned y = 0; y < 8; ++y) {
 		for (unsigned x = 0; x < 8; ++x) {
 			unsigned coeff_num = y * 8 + x;
@@ -235,8 +236,16 @@ int main(int argc, char **argv)
 				}
 
 				CoeffStream *stream = &streams[coeff_num * num_blocks + block_idx / BLOCKS_PER_STREAM];
-				stream->src_offset = ptr - coded.data();
-				stream->src_len = *num_rans_bytes;
+				if (*num_rans_bytes == 0) {
+					// Zero-length stream: repeat the last non-empty stream's offset and length.
+					stream->src_offset = last_src_offset;
+					stream->src_len = last_src_len;
+				} else {
+					stream->src_offset = ptr - coded.data();
+					stream->src_len = *num_rans_bytes;
+					last_src_offset = stream->src_offset;
+					last_src_len = stream->src_len;
+				}
 
 				// TODO: check len
 				ptr += *num_rans_bytes;
@@ -271,7 +280,8 @@ int main(int argc, char **argv)
 
 #define PARALLEL_SLICES 1
 	steady_clock::time_point start = steady_clock::now();
-	for (int i = 0; i < 1000; ++i) {
+	unsigned num_iterations = 1000;
+	for (unsigned i = 0; i < num_iterations; ++i) {
 		unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/BLOCKS_PER_STREAM;
 		glDispatchCompute(1, (num_slices+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);
 	}
@@ -373,5 +383,5 @@ int main(int argc, char **argv)
 	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // unbind
 
 	printf("foo = 0x%x\n", glGetError());
-	printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / 1000);
+	printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / num_iterations);
 }