printf("%d err=0x%x\n", __LINE__, glGetError());
// Decode all luma blocks.
+ size_t last_src_offset = 0, last_src_len = 0;
for (unsigned y = 0; y < 8; ++y) {
for (unsigned x = 0; x < 8; ++x) {
unsigned coeff_num = y * 8 + x;
}
CoeffStream *stream = &streams[coeff_num * num_blocks + block_idx / BLOCKS_PER_STREAM];
- stream->src_offset = ptr - coded.data();
- stream->src_len = *num_rans_bytes;
+ if (*num_rans_bytes == 0) {
+ // Repeat last stream: a zero-length entry has no bytes of its own, so
+ // reuse the offset/length recorded for the most recent non-empty stream
+ // (both are 0 if no non-empty stream has been seen yet).
+ stream->src_offset = last_src_offset;
+ stream->src_len = last_src_len;
+ } else {
+ stream->src_offset = ptr - coded.data();
+ stream->src_len = *num_rans_bytes;
+ // Remember this stream so a following zero-length entry can repeat it.
+ // Both fields must be copied from the stream just filled in; assigning
+ // last_src_len to itself would leave it stuck at its initial value.
+ last_src_offset = stream->src_offset;
+ last_src_len = stream->src_len;
+ }
// TODO: check len
ptr += *num_rans_bytes;
#define PARALLEL_SLICES 1
steady_clock::time_point start = steady_clock::now();
- for (int i = 0; i < 1000; ++i) {
+ unsigned num_iterations = 1000;
+ for (unsigned i = 0; i < num_iterations; ++i) {
unsigned num_slices = (WIDTH/8)*(HEIGHT/8)/BLOCKS_PER_STREAM;
glDispatchCompute(1, (num_slices+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // unbind
printf("foo = 0x%x\n", glGetError());
- printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / 1000);
+ printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / num_iterations);
}