From: Steinar H. Gunderson Date: Sun, 24 Sep 2017 13:29:44 +0000 (+0200) Subject: Predict DC across the entire slice instead of resetting each row. Opens up for slices... X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;ds=sidebyside;h=2161c821c9c65ebae24eff9c2e4809c11c67cd02;p=narabu Predict DC across the entire slice instead of resetting each row. Opens up for slices crossing rows easier. --- diff --git a/coded.dat b/coded.dat index 50795ae..ea88547 100644 Binary files a/coded.dat and b/coded.dat differ diff --git a/decoder.shader b/decoder.shader index e7b83e0..cab6f8c 100644 --- a/decoder.shader +++ b/decoder.shader @@ -229,10 +229,6 @@ void main() pick_timer(start, local_timing[0]); for (uint block_idx = 40; block_idx --> 0; ) { - uint block_x = block_idx % 20; - uint block_y = block_idx / 20; - if (block_x == 19) last_k = 0; - pick_timer(start, local_timing[1]); // rANS decode one coefficient across eight blocks (so 64x8 coefficients). @@ -309,8 +305,12 @@ void main() pick_timer(start, local_timing[6]); - uint y = block_row * 16 + block_y * 8; - uint x = block_x * 64 + local_y * 8 + local_x; + uint global_block_idx = (block_row * 40 + block_idx) * 8 + local_y; + uint block_x = global_block_idx % 160; + uint block_y = global_block_idx / 160; + + uint y = block_y * 8; + uint x = block_x * 8 + local_x; for (uint yl = 0; yl < 8; ++yl) { imageStore(out_tex, ivec2(x, yl + y), vec4(temp[row_offset + yl * 8], 0.0, 0.0, 1.0)); } diff --git a/qdc.cpp b/qdc.cpp index 556a8a5..f1ff254 100644 --- a/qdc.cpp +++ b/qdc.cpp @@ -719,16 +719,35 @@ int main(int argc, char **argv) chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT)); #endif - // DC coefficient pred from the right to left - for (unsigned yb = 0; yb < HEIGHT; yb += 8) { - for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) { - coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)]; + // DC coefficient pred from the right to left (within each slice) + for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += 320) { + int prev_k = 0; + + for (unsigned subblock_idx = 320; subblock_idx --> 0; ) { + unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS; + unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS; + int k = coeff_y[(yb * 8) * WIDTH + (xb * 8)]; + + coeff_y[(yb * 8) * WIDTH + (xb * 8)] = k - prev_k; + + prev_k = k; } } - for (unsigned yb = 0; yb < HEIGHT; yb += 8) { - for (unsigned xb = 0; xb < WIDTH/2 - 8; xb += 8) { - coeff_cb[yb * WIDTH/2 + xb] -= coeff_cb[yb * WIDTH/2 + (xb + 8)]; - coeff_cr[yb * WIDTH/2 + xb] -= coeff_cr[yb * WIDTH/2 + (xb + 8)]; + for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; block_idx += 320) { + int prev_k_cb = 0; + int prev_k_cr = 0; + + for (unsigned subblock_idx = 320; subblock_idx --> 0; ) { + unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS_CHROMA; + unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS_CHROMA; + int k_cb = coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)]; + int k_cr = coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)]; + + coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cb - prev_k_cb; + coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cr - prev_k_cr; + + prev_k_cb = k_cb; + prev_k_cr = k_cr; } }