pick_timer(start, local_timing[0]);
for (uint block_idx = 40; block_idx --> 0; ) {
- uint block_x = block_idx % 20;
- uint block_y = block_idx / 20;
- if (block_x == 19) last_k = 0;
-
pick_timer(start, local_timing[1]);
// rANS decode one coefficient across eight blocks (so 64x8 coefficients).
pick_timer(start, local_timing[6]);
- uint y = block_row * 16 + block_y * 8;
- uint x = block_x * 64 + local_y * 8 + local_x;
+ uint global_block_idx = (block_row * 40 + block_idx) * 8 + local_y;
+ uint block_x = global_block_idx % 160;
+ uint block_y = global_block_idx / 160;
+
+ uint y = block_y * 8;
+ uint x = block_x * 8 + local_x;
for (uint yl = 0; yl < 8; ++yl) {
imageStore(out_tex, ivec2(x, yl + y), vec4(temp[row_offset + yl * 8], 0.0, 0.0, 1.0));
}
chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
#endif
- // DC coefficient pred from the right to left
- for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
- for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) {
- coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)];
+ // DC coefficient prediction in reverse block order (right to left), restarting from zero in each 320-block slice
+ for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += 320) {
+ int prev_k = 0;
+
+ for (unsigned subblock_idx = 320; subblock_idx --> 0; ) {
+ unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS;
+ unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS;
+ int k = coeff_y[(yb * 8) * WIDTH + (xb * 8)];
+
+ coeff_y[(yb * 8) * WIDTH + (xb * 8)] = k - prev_k;
+
+ prev_k = k;
}
}
- for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
- for (unsigned xb = 0; xb < WIDTH/2 - 8; xb += 8) {
- coeff_cb[yb * WIDTH/2 + xb] -= coeff_cb[yb * WIDTH/2 + (xb + 8)];
- coeff_cr[yb * WIDTH/2 + xb] -= coeff_cr[yb * WIDTH/2 + (xb + 8)];
+ for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; block_idx += 320) {
+ int prev_k_cb = 0;
+ int prev_k_cr = 0;
+
+ for (unsigned subblock_idx = 320; subblock_idx --> 0; ) {
+ unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS_CHROMA;
+ unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS_CHROMA;
+ int k_cb = coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)];
+ int k_cr = coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)];
+
+ coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cb - prev_k_cb;
+ coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cr - prev_k_cr;
+
+ prev_k_cb = k_cb;
+ prev_k_cr = k_cr;
}
}