git.sesse.net Git - narabu/commitdiff
Predict DC across the entire slice instead of resetting each row. Opens up for slices...
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 24 Sep 2017 13:29:44 +0000 (15:29 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 24 Sep 2017 13:29:44 +0000 (15:29 +0200)
coded.dat
decoder.shader
qdc.cpp

diff --git a/coded.dat b/coded.dat
index 50795ae9ec44cce9ca7798b511c4f45f40b8d760..ea88547b1c2997d6b5cbe69bc21135a48362db68 100644 (file)
Binary files a/coded.dat and b/coded.dat differ
diff --git a/decoder.shader b/decoder.shader
index e7b83e0ccbb52402dcb0a827119fe272eff892fc..cab6f8cbecbb508421b01740a208b431a8387136 100644 (file)
--- a/decoder.shader
+++ b/decoder.shader
@@ -229,10 +229,6 @@ void main()
        pick_timer(start, local_timing[0]);
 
        for (uint block_idx = 40; block_idx --> 0; ) {
-               uint block_x = block_idx % 20;
-               uint block_y = block_idx / 20;
-               if (block_x == 19) last_k = 0;
-
                pick_timer(start, local_timing[1]);
 
                // rANS decode one coefficient across eight blocks (so 64x8 coefficients).
@@ -309,8 +305,12 @@ void main()
 
                pick_timer(start, local_timing[6]);
 
-               uint y = block_row * 16 + block_y * 8;
-               uint x = block_x * 64 + local_y * 8 + local_x;
+               uint global_block_idx = (block_row * 40 + block_idx) * 8 + local_y;
+               uint block_x = global_block_idx % 160;
+               uint block_y = global_block_idx / 160;
+
+               uint y = block_y * 8;
+               uint x = block_x * 8 + local_x;
                for (uint yl = 0; yl < 8; ++yl) {
                        imageStore(out_tex, ivec2(x, yl + y), vec4(temp[row_offset + yl * 8], 0.0, 0.0, 1.0));
                }
diff --git a/qdc.cpp b/qdc.cpp
index 556a8a5d541315a54d6bddf332cdd746a5c1769a..f1ff2542575f866ae4de3c063b2c57acac1586f3 100644 (file)
--- a/qdc.cpp
+++ b/qdc.cpp
@@ -719,16 +719,35 @@ int main(int argc, char **argv)
                chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
 #endif
 
-       // DC coefficient pred from the right to left
-       for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
-               for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) {
-                       coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)];
+       // DC coefficient pred from the right to left (within each slice)
+       for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += 320) {
+               int prev_k = 0;
+
+               for (unsigned subblock_idx = 320; subblock_idx --> 0; ) {
+                       unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS;
+                       unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS;
+                       int k = coeff_y[(yb * 8) * WIDTH + (xb * 8)];
+
+                       coeff_y[(yb * 8) * WIDTH + (xb * 8)] = k - prev_k;
+
+                       prev_k = k;
                }
        }
-       for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
-               for (unsigned xb = 0; xb < WIDTH/2 - 8; xb += 8) {
-                       coeff_cb[yb * WIDTH/2 + xb] -= coeff_cb[yb * WIDTH/2 + (xb + 8)];
-                       coeff_cr[yb * WIDTH/2 + xb] -= coeff_cr[yb * WIDTH/2 + (xb + 8)];
+       for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; block_idx += 320) {
+               int prev_k_cb = 0;
+               int prev_k_cr = 0;
+
+               for (unsigned subblock_idx = 320; subblock_idx --> 0; ) {
+                       unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS_CHROMA;
+                       unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS_CHROMA;
+                       int k_cb = coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)];
+                       int k_cr = coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)];
+
+                       coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cb - prev_k_cb;
+                       coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cr - prev_k_cr;
+
+                       prev_k_cb = k_cb;
+                       prev_k_cr = k_cr;
                }
        }