Remove some obsolete caching code.

[narabu] / decoder.shader
diff --git a/decoder.shader b/decoder.shader

index 6d54e4dc5671173f540a1574666e51870a14b1c2..752a6a951d8b15fecfb3ae22a611e8207459e71e 100644 (file)
--- a/decoder.shader
+++ b/decoder.shader
@@ -48,32 +48,23 @@ layout(std430, binding = 10) buffer layoutName2
  };
  
  struct CoeffStream {
-       uint src_offset, src_len, sign_offset, sign_len, extra_bits;
+       uint src_offset, src_len;
  };
  layout(std430, binding = 0) buffer whatever3
  {
         CoeffStream streams[];
  };
-
-uniform uint src_offset, src_len, sign_offset, sign_len, extra_bits;
+uniform uint sign_bias_per_model[16];
  
  const uint RANS_BYTE_L = (1u << 23);  // lower bound of our normalization interval
  
-uint last_offset = -1, ransbuf;
-
  uint get_rans_byte(uint offset)
  {
-       if (last_offset != (offset >> 2)) {
-               last_offset = offset >> 2;
-               ransbuf = data_SSBO[offset >> 2];
-       }
-       return bitfieldExtract(ransbuf, 8 * int(offset & 3u), 8);
-
         // We assume little endian.
-//     return bitfieldExtract(data_SSBO[offset >> 2], 8 * int(offset & 3u), 8);
+       return bitfieldExtract(data_SSBO[offset >> 2], 8 * int(offset & 3u), 8);
  }
  
-void RansDecInit(out uint r, inout uint offset)
+uint RansDecInit(inout uint offset)
  {
         uint x;
  
@@ -83,7 +74,7 @@ void RansDecInit(out uint r, inout uint offset)
         x |= get_rans_byte(offset + 3) << 24;
         offset += 4;
  
-       r = x;
+       return x;
  }
  
  uint RansDecGet(uint r, uint scale_bits)
@@ -206,16 +197,11 @@ void main()
         const uint stream_num = coeff_num * num_blocks + block_row;
         //const uint stream_num = block_row * num_blocks + coeff_num;  // HACK
         const uint model_num = min((coeff_num % 8) + (coeff_num / 8), 7);
+       const uint sign_bias = sign_bias_per_model[model_num];
  
         // Initialize rANS decoder.
         uint offset = streams[stream_num].src_offset;
-       uint rans;
-       RansDecInit(rans, offset);
-
-       // Initialize sign bit decoder. TODO: this ought to be 32-bit-aligned instead!
-       uint soffset = streams[stream_num].sign_offset;
-       uint sign_buf = get_rans_byte(soffset++) >> streams[stream_num].extra_bits;
-       uint sign_bits_left = 8 - streams[stream_num].extra_bits;
+       uint rans = RansDecInit(offset);
  
         float q = (coeff_num == 0) ? 1.0 : (quant_matrix[coeff_num] * quant_scalefac / 128.0 / sqrt(2.0));  // FIXME: fold
         q *= (1.0 / 255.0);
@@ -234,22 +220,23 @@ void main()
                 // rANS decode one coefficient across eight blocks (so 64x8 coefficients).
                 for (uint subblock_idx = 8; subblock_idx --> 0; ) {
                         // Read a symbol.
-                       int k = int(cum2sym(RansDecGet(rans, prob_bits), model_num));
+                       uint bottom_bits = RansDecGet(rans, prob_bits + 1);
+                       bool sign = false;
+                       if (bottom_bits >= sign_bias) {
+                               bottom_bits -= sign_bias;
+                               rans -= sign_bias;
+                               sign = true;
+                       }
+                       int k = int(cum2sym(bottom_bits, model_num));  // Can go out-of-bounds; that will return zero.
                         uvec2 sym = get_dsym(k, model_num);
-                       RansDecAdvance(rans, offset, sym.x, sym.y, prob_bits);
+                       RansDecAdvance(rans, offset, sym.x, sym.y, prob_bits + 1);
  
                         if (k == ESCAPE_LIMIT) {
                                 k = int(RansDecGet(rans, prob_bits));
                                 RansDecAdvance(rans, offset, k, 1, prob_bits);
                         }
-                       if (k != 0) {
-                               if (sign_bits_left == 0) {
-                                       sign_buf = get_rans_byte(soffset++);
-                                       sign_bits_left = 8;
-                               }
-                               if ((sign_buf & 1u) == 1u) k = -k;
-                               --sign_bits_left;
-                               sign_buf >>= 1;
+                       if (sign) {
+                               k = -k;
                         }
  
                         if (coeff_num == 0) {