};
// Per-stream record, stored in the streams[] buffer.
// src_offset is the byte position at which the rANS decoder starts reading
// for that stream (it is copied into `offset` before RansDecInit).
// NOTE(review): src_len is presumably the stream's length in bytes; it is not
// read anywhere in the visible code -- confirm against the encoder.
// The added (+) declaration drops sign_offset/sign_len/extra_bits, which were
// only used by the separate sign-bit reader that this patch removes.
struct CoeffStream {
- uint src_offset, src_len, sign_offset, sign_len, extra_bits;
+ uint src_offset, src_len;
};
// Shader storage block (std430 layout, binding 0) holding one CoeffStream
// record per coded stream; the decoder indexes it with stream_num.
layout(std430, binding = 0) buffer whatever3
{
CoeffStream streams[];
};
-
// sign_bias_per_model is indexed by model_num. In the decode loop, a value
// read with prob_bits + 1 bits that is >= the model's sign bias is treated as
// a negative coefficient: the bias is subtracted from both the value and the
// rANS state before the symbol lookup, and the decoded k is negated.
// NOTE(review): 16 entries are declared while the visible model_num is clamped
// to at most 7 -- confirm whether the extra entries are used elsewhere.
-
-uniform uint src_offset, src_len, sign_offset, sign_len, extra_bits;
+uniform uint sign_bias_per_model[16];
const uint RANS_BYTE_L = (1u << 23); // lower bound of our normalization interval
-uint last_offset = -1, ransbuf;
-
// Returns the byte of the compressed data located at byte address `offset`.
// data_SSBO is indexed by offset >> 2 (four bytes per element); the low two
// bits of `offset` select the byte inside that element, counting from the
// least-significant byte (bit position 8 * (offset & 3), width 8).
// The removed (-) lines cached the most recently fetched element in
// last_offset/ransbuf; the added (+) line reads data_SSBO on every call.
uint get_rans_byte(uint offset)
{
- if (last_offset != (offset >> 2)) {
- last_offset = offset >> 2;
- ransbuf = data_SSBO[offset >> 2];
- }
- return bitfieldExtract(ransbuf, 8 * int(offset & 3u), 8);
-
// We assume little endian.
-// return bitfieldExtract(data_SSBO[offset >> 2], 8 * int(offset & 3u), 8);
+ return bitfieldExtract(data_SSBO[offset >> 2], 8 * int(offset & 3u), 8);
}
-void RansDecInit(out uint r, inout uint offset)
+uint RansDecInit(inout uint offset)
{
uint x;
x |= get_rans_byte(offset + 3) << 24;
offset += 4;
- r = x;
+ return x;
}
uint RansDecGet(uint r, uint scale_bits)
const uint stream_num = coeff_num * num_blocks + block_row;
//const uint stream_num = block_row * num_blocks + coeff_num; // HACK
const uint model_num = min((coeff_num % 8) + (coeff_num / 8), 7);
+ const uint sign_bias = sign_bias_per_model[model_num];
// Initialize rANS decoder.
uint offset = streams[stream_num].src_offset;
- uint rans;
- RansDecInit(rans, offset);
-
- // Initialize sign bit decoder. TODO: this ought to be 32-bit-aligned instead!
- uint soffset = streams[stream_num].sign_offset;
- uint sign_buf = get_rans_byte(soffset++) >> streams[stream_num].extra_bits;
- uint sign_bits_left = 8 - streams[stream_num].extra_bits;
+ uint rans = RansDecInit(offset);
float q = (coeff_num == 0) ? 1.0 : (quant_matrix[coeff_num] * quant_scalefac / 128.0 / sqrt(2.0)); // FIXME: fold
q *= (1.0 / 255.0);
// rANS decode one coefficient across eight blocks (so 64x8 coefficients).
for (uint subblock_idx = 8; subblock_idx --> 0; ) {
// Read a symbol.
- int k = int(cum2sym(RansDecGet(rans, prob_bits), model_num));
+ uint bottom_bits = RansDecGet(rans, prob_bits + 1);
+ bool sign = false;
+ if (bottom_bits >= sign_bias) {
+ bottom_bits -= sign_bias;
+ rans -= sign_bias;
+ sign = true;
+ }
+ int k = int(cum2sym(bottom_bits, model_num)); // Can go out-of-bounds; that will return zero.
uvec2 sym = get_dsym(k, model_num);
- RansDecAdvance(rans, offset, sym.x, sym.y, prob_bits);
+ RansDecAdvance(rans, offset, sym.x, sym.y, prob_bits + 1);
if (k == ESCAPE_LIMIT) {
k = int(RansDecGet(rans, prob_bits));
RansDecAdvance(rans, offset, k, 1, prob_bits);
}
- if (k != 0) {
- if (sign_bits_left == 0) {
- sign_buf = get_rans_byte(soffset++);
- sign_bits_left = 8;
- }
- if ((sign_buf & 1u) == 1u) k = -k;
- --sign_bits_left;
- sign_buf >>= 1;
+ if (sign) {
+ k = -k;
}
if (coeff_num == 0) {