// Local workgroup size of 1 in x (y and z are left at their defaults).
layout(local_size_x = 1) in;
// Base probability precision, in bits. encode_coeff (further down in this
// diff) puts escaped values at prob_bits and regular symbols at prob_bits + 1.
-const uint prob_bits = 13; // Note!
+const uint prob_bits = 12;
// 2^prob_bits.
const uint prob_scale = 1 << prob_bits;
// NOTE(review): presumably the lower bound of the rANS normalization
// interval, as in ryg_rans' rans_byte.h -- confirm.
const uint RANS_BYTE_L = (1u << 23);
// Number of sub-blocks the encoder loops walk per stream; also the stride
// multiplied by streamgroup_num when locating a sub-block.
const uint BLOCKS_PER_STREAM = 320;
rans_output[rans_offset + 3] = uint8_t(rans >> 24);
}
-void encode_coeff(uint coeff, uint bits, inout RansEncoder enc)
// sign_extend: interprets the low `bits` bits of coeff as a two's-complement
// value and returns it as a signed int. The field is shifted to the top of
// the 32-bit word and then shifted back down on a signed type, which
// replicates the sign bit. `bits` must be in 1..32 so that both shift
// amounts stay below 32.
+int sign_extend(uint coeff, uint bits)
+{
+ return int(coeff << (32 - bits)) >> (32 - bits);
+}
+
// encode_coeff: encodes one signed coefficient through RansEncPut.
//   signed_k  - coefficient value, already sign-extended by the caller.
//   sign_bias - added to enc.rans after the symbol is put when signed_k < 0;
//               the visible callers compute it as x + y of
//               ransdist[enc.lut_base + 255].
//   enc       - encoder state; enc.rans and enc.rans_offset are handed to
//               RansEncPut, enc.lut_base selects the slice of ransdist.
+void encode_coeff(int signed_k, uint sign_bias, inout RansEncoder enc)
{
- // Sign-extend to recover the coefficient.
- // FIXME: not needed for the bits == 8 case!
- int signed_k = int(coeff << (32 - bits)) >> (32 - bits);
// The magnitude selects the symbol; the sign is applied at the very end.
uint k = abs(signed_k);
// Magnitudes at or above ESCAPE_LIMIT: the raw magnitude is put first with
// frequency 1 at prob_bits precision, then k is clamped so that the
// ESCAPE_LIMIT symbol is the one put below.
if (k >= ESCAPE_LIMIT) {
- // ... boring stuff here
+ // Put the coefficient as a 1/(2^12) symbol _before_
+ // the 255 coefficient, since the decoder will read the
+ // 255 coefficient first.
RansEncPut(enc.rans, enc.rans_offset, k, 1, prob_bits);
k = ESCAPE_LIMIT;
}
// The added parentheses matter: `+` binds tighter than `&`, so the old line
// masked (enc.lut_base + k - 1) as a whole instead of only the symbol index.
// For k == 0 the unsigned k - 1 wraps around and the mask maps it to
// NUM_SYMS - 1 (assuming NUM_SYMS is a power of two -- confirm).
// Regular symbols are put with one more bit of precision than prob_bits;
// presumably the extra bit is the room used by the sign_bias add below --
// TODO confirm against the decoder.
- uvec2 sym = ransdist[enc.lut_base + (k - 1) & (NUM_SYMS - 1)];
- RansEncPut(enc.rans, enc.rans_offset, sym.x, sym.y, prob_bits);
+ uvec2 sym = ransdist[enc.lut_base + ((k - 1) & (NUM_SYMS - 1))];
+ RansEncPut(enc.rans, enc.rans_offset, sym.x, sym.y, prob_bits + 1);
// Negative values bump the state by sign_bias after the symbol was put.
- // fix some bias stuff here
+ if (signed_k < 0) {
+ enc.rans += sign_bias;
+ }
}
void encode_end(inout RansEncoder enc)
RansEncInit(streamgroup_num, coeff_row, col1, dist1, enc1);
RansEncInit(streamgroup_num, coeff_row, col2, dist2, enc2);
- for (uint subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
+ uint sign_bias1 = ransdist[enc1.lut_base + 255].x + ransdist[enc1.lut_base + 255].y;
+ uint sign_bias2 = ransdist[enc2.lut_base + 255].x + ransdist[enc2.lut_base + 255].y;
+
+ for (uint subblock_idx = 0; subblock_idx < BLOCKS_PER_STREAM; ++subblock_idx) {
// TODO: Use SSBOs instead of a texture?
uint x = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) % 160;
uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / 160;
uint f = imageLoad(tex, ivec2(x, y * 8 + coeff_row)).x;
- encode_coeff(f & 0x1ffu, 9, enc1);
- encode_coeff(f >> 9, 7, enc2);
+ encode_coeff(sign_extend(f & 0x1ffu, 9), sign_bias1, enc1);
+ encode_coeff(sign_extend(f >> 9, 7), sign_bias2, enc2);
}
encode_end(enc1);
RansEncoder enc;
RansEncInit(streamgroup_num, coeff_row, col, dist, enc);
+ uint sign_bias = ransdist[enc.lut_base + 255].x + ransdist[enc.lut_base + 255].y;
+
for (uint subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
// TODO: Use SSBOs instead of a texture?
uint x = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) % 160;
uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / 160;
int f = imageLoad(tex, ivec2(x, y * 8 + coeff_row)).x;
- encode_coeff(f, 8, enc);
+ encode_coeff(f, sign_bias, enc);
}
encode_end(enc);