// Shader storage block (std430 layout, binding point 9) holding the dist / ransdist arrays.
layout(std430, binding = 9) buffer layoutName
{
uint dist[4 * 256];  // 1024 uints. NOTE(review): presumably 4 tables of 256 symbols each; confirm.
+ uvec2 ransdist[4 * 256];  // 1024 uvec2 entries; written at [base + i] as uvec2(new_val, new_dist[i]).
};
// Bit count for the probability precision.
// NOTE(review): presumably prob_scale (the target sum in the redistribution loop) is 1 << prob_bits; confirm.
const uint prob_bits = 12;
barrier();
// Stick the thread ID in the lower mantissa bits so we never get a tie.
- uint my_vote = (floatBitsToUint(loss) & ~0xff) | gl_LocalInvocationID.x;
+ uint my_vote = (floatBitsToUint(loss) & ~0xffu) | gl_LocalInvocationID.x;
if (new_val <= 1) {
// We can't touch this one any more, but it needs to participate in the barriers,
// so we can't break.
for ( ; actual_sum != prob_scale; ++actual_sum, ++vote_no) {
// Stick the thread ID in the lower mantissa bits so we never get a tie.
- uint my_vote = (floatBitsToUint(benefit) & ~0xff) | gl_LocalInvocationID.x;
+ uint my_vote = (floatBitsToUint(benefit) & ~0xffu) | gl_LocalInvocationID.x;
if (new_val == 0) {
// It's meaningless to increase this, but it needs to participate in the barriers,
// so we can't break.
}
// Parallel prefix sum.
- new_dist[(i + 255) & 255] = new_val; // Move the zero symbol last.
+ new_dist[(i + 255) & 255u] = new_val; // Move the zero symbol last.
memoryBarrierShared();
barrier();
memoryBarrierShared();
barrier();
}
- dist[base + i] = new_dist[i];
+ ransdist[base + i] = uvec2(new_val, new_dist[i]);
}