#include <SDL2/SDL_error.h>
#include <SDL2/SDL_video.h>
#include <epoxy/gl.h>
+#include <movit/util.h>
#include <string>
#include <optional>
#include <algorithm>
#include <vector>
#include <memory>
+#include <chrono>
#include "util.h"
using namespace std;
+using namespace std::chrono;
#define WIDTH 1280
#define HEIGHT 720
}
struct CoeffStream {
- uint src_offset, src_len, sign_offset, sign_len, extra_bits;
+ uint src_offset, src_len;  // NOTE(review): presumably the offset and length of one coded stream inside the input buffer -- confirm against the code that fills `streams`
};
CoeffStream streams[45 * 64]; // HACK
glGetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &size);
printf("shared_memory_size=%u\n", size);
- string shader_src = read_file("decoder-pre-sign.shader");
+ string shader_src = ::read_file("decoder.shader");
GLuint shader_num = compile_shader(shader_src, GL_COMPUTE_SHADER);
GLuint glsl_program_num = glCreateProgram();
glAttachShader(glsl_program_num, shader_num);
glUseProgram(glsl_program_num);
- string coded = read_file(argc >= 2 ? argv[1] : "coded.dat");
+ string coded = ::read_file(argc >= 2 ? argv[1] : "coded.dat");
const char *ptr = &coded[0];
const char *end = ptr + coded.size();
+ GLuint sign_bias[NUM_TABLES];
// printf("first few bytes offs=%zu: %d %d %d %d %d %d %d %d\n", ptr - coded.data(),
// (uint8_t)ptr[0], (uint8_t)ptr[1], (uint8_t)ptr[2], (uint8_t)ptr[3],
exit(1);
}
- decode_tables[table].dsyms[sym].sym_start = cum_freq;
- decode_tables[table].dsyms[sym].sym_freq = *freq;
+ decode_tables[table].dsyms[(sym + 1) & 255].sym_start = cum_freq;
+ decode_tables[table].dsyms[(sym + 1) & 255].sym_freq = *freq;
for (uint32_t i = 0; i < freq; ++i) {
- decode_tables[table].cum2sym[cum_freq++] = sym;
+ if (cum_freq < prob_scale)
+ decode_tables[table].cum2sym[cum_freq] = (sym + 1) & 255;
+ ++cum_freq;
}
}
+ sign_bias[table] = cum_freq;
}
// Make cum2sym texture.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R8UI, prob_scale, NUM_TABLES, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, cum2sym_data.get());
+ check_error();
// Make dsyms texture.
unique_ptr<pair<uint16_t, uint16_t>[]> dsyms_data(new pair<uint16_t, uint16_t>[NUM_SYMS * NUM_TABLES]);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RG16UI, NUM_SYMS, NUM_TABLES, 0, GL_RG_INTEGER, GL_UNSIGNED_SHORT, dsyms_data.get());
+ check_error();
+
+ GLuint coeff_tex;  // texture object for the 1280x720 GL_R16I image allocated below
+ glGenTextures(1, &coeff_tex);
+ glBindTexture(GL_TEXTURE_2D, coeff_tex);  // the parameter and storage calls below apply to this binding
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);  // nearest-neighbour, no interpolation
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
+ check_error();
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_R16I, 1280, 720, 0, GL_RED_INTEGER, GL_SHORT, nullptr);  // nullptr data: allocate storage only, no initial upload
+ check_error();
GLuint out_tex;
glGenTextures(1, &out_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, 1280, 720, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
//glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, 1280, 720, 0, GL_RED, GL_FLOAT, nullptr);
+ check_error();
- //GLint src_offset_pos = glGetUniformLocation(glsl_program_num, "src_offset");
- //GLint sign_offset_pos = glGetUniformLocation(glsl_program_num, "sign_offset");
- //GLint extra_bits_pos = glGetUniformLocation(glsl_program_num, "extra_bits");
GLint cum2sym_tex_pos = glGetUniformLocation(glsl_program_num, "cum2sym_tex");
GLint dsyms_tex_pos = glGetUniformLocation(glsl_program_num, "dsyms_tex");
GLint out_tex_pos = glGetUniformLocation(glsl_program_num, "out_tex");
- printf("%d err=0x%x pos=%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos);
+ GLint coeff_tex_pos = glGetUniformLocation(glsl_program_num, "coeff_tex");
+ GLint sign_bias_pos = glGetUniformLocation(glsl_program_num, "sign_bias_per_model");
+ printf("%d err=0x%x pos=%d,%d,%d,%d\n", __LINE__, glGetError(), cum2sym_tex_pos, dsyms_tex_pos, out_tex_pos, sign_bias_pos);
// Bind the textures.
glUniform1i(cum2sym_tex_pos, 0);
glUniform1i(dsyms_tex_pos, 1);
glUniform1i(out_tex_pos, 2);
+ // coeff_tex is addressed by the shader through image unit 3.
+ glUniform1i(coeff_tex_pos, 3);
+ // Upload one sign bias per table. The element count must match the
+ // sign_bias[NUM_TABLES] array; a hard-coded count reads past the end of
+ // the array whenever NUM_TABLES is smaller than that count.
+ glUniform1uiv(sign_bias_pos, NUM_TABLES, sign_bias);
glBindImageTexture(0, cum2sym_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8UI);
glBindImageTexture(1, dsyms_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG16UI);
glBindImageTexture(2, out_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8);
+ glBindImageTexture(3, coeff_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R16I);
printf("%d err=0x%x\n", __LINE__, glGetError());
// Decode all luma blocks.
// TODO: check len
ptr += *num_rans_bytes;
- optional<uint32_t> num_sign_bytes = read_varint(&ptr, end);
- if (!num_sign_bytes) {
- fprintf(stderr, "Error parsing varint for block %d rANS bytes\n", yb);
- exit(1);
- }
-
- stream->sign_offset = ptr - coded.data();
- stream->sign_len = *num_sign_bytes >> 3;
- stream->extra_bits = *num_sign_bytes & 0x7;
-
- // TODO: check len
- // TODO: free bits
- ptr += *num_sign_bytes >> 3;
-
//printf("read %d rANS bytes, %d sign bytes\n", *num_rans_bytes, *num_sign_bytes);
}
}
glGenBuffers(1, &ssbo_out);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_out);
- glBufferData(GL_SHADER_STORAGE_BUFFER, 16384, nullptr, GL_STREAM_DRAW); // ??
+ glBufferData(GL_SHADER_STORAGE_BUFFER, 65536, nullptr, GL_STREAM_DRAW); // ??
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, ssbo_out);
+ check_error();
- for (int i = 0; i < 10000; ++i)
- glDispatchCompute(1, 45, 1);
+#define PARALLEL_SLICES 1  // divisor applied to the Y work-group count below
+ steady_clock::time_point start = steady_clock::now();  // wall-clock start of the timed run
+ for (int i = 0; i < 1000; ++i)  // 1000 back-to-back dispatches
+ glDispatchCompute(1, (45+PARALLEL_SLICES-1)/PARALLEL_SLICES, 1);  // ceil(45 / PARALLEL_SLICES) work groups in Y
+ check_error();
+ glFinish();  // block until the GL commands issued so far have completed, so `now` includes GPU execution
+ steady_clock::time_point now = steady_clock::now();
- unsigned *timing = (unsigned *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 16384, GL_MAP_READ_BIT);
+ unsigned *timing = (unsigned *)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 65536, GL_MAP_READ_BIT);
//setlocale(LC_ALL, "nb_NO.UTF-8");
string phases[] = {
for (int i = 0; i < 10; ++i) {
//printf("%d: %'18.0f [%s]\n", i, double((uint64_t(timing[i * 2 + 1]) << 32) | timing[i * 2]), phases[i].c_str());
printf("%d,%s", i, phases[i].c_str());
- for (int j = 0; j < 64; ++j) {
+ for (int j = 0; j < 512; ++j) {
int idx = (j * 10 + i) * 2;
uint64_t val = (uint64_t(timing[idx + 1]) << 32) | timing[idx];
// printf(" %'18.0f", double(val));
unsigned char *data = new unsigned char[1280 * 720];
glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, data);
+ check_error();
printf("%d err=0x%x bufsize=%zu\n", __LINE__, glGetError(), coded.size());
#if 0
}
}
fclose(fp);
+
+ // Read back the coefficient texture and print the four leftmost 8x8
+ // regions of its first eight rows for inspection. The buffer is owned by
+ // a unique_ptr so it is released on scope exit instead of leaking.
+ unique_ptr<int16_t[]> coeff_data(new int16_t[1280 * 720]);
+ glBindTexture(GL_TEXTURE_2D, coeff_tex);
+ check_error();
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RED_INTEGER, GL_SHORT, coeff_data.get());
+ check_error();
+ for (int k = 0; k < 4; ++k) {
+     for (int y = 0; y < 8; ++y) {
+         for (int x = 0; x < 8; ++x) {
+             // Row stride is the texture width (1280); region k starts at column k*8.
+             printf("%3d ", coeff_data[y * 1280 + x + k*8]);
+         }
+         printf("\n");
+     }
+     printf("\n");
+ }
+ printf("\n");
+
+ check_error();
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); // unbind
printf("foo = 0x%x\n", glGetError());
+ printf("Each iteration took %.3f ms.\n", 1e3 * duration<double>(now - start).count() / 1000);  // seconds -> milliseconds, divided by the 1000 dispatches in the timed loop
}