8 //#include "ryg_rans/rans64.h"
9 #include "ryg_rans/rans_byte.h"
10 #include "ryg_rans/renormalize.h"
17 #include <unordered_map>
21 #define WIDTH_BLOCKS (WIDTH/8)
22 #define WIDTH_BLOCKS_CHROMA (WIDTH/16)
23 #define HEIGHT_BLOCKS (HEIGHT/8)
24 #define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS)
25 #define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS)
28 #define ESCAPE_LIMIT (NUM_SYMS - 1)
29 #define BLOCKS_PER_STREAM 320
31 // If you set this to 1, the program will try to optimize the placement
32 // of coefficients to rANS probability distributions. This is randomized,
33 // so you might want to run it a few times.
34 #define FIND_OPTIMAL_STREAM_ASSIGNMENT 0
35 #define NUM_CLUSTERS 4
37 static constexpr uint32_t prob_bits = 12;
38 static constexpr uint32_t prob_scale = 1 << prob_bits;
42 void fdct_int32(short *const In);
43 void idct_int32(short *const In);
45 unsigned char rgb[WIDTH * HEIGHT * 3];
46 unsigned char pix_y[WIDTH * HEIGHT];
47 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
48 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
49 unsigned char full_pix_cb[WIDTH * HEIGHT];
50 unsigned char full_pix_cr[WIDTH * HEIGHT];
51 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
56 if (x > 255) return 255;
60 static const unsigned char std_luminance_quant_tbl[64] = {
62 16, 11, 10, 16, 24, 40, 51, 61,
63 12, 12, 14, 19, 26, 58, 60, 55,
64 14, 13, 16, 24, 40, 57, 69, 56,
65 14, 17, 22, 29, 51, 87, 80, 62,
66 18, 22, 37, 56, 68, 109, 103, 77,
67 24, 35, 55, 64, 81, 104, 113, 92,
68 49, 64, 78, 87, 103, 121, 120, 101,
69 72, 92, 95, 98, 112, 100, 103, 99
71 // ff_mpeg1_default_intra_matrix
72 8, 16, 19, 22, 26, 27, 29, 34,
73 16, 16, 22, 24, 27, 29, 34, 37,
74 19, 22, 26, 27, 29, 34, 34, 38,
75 22, 22, 26, 27, 29, 34, 37, 40,
76 22, 26, 27, 29, 32, 35, 40, 48,
77 26, 27, 29, 32, 35, 40, 48, 58,
78 26, 27, 29, 34, 38, 46, 56, 69,
79 27, 29, 35, 38, 46, 56, 69, 83
85 uint32_t freqs[NUM_SYMS];
86 uint32_t cum_freqs[NUM_SYMS + 1];
89 void calc_cum_freqs();
90 void normalize_freqs(uint32_t target_total);
93 void SymbolStats::clear()
95 for (int i=0; i < NUM_SYMS; i++)
99 void SymbolStats::calc_cum_freqs()
102 for (int i=0; i < NUM_SYMS; i++)
103 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
106 void SymbolStats::normalize_freqs(uint32_t target_total)
108 uint64_t real_freq[NUM_SYMS + 1]; // hack
110 assert(target_total >= NUM_SYMS);
113 uint32_t cur_total = cum_freqs[NUM_SYMS];
115 if (cur_total == 0) return;
117 double ideal_cost = 0.0;
118 for (int i = 1; i <= NUM_SYMS; i++)
120 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
121 if (real_freq[i] > 0)
122 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
125 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
127 // calculate updated freqs and make sure we didn't screw anything up
128 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
129 for (int i=0; i < NUM_SYMS; i++) {
131 assert(cum_freqs[i+1] == cum_freqs[i]);
133 assert(cum_freqs[i+1] > cum_freqs[i]);
136 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
139 double calc_cost = 0.0;
140 for (int i = 1; i <= NUM_SYMS; i++)
142 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
143 if (real_freq[i] > 0)
144 calc_cost -= real_freq[i] * log2(freq / double(target_total));
147 static double total_loss = 0.0;
148 total_loss += calc_cost - ideal_cost;
149 static double total_loss_with_dp = 0.0;
150 double optimal_cost = 0.0;
151 //total_loss_with_dp += optimal_cost - ideal_cost;
152 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
153 ideal_cost, optimal_cost,
154 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
157 SymbolStats stats[128];
159 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
160 // Distance from one stream to the other, based on a hacked-up K-L divergence.
161 float kl_dist[64][64];
164 const int luma_mapping[64] = {
165 0, 0, 1, 1, 2, 2, 3, 3,
166 0, 0, 1, 2, 2, 2, 3, 3,
167 1, 1, 2, 2, 2, 3, 3, 3,
168 1, 1, 2, 2, 2, 3, 3, 3,
169 1, 2, 2, 2, 2, 3, 3, 3,
170 2, 2, 2, 2, 3, 3, 3, 3,
171 2, 2, 3, 3, 3, 3, 3, 3,
172 3, 3, 3, 3, 3, 3, 3, 3,
174 const int chroma_mapping[64] = {
175 0, 1, 1, 2, 2, 2, 3, 3,
176 1, 1, 2, 2, 2, 3, 3, 3,
177 2, 2, 2, 2, 3, 3, 3, 3,
178 2, 2, 2, 3, 3, 3, 3, 3,
179 2, 3, 3, 3, 3, 3, 3, 3,
180 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3,
182 3, 3, 3, 3, 3, 3, 3, 3,
185 int pick_stats_for(int x, int y, bool is_chroma)
187 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
188 return y * 8 + x + is_chroma * 64;
191 return chroma_mapping[y * 8 + x] + 4;
193 return luma_mapping[y * 8 + x];
199 void write_varint(int x, FILE *fp)
202 putc((x & 0x7f) | 0x80, fp);
212 out_buf.reset(new uint8_t[out_max_size]);
216 void init_prob(SymbolStats &s)
218 for (int i = 0; i < NUM_SYMS; i++) {
219 //printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits + 1);
220 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits + 1);
222 sign_bias = s.cum_freqs[NUM_SYMS];
227 out_end = out_buf.get() + out_max_size;
228 ptr = out_end; // *end* of output buffer
232 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
234 RansEncFlush(&rans, &ptr);
235 //printf("post-flush = %08x\n", rans);
237 uint32_t num_rans_bytes = out_end - ptr;
238 if (num_rans_bytes == last_block.size() &&
239 memcmp(last_block.data(), ptr, last_block.size()) == 0) {
240 write_varint(0, codedfp);
244 last_block = string((const char *)ptr, num_rans_bytes);
247 write_varint(num_rans_bytes, codedfp);
248 //fwrite(&num_rans_bytes, 1, 4, codedfp);
249 fwrite(ptr, 1, num_rans_bytes, codedfp);
251 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
256 //printf("Saving block: %d rANS bytes\n", num_rans_bytes);
257 return num_rans_bytes;
258 //return num_rans_bytes;
261 void encode_coeff(short signed_k)
263 //printf("encoding coeff %d (sym %d), rans before encoding = %08x\n", signed_k, ((abs(signed_k) - 1) & 255), rans);
264 unsigned short k = abs(signed_k);
265 if (k >= ESCAPE_LIMIT) {
266 // Put the coefficient as a 1/(2^12) symbol _before_
267 // the 255 coefficient, since the decoder will read the
268 // 255 coefficient first.
269 RansEncPut(&rans, &ptr, k, 1, prob_bits);
272 RansEncPutSymbol(&rans, &ptr, &esyms[(k - 1) & (NUM_SYMS - 1)]);
279 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
280 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
282 unique_ptr<uint8_t[]> out_buf;
286 RansEncSymbol esyms[NUM_SYMS];
289 std::string last_block;
292 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
293 static constexpr double quant_scalefac = 4.0; // whatever?
295 static inline int quantize(int f, int coeff_idx)
297 if (coeff_idx == 0) {
298 return f / dc_scalefac;
304 const int w = std_luminance_quant_tbl[coeff_idx];
305 const int s = quant_scalefac;
306 int sign_f = (f > 0) ? 1 : -1;
307 return (32 * f + sign_f * w * s) / (2 * w * s);
310 static inline int unquantize(int qf, int coeff_idx)
312 if (coeff_idx == 0) {
313 return qf * dc_scalefac;
319 const int w = std_luminance_quant_tbl[coeff_idx];
320 const int s = quant_scalefac;
321 return (2 * qf * w * s) / 32;
324 void readpix(unsigned char *ptr, const char *filename)
326 FILE *fp = fopen(filename, "rb");
332 fseek(fp, 0, SEEK_END);
333 long len = ftell(fp);
334 assert(len >= WIDTH * HEIGHT * 3);
335 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
337 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
343 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
344 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
345 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
347 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
348 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
349 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
350 for (unsigned xb = 0; xb < WIDTH; ++xb) {
351 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
352 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
353 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
354 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
355 double cb = (b - y) * cb_fac + 128.0;
356 double cr = (r - y) * cr_fac + 128.0;
357 pix_y[(yb * WIDTH) + xb] = lrint(y);
358 temp_cb[(yb * WIDTH) + xb] = cb;
359 temp_cr[(yb * WIDTH) + xb] = cr;
360 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
361 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
365 // Simple 4:2:2 subsampling with left convention.
366 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
367 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
368 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
369 int c1 = yb * WIDTH + xb * 2;
370 int c2 = yb * WIDTH + xb * 2 + 1;
372 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
373 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
374 cb = std::min(std::max(cb, 0.0), 255.0);
375 cr = std::min(std::max(cr, 0.0), 255.0);
376 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
377 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
382 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
383 double find_best_assignment(const int *medoids, int *assignment)
385 double current_score = 0.0;
386 for (int i = 0; i < 64; ++i) {
387 int best_medoid = medoids[0];
388 float best_medoid_score = kl_dist[i][medoids[0]];
389 for (int j = 1; j < NUM_CLUSTERS; ++j) {
390 if (kl_dist[i][medoids[j]] < best_medoid_score) {
391 best_medoid = medoids[j];
392 best_medoid_score = kl_dist[i][medoids[j]];
395 assignment[i] = best_medoid;
396 current_score += best_medoid_score;
398 return current_score;
401 void find_optimal_stream_assignment(int base)
404 for (unsigned i = 0; i < 64; ++i) {
406 for (unsigned k = 0; k < NUM_SYMS; ++k) {
407 s += stats[i + base].freqs[k] + 0.5;
409 inv_sum[i] = 1.0 / s;
412 for (unsigned i = 0; i < 64; ++i) {
413 for (unsigned j = 0; j < 64; ++j) {
415 for (unsigned k = 0; k < NUM_SYMS; ++k) {
416 double p1 = (stats[i + base].freqs[k] + 0.5) * inv_sum[i];
417 double p2 = (stats[j + base].freqs[k] + 0.5) * inv_sum[j];
419 // K-L divergence is asymmetric; this is a hack.
420 d += p1 * log(p1 / p2);
421 d += p2 * log(p2 / p1);
424 //printf("%.3f ", d);
430 int medoids[64]; // only the first NUM_CLUSTERS matter
431 bool is_medoid[64] = { false };
432 std::iota(medoids, medoids + 64, 0);
433 std::random_device rd;
434 std::mt19937 g(rd());
435 std::shuffle(medoids, medoids + 64, g);
436 for (int i = 0; i < NUM_CLUSTERS; ++i) {
437 printf("%d ", medoids[i]);
438 is_medoid[medoids[i]] = true;
442 // assign each data point to the closest medoid
444 double current_score = find_best_assignment(medoids, assignment);
446 for (int i = 0; i < 1000; ++i) {
447 printf("iter %d\n", i);
448 bool any_changed = false;
449 for (int m = 0; m < NUM_CLUSTERS; ++m) {
450 for (int o = 0; o < 64; ++o) {
451 if (is_medoid[o]) continue;
452 int old_medoid = medoids[m];
455 int new_assignment[64];
456 double candidate_score = find_best_assignment(medoids, new_assignment);
458 if (candidate_score < current_score) {
459 current_score = candidate_score;
460 memcpy(assignment, new_assignment, sizeof(assignment));
462 is_medoid[old_medoid] = false;
463 is_medoid[medoids[m]] = true;
464 printf("%f: ", current_score);
465 for (int i = 0; i < 64; ++i) {
466 printf("%d ", assignment[i]);
471 medoids[m] = old_medoid;
475 if (!any_changed) break;
478 std::unordered_map<int, int> rmap;
479 for (int i = 0; i < 64; ++i) {
480 if (i % 8 == 0) printf("\n");
481 if (!rmap.count(assignment[i])) {
482 rmap.emplace(assignment[i], rmap.size());
484 printf("%d, ", rmap[assignment[i]]);
490 int main(int argc, char **argv)
493 readpix(rgb, argv[1]);
495 readpix(rgb, "color.pnm");
498 double sum_sq_err = 0.0;
499 //double last_cb_cfl_fac = 0.0;
500 //double last_cr_cfl_fac = 0.0;
502 int max_val_x[8] = {0}, min_val_x[8] = {0};
503 int max_val_y[8] = {0}, min_val_y[8] = {0};
505 // DCT and quantize luma
506 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
507 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
510 for (unsigned y = 0; y < 8; ++y) {
511 for (unsigned x = 0; x < 8; ++x) {
512 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
519 for (unsigned y = 0; y < 8; ++y) {
520 for (unsigned x = 0; x < 8; ++x) {
521 int coeff_idx = y * 8 + x;
522 int k = quantize(in_y[coeff_idx], coeff_idx);
523 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
525 max_val_x[x] = std::max(max_val_x[x], k);
526 min_val_x[x] = std::min(min_val_x[x], k);
527 max_val_y[y] = std::max(max_val_y[y], k);
528 min_val_y[y] = std::min(min_val_y[y], k);
530 // Store back for reconstruction / PSNR calculation
531 in_y[coeff_idx] = unquantize(k, coeff_idx);
537 for (unsigned y = 0; y < 8; ++y) {
538 for (unsigned x = 0; x < 8; ++x) {
539 int k = clamp(in_y[y * 8 + x]);
540 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
541 sum_sq_err += (*ptr - k) * (*ptr - k);
547 double mse = sum_sq_err / double(WIDTH * HEIGHT);
548 double psnr_db = 20 * log10(255.0 / sqrt(mse));
549 printf("psnr = %.2f dB\n", psnr_db);
551 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
553 // DCT and quantize chroma
554 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
555 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
556 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
558 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
559 printf("in blocks:\n");
560 for (unsigned y = 0; y < 8; ++y) {
561 for (unsigned x = 0; x < 8; ++x) {
562 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
566 for (unsigned x = 0; x < 8; ++x) {
567 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
574 for (unsigned y = 0; y < 8; ++y) {
575 for (unsigned x = 0; x < 4; ++x) {
576 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
577 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
580 in_y[y * 8 + x * 2 + 0] = a;
581 in_y[y * 8 + x * 2 + 1] = b;
585 printf("tf-ed block:\n");
586 for (unsigned y = 0; y < 8; ++y) {
587 for (unsigned x = 0; x < 8; ++x) {
588 short a = in_y[y * 8 + x];
594 // Read Y block with no tf switch (from reconstructed luma)
596 for (unsigned y = 0; y < 8; ++y) {
597 for (unsigned x = 0; x < 8; ++x) {
598 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
605 short in_cb[64], in_cr[64];
606 for (unsigned y = 0; y < 8; ++y) {
607 for (unsigned x = 0; x < 8; ++x) {
608 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
609 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
622 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
623 //double denom = (x1 * x1);
625 double y0 = in_cb[1];
626 double y1 = in_cb[8];
627 double y2 = in_cb[9];
628 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
629 //double cb_cfl_fac = (x1 * y1) / denom;
631 for (unsigned y = 0; y < 8; ++y) {
632 for (unsigned x = 0; x < 8; ++x) {
633 short a = in_y[y * 8 + x];
637 for (unsigned x = 0; x < 8; ++x) {
638 short a = in_cb[y * 8 + x];
643 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
644 in_y[1], in_y[8], in_y[9],
645 in_cb[1], in_cb[8], in_cb[9],
651 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
652 //double cr_cfl_fac = (x1 * y1) / denom;
653 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
654 cb_cfl_fac, in_cb[0] - in_y[0],
655 cr_cfl_fac, in_cr[0] - in_y[0]);
657 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
660 //last_cb_cfl_fac = cb_cfl_fac;
661 //last_cr_cfl_fac = cr_cfl_fac;
663 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
664 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
665 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
666 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
667 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
668 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
670 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
671 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
672 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
673 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
674 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
675 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
676 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
680 //in_cb[0] -= in_y[0];
681 //in_cr[0] -= in_y[0];
684 for (unsigned y = 0; y < 8; ++y) {
685 for (unsigned x = 0; x < 8; ++x) {
686 int coeff_idx = y * 8 + x;
687 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
688 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
689 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
690 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
692 // Store back for reconstruction / PSNR calculation
693 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
694 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
698 idct_int32(in_y); // DEBUG
702 for (unsigned y = 0; y < 8; ++y) {
703 for (unsigned x = 0; x < 8; ++x) {
704 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
705 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
707 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
708 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
713 last_cb_cfl_fac = cb_cfl_fac;
714 last_cr_cfl_fac = cr_cfl_fac;
720 printf("chroma_energy = %f, with_pred = %f\n",
721 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
724 // DC coefficient pred from the right to left (within each slice)
725 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += BLOCKS_PER_STREAM) {
728 for (unsigned subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
729 unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS;
730 unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS;
731 int k = coeff_y[(yb * 8) * WIDTH + (xb * 8)];
733 coeff_y[(yb * 8) * WIDTH + (xb * 8)] = k - prev_k;
738 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; block_idx += BLOCKS_PER_STREAM) {
742 for (unsigned subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
743 unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS_CHROMA;
744 unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS_CHROMA;
745 int k_cb = coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)];
746 int k_cr = coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)];
748 coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cb - prev_k_cb;
749 coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cr - prev_k_cr;
756 FILE *fp = fopen("reconstructed.pgm", "wb");
757 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
758 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
761 fp = fopen("reconstructed.pnm", "wb");
762 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
763 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
764 for (unsigned xb = 0; xb < WIDTH; ++xb) {
765 int y = pix_y[(yb * WIDTH) + xb];
767 int c0 = yb * (WIDTH/2) + xb/2;
769 cb = pix_cb[c0] - 128.0;
770 cr = pix_cr[c0] - 128.0;
772 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
773 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
774 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
777 double r = y + 1.5748 * cr;
778 double g = y - 0.1873 * cb - 0.4681 * cr;
779 double b = y + 1.8556 * cb;
781 putc(clamp(lrint(r)), fp);
782 putc(clamp(lrint(g)), fp);
783 putc(clamp(lrint(b)), fp);
788 // For each coefficient, make some tables.
789 size_t extra_bits = 0;
790 for (unsigned i = 0; i < 64; ++i) {
793 for (unsigned y = 0; y < 8; ++y) {
794 for (unsigned x = 0; x < 8; ++x) {
795 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
796 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
799 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
800 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
801 unsigned short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
802 if (k >= ESCAPE_LIMIT) {
804 extra_bits += 12; // escape this one
806 ++s_luma.freqs[(k - 1) & (NUM_SYMS - 1)];
810 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
811 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
812 unsigned short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
813 unsigned short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
814 if (k_cb >= ESCAPE_LIMIT) {
816 extra_bits += 12; // escape this one
818 if (k_cr >= ESCAPE_LIMIT) {
820 extra_bits += 12; // escape this one
822 ++s_chroma.freqs[(k_cb - 1) & (NUM_SYMS - 1)];
823 ++s_chroma.freqs[(k_cr - 1) & (NUM_SYMS - 1)];
829 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
831 find_optimal_stream_assignment(0);
833 find_optimal_stream_assignment(64);
837 for (unsigned i = 0; i < 64; ++i) {
838 stats[i].freqs[NUM_SYMS - 1] /= 2; // zero, has no sign bits (yes, this is trickery)
839 stats[i].normalize_freqs(prob_scale);
840 stats[i].cum_freqs[NUM_SYMS] += stats[i].freqs[NUM_SYMS - 1];
841 stats[i].freqs[NUM_SYMS - 1] *= 2;
844 FILE *codedfp = fopen("coded.dat", "wb");
845 if (codedfp == nullptr) {
850 // TODO: rather gamma-k or something
851 for (unsigned i = 0; i < 64; ++i) {
852 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
855 printf("writing table %d\n", i);
856 for (unsigned j = 0; j < NUM_SYMS; ++j) {
857 write_varint(stats[i].freqs[j], codedfp);
861 RansEncoder rans_encoder;
863 size_t tot_bytes = 0;
866 for (unsigned y = 0; y < 8; ++y) {
867 for (unsigned x = 0; x < 8; ++x) {
868 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
869 rans_encoder.init_prob(s_luma);
872 std::vector<int> lens;
874 // need to reverse later
875 rans_encoder.clear();
876 size_t num_bytes = 0;
877 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; ++block_idx) {
878 unsigned yb = block_idx / WIDTH_BLOCKS;
879 unsigned xb = block_idx % WIDTH_BLOCKS;
881 int k = coeff_y[(yb * 8 + y) * WIDTH + (xb * 8 + x)];
882 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
883 rans_encoder.encode_coeff(k);
885 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
886 int l = rans_encoder.save_block(codedfp);
891 tot_bytes += num_bytes;
892 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
898 double avg_l = sum_l / lens.size();
900 double sum_sql = 0.0;
902 sum_sql += (l - avg_l) * (l - avg_l);
904 double stddev_l = sqrt(sum_sql / (lens.size() - 1));
905 printf("coeff %d: avg=%.2f bytes, stddev=%.2f bytes\n", y*8+x, avg_l, stddev_l);
910 for (unsigned y = 0; y < 8; ++y) {
911 for (unsigned x = 0; x < 8; ++x) {
912 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
913 rans_encoder.init_prob(s_chroma);
915 rans_encoder.clear();
916 size_t num_bytes = 0;
917 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
918 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
919 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
921 int k = coeff_cb[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
922 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
923 rans_encoder.encode_coeff(k);
925 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
926 num_bytes += rans_encoder.save_block(codedfp);
929 tot_bytes += num_bytes;
930 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
935 for (unsigned y = 0; y < 8; ++y) {
936 for (unsigned x = 0; x < 8; ++x) {
937 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
938 rans_encoder.init_prob(s_chroma);
940 rans_encoder.clear();
941 size_t num_bytes = 0;
942 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
943 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
944 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
946 int k = coeff_cr[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
947 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
948 rans_encoder.encode_coeff(k);
950 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
951 num_bytes += rans_encoder.save_block(codedfp);
954 tot_bytes += num_bytes;
955 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
959 printf("%ld bytes + %ld escape bits (%ld) = %ld total bytes\n",
960 tot_bytes - extra_bits / 8,
966 printf("Max coefficient ranges (as a function of x):\n\n");
967 for (unsigned x = 0; x < 8; ++x) {
968 int range = std::max(max_val_x[x], -min_val_x[x]);
969 printf(" [%4d, %4d] (%.2f bits)\n", min_val_x[x], max_val_x[x], log2(range * 2 + 1));
972 printf("Max coefficient ranges (as a function of y):\n\n");
973 for (unsigned y = 0; y < 8; ++y) {
974 int range = std::max(max_val_y[y], -min_val_y[y]);
975 printf(" [%4d, %4d] (%.2f bits)\n", min_val_y[y], max_val_y[y], log2(range * 2 + 1));