8 //#include "ryg_rans/rans64.h"
9 #include "ryg_rans/rans_byte.h"
10 #include "ryg_rans/renormalize.h"
17 #include <unordered_map>
21 #define WIDTH_BLOCKS (WIDTH/8)
22 #define WIDTH_BLOCKS_CHROMA (WIDTH/16)
23 #define HEIGHT_BLOCKS (HEIGHT/8)
24 #define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS)
25 #define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS)
28 #define ESCAPE_LIMIT (NUM_SYMS - 1)
29 #define BLOCKS_PER_STREAM 320
31 // If you set this to 1, the program will try to optimize the placement
32 // of coefficients to rANS probability distributions. This is randomized,
33 // so you might want to run it a few times.
34 #define FIND_OPTIMAL_STREAM_ASSIGNMENT 0
35 #define NUM_CLUSTERS 4
37 static constexpr uint32_t prob_bits = 12;
38 static constexpr uint32_t prob_scale = 1 << prob_bits;
42 void fdct_int32(short *const In);
43 void idct_int32(short *const In);
45 unsigned char rgb[WIDTH * HEIGHT * 3];
46 unsigned char pix_y[WIDTH * HEIGHT];
47 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
48 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
49 unsigned char full_pix_cb[WIDTH * HEIGHT];
50 unsigned char full_pix_cr[WIDTH * HEIGHT];
51 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
56 if (x > 255) return 255;
60 static const unsigned char std_luminance_quant_tbl[64] = {
62 16, 11, 10, 16, 24, 40, 51, 61,
63 12, 12, 14, 19, 26, 58, 60, 55,
64 14, 13, 16, 24, 40, 57, 69, 56,
65 14, 17, 22, 29, 51, 87, 80, 62,
66 18, 22, 37, 56, 68, 109, 103, 77,
67 24, 35, 55, 64, 81, 104, 113, 92,
68 49, 64, 78, 87, 103, 121, 120, 101,
69 72, 92, 95, 98, 112, 100, 103, 99
71 // ff_mpeg1_default_intra_matrix
72 8, 16, 19, 22, 26, 27, 29, 34,
73 16, 16, 22, 24, 27, 29, 34, 37,
74 19, 22, 26, 27, 29, 34, 34, 38,
75 22, 22, 26, 27, 29, 34, 37, 40,
76 22, 26, 27, 29, 32, 35, 40, 48,
77 26, 27, 29, 32, 35, 40, 48, 58,
78 26, 27, 29, 34, 38, 46, 56, 69,
79 27, 29, 35, 38, 46, 56, 69, 83
85 uint32_t freqs[NUM_SYMS];
86 uint32_t cum_freqs[NUM_SYMS + 1];
89 void calc_cum_freqs();
90 void normalize_freqs(uint32_t target_total);
93 void SymbolStats::clear()
95 for (int i=0; i < NUM_SYMS; i++)
99 void SymbolStats::calc_cum_freqs()
102 for (int i=0; i < NUM_SYMS; i++)
103 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
106 void SymbolStats::normalize_freqs(uint32_t target_total)
108 uint64_t real_freq[NUM_SYMS + 1]; // hack
110 assert(target_total >= NUM_SYMS);
113 uint32_t cur_total = cum_freqs[NUM_SYMS];
115 if (cur_total == 0) return;
117 double ideal_cost = 0.0;
118 for (int i = 1; i <= NUM_SYMS; i++)
120 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
121 if (real_freq[i] > 0)
122 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
125 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
127 // calculate updated freqs and make sure we didn't screw anything up
128 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
129 for (int i=0; i < NUM_SYMS; i++) {
131 assert(cum_freqs[i+1] == cum_freqs[i]);
133 assert(cum_freqs[i+1] > cum_freqs[i]);
136 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
139 double calc_cost = 0.0;
140 for (int i = 1; i <= NUM_SYMS; i++)
142 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
143 if (real_freq[i] > 0)
144 calc_cost -= real_freq[i] * log2(freq / double(target_total));
147 static double total_loss = 0.0;
148 total_loss += calc_cost - ideal_cost;
149 static double total_loss_with_dp = 0.0;
150 double optimal_cost = 0.0;
151 //total_loss_with_dp += optimal_cost - ideal_cost;
152 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
153 ideal_cost, optimal_cost,
154 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
157 SymbolStats stats[128];
159 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
160 // Distance from one stream to the other, based on a hacked-up K-L divergence.
161 float kl_dist[64][64];
164 const int luma_mapping[64] = {
165 0, 0, 1, 1, 2, 2, 3, 3,
166 0, 0, 1, 2, 2, 2, 3, 3,
167 1, 1, 2, 2, 2, 3, 3, 3,
168 1, 1, 2, 2, 2, 3, 3, 3,
169 1, 2, 2, 2, 2, 3, 3, 3,
170 2, 2, 2, 2, 3, 3, 3, 3,
171 2, 2, 3, 3, 3, 3, 3, 3,
172 3, 3, 3, 3, 3, 3, 3, 3,
174 const int chroma_mapping[64] = {
175 0, 1, 1, 2, 2, 2, 3, 3,
176 1, 1, 2, 2, 2, 3, 3, 3,
177 2, 2, 2, 2, 3, 3, 3, 3,
178 2, 2, 2, 3, 3, 3, 3, 3,
179 2, 3, 3, 3, 3, 3, 3, 3,
180 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3,
182 3, 3, 3, 3, 3, 3, 3, 3,
185 int pick_stats_for(int x, int y, bool is_chroma)
187 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
188 return y * 8 + x + is_chroma * 64;
191 return chroma_mapping[y * 8 + x] + 4;
193 return luma_mapping[y * 8 + x];
199 void write_varint(int x, FILE *fp)
202 putc((x & 0x7f) | 0x80, fp);
212 out_buf.reset(new uint8_t[out_max_size]);
216 void init_prob(SymbolStats &s)
218 for (int i = 0; i < NUM_SYMS; i++) {
219 printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits + 1);
220 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits + 1);
222 sign_bias = s.cum_freqs[NUM_SYMS];
227 out_end = out_buf.get() + out_max_size;
228 ptr = out_end; // *end* of output buffer
232 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
234 RansEncFlush(&rans, &ptr);
235 //printf("post-flush = %08x\n", rans);
237 uint32_t num_rans_bytes = out_end - ptr;
239 if (num_rans_bytes == 4) {
241 memcpy(&block, ptr, 4);
243 if (block == last_block) {
244 write_varint(0, codedfp);
255 write_varint(num_rans_bytes, codedfp);
256 //fwrite(&num_rans_bytes, 1, 4, codedfp);
257 fwrite(ptr, 1, num_rans_bytes, codedfp);
259 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
264 printf("Saving block: %d rANS bytes\n", num_rans_bytes);
265 return num_rans_bytes;
266 //return num_rans_bytes;
269 void encode_coeff(short signed_k)
271 //printf("encoding coeff %d (sym %d), rans before encoding = %08x\n", signed_k, ((abs(signed_k) - 1) & 255), rans);
272 unsigned short k = abs(signed_k);
273 if (k >= ESCAPE_LIMIT) {
274 // Put the coefficient as a 1/(2^12) symbol _before_
275 // the 255 coefficient, since the decoder will read the
276 // 255 coefficient first.
277 RansEncPut(&rans, &ptr, k, 1, prob_bits);
280 RansEncPutSymbol(&rans, &ptr, &esyms[(k - 1) & (NUM_SYMS - 1)]);
287 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
288 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
290 unique_ptr<uint8_t[]> out_buf;
294 RansEncSymbol esyms[NUM_SYMS];
297 uint32_t last_block = 0; // Not a valid 4-byte rANS block (?)
300 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
301 static constexpr double quant_scalefac = 4.0; // whatever?
303 static inline int quantize(int f, int coeff_idx)
305 if (coeff_idx == 0) {
306 return f / dc_scalefac;
312 const int w = std_luminance_quant_tbl[coeff_idx];
313 const int s = quant_scalefac;
314 int sign_f = (f > 0) ? 1 : -1;
315 return (32 * f + sign_f * w * s) / (2 * w * s);
318 static inline int unquantize(int qf, int coeff_idx)
320 if (coeff_idx == 0) {
321 return qf * dc_scalefac;
327 const int w = std_luminance_quant_tbl[coeff_idx];
328 const int s = quant_scalefac;
329 return (2 * qf * w * s) / 32;
332 void readpix(unsigned char *ptr, const char *filename)
334 FILE *fp = fopen(filename, "rb");
340 fseek(fp, 0, SEEK_END);
341 long len = ftell(fp);
342 assert(len >= WIDTH * HEIGHT * 3);
343 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
345 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
351 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
352 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
353 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
355 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
356 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
357 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
358 for (unsigned xb = 0; xb < WIDTH; ++xb) {
359 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
360 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
361 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
362 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
363 double cb = (b - y) * cb_fac + 128.0;
364 double cr = (r - y) * cr_fac + 128.0;
365 pix_y[(yb * WIDTH) + xb] = lrint(y);
366 temp_cb[(yb * WIDTH) + xb] = cb;
367 temp_cr[(yb * WIDTH) + xb] = cr;
368 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
369 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
373 // Simple 4:2:2 subsampling with left convention.
374 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
375 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
376 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
377 int c1 = yb * WIDTH + xb * 2;
378 int c2 = yb * WIDTH + xb * 2 + 1;
380 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
381 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
382 cb = std::min(std::max(cb, 0.0), 255.0);
383 cr = std::min(std::max(cr, 0.0), 255.0);
384 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
385 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
390 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
391 double find_best_assignment(const int *medoids, int *assignment)
393 double current_score = 0.0;
394 for (int i = 0; i < 64; ++i) {
395 int best_medoid = medoids[0];
396 float best_medoid_score = kl_dist[i][medoids[0]];
397 for (int j = 1; j < NUM_CLUSTERS; ++j) {
398 if (kl_dist[i][medoids[j]] < best_medoid_score) {
399 best_medoid = medoids[j];
400 best_medoid_score = kl_dist[i][medoids[j]];
403 assignment[i] = best_medoid;
404 current_score += best_medoid_score;
406 return current_score;
409 void find_optimal_stream_assignment(int base)
412 for (unsigned i = 0; i < 64; ++i) {
414 for (unsigned k = 0; k < NUM_SYMS; ++k) {
415 s += stats[i + base].freqs[k] + 0.5;
417 inv_sum[i] = 1.0 / s;
420 for (unsigned i = 0; i < 64; ++i) {
421 for (unsigned j = 0; j < 64; ++j) {
423 for (unsigned k = 0; k < NUM_SYMS; ++k) {
424 double p1 = (stats[i + base].freqs[k] + 0.5) * inv_sum[i];
425 double p2 = (stats[j + base].freqs[k] + 0.5) * inv_sum[j];
427 // K-L divergence is asymmetric; this is a hack.
428 d += p1 * log(p1 / p2);
429 d += p2 * log(p2 / p1);
432 //printf("%.3f ", d);
438 int medoids[64]; // only the first NUM_CLUSTERS matter
439 bool is_medoid[64] = { false };
440 std::iota(medoids, medoids + 64, 0);
441 std::random_device rd;
442 std::mt19937 g(rd());
443 std::shuffle(medoids, medoids + 64, g);
444 for (int i = 0; i < NUM_CLUSTERS; ++i) {
445 printf("%d ", medoids[i]);
446 is_medoid[medoids[i]] = true;
450 // assign each data point to the closest medoid
452 double current_score = find_best_assignment(medoids, assignment);
454 for (int i = 0; i < 1000; ++i) {
455 printf("iter %d\n", i);
456 bool any_changed = false;
457 for (int m = 0; m < NUM_CLUSTERS; ++m) {
458 for (int o = 0; o < 64; ++o) {
459 if (is_medoid[o]) continue;
460 int old_medoid = medoids[m];
463 int new_assignment[64];
464 double candidate_score = find_best_assignment(medoids, new_assignment);
466 if (candidate_score < current_score) {
467 current_score = candidate_score;
468 memcpy(assignment, new_assignment, sizeof(assignment));
470 is_medoid[old_medoid] = false;
471 is_medoid[medoids[m]] = true;
472 printf("%f: ", current_score);
473 for (int i = 0; i < 64; ++i) {
474 printf("%d ", assignment[i]);
479 medoids[m] = old_medoid;
483 if (!any_changed) break;
486 std::unordered_map<int, int> rmap;
487 for (int i = 0; i < 64; ++i) {
488 if (i % 8 == 0) printf("\n");
489 if (!rmap.count(assignment[i])) {
490 rmap.emplace(assignment[i], rmap.size());
492 printf("%d, ", rmap[assignment[i]]);
498 int main(int argc, char **argv)
501 readpix(rgb, argv[1]);
503 readpix(rgb, "color.pnm");
506 double sum_sq_err = 0.0;
507 //double last_cb_cfl_fac = 0.0;
508 //double last_cr_cfl_fac = 0.0;
510 // DCT and quantize luma
511 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
512 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
515 for (unsigned y = 0; y < 8; ++y) {
516 for (unsigned x = 0; x < 8; ++x) {
517 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
524 for (unsigned y = 0; y < 8; ++y) {
525 for (unsigned x = 0; x < 8; ++x) {
526 int coeff_idx = y * 8 + x;
527 int k = quantize(in_y[coeff_idx], coeff_idx);
528 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
530 // Store back for reconstruction / PSNR calculation
531 in_y[coeff_idx] = unquantize(k, coeff_idx);
537 for (unsigned y = 0; y < 8; ++y) {
538 for (unsigned x = 0; x < 8; ++x) {
539 int k = clamp(in_y[y * 8 + x]);
540 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
541 sum_sq_err += (*ptr - k) * (*ptr - k);
547 double mse = sum_sq_err / double(WIDTH * HEIGHT);
548 double psnr_db = 20 * log10(255.0 / sqrt(mse));
549 printf("psnr = %.2f dB\n", psnr_db);
551 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
553 // DCT and quantize chroma
554 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
555 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
556 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
558 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
559 printf("in blocks:\n");
560 for (unsigned y = 0; y < 8; ++y) {
561 for (unsigned x = 0; x < 8; ++x) {
562 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
566 for (unsigned x = 0; x < 8; ++x) {
567 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
574 for (unsigned y = 0; y < 8; ++y) {
575 for (unsigned x = 0; x < 4; ++x) {
576 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
577 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
580 in_y[y * 8 + x * 2 + 0] = a;
581 in_y[y * 8 + x * 2 + 1] = b;
585 printf("tf-ed block:\n");
586 for (unsigned y = 0; y < 8; ++y) {
587 for (unsigned x = 0; x < 8; ++x) {
588 short a = in_y[y * 8 + x];
594 // Read Y block with no tf switch (from reconstructed luma)
596 for (unsigned y = 0; y < 8; ++y) {
597 for (unsigned x = 0; x < 8; ++x) {
598 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
605 short in_cb[64], in_cr[64];
606 for (unsigned y = 0; y < 8; ++y) {
607 for (unsigned x = 0; x < 8; ++x) {
608 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
609 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
622 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
623 //double denom = (x1 * x1);
625 double y0 = in_cb[1];
626 double y1 = in_cb[8];
627 double y2 = in_cb[9];
628 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
629 //double cb_cfl_fac = (x1 * y1) / denom;
631 for (unsigned y = 0; y < 8; ++y) {
632 for (unsigned x = 0; x < 8; ++x) {
633 short a = in_y[y * 8 + x];
637 for (unsigned x = 0; x < 8; ++x) {
638 short a = in_cb[y * 8 + x];
643 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
644 in_y[1], in_y[8], in_y[9],
645 in_cb[1], in_cb[8], in_cb[9],
651 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
652 //double cr_cfl_fac = (x1 * y1) / denom;
653 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
654 cb_cfl_fac, in_cb[0] - in_y[0],
655 cr_cfl_fac, in_cr[0] - in_y[0]);
657 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
660 //last_cb_cfl_fac = cb_cfl_fac;
661 //last_cr_cfl_fac = cr_cfl_fac;
663 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
664 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
665 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
666 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
667 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
668 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
670 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
671 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
672 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
673 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
674 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
675 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
676 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
680 //in_cb[0] -= in_y[0];
681 //in_cr[0] -= in_y[0];
684 for (unsigned y = 0; y < 8; ++y) {
685 for (unsigned x = 0; x < 8; ++x) {
686 int coeff_idx = y * 8 + x;
687 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
688 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
689 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
690 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
692 // Store back for reconstruction / PSNR calculation
693 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
694 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
698 idct_int32(in_y); // DEBUG
702 for (unsigned y = 0; y < 8; ++y) {
703 for (unsigned x = 0; x < 8; ++x) {
704 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
705 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
707 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
708 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
713 last_cb_cfl_fac = cb_cfl_fac;
714 last_cr_cfl_fac = cr_cfl_fac;
720 printf("chroma_energy = %f, with_pred = %f\n",
721 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
724 // DC coefficient pred from the right to left (within each slice)
725 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += BLOCKS_PER_STREAM) {
728 for (unsigned subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
729 unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS;
730 unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS;
731 int k = coeff_y[(yb * 8) * WIDTH + (xb * 8)];
733 coeff_y[(yb * 8) * WIDTH + (xb * 8)] = k - prev_k;
738 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; block_idx += BLOCKS_PER_STREAM) {
742 for (unsigned subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
743 unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS_CHROMA;
744 unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS_CHROMA;
745 int k_cb = coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)];
746 int k_cr = coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)];
748 coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cb - prev_k_cb;
749 coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cr - prev_k_cr;
756 FILE *fp = fopen("reconstructed.pgm", "wb");
757 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
758 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
761 fp = fopen("reconstructed.pnm", "wb");
762 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
763 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
764 for (unsigned xb = 0; xb < WIDTH; ++xb) {
765 int y = pix_y[(yb * WIDTH) + xb];
767 int c0 = yb * (WIDTH/2) + xb/2;
769 cb = pix_cb[c0] - 128.0;
770 cr = pix_cr[c0] - 128.0;
772 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
773 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
774 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
777 double r = y + 1.5748 * cr;
778 double g = y - 0.1873 * cb - 0.4681 * cr;
779 double b = y + 1.8556 * cb;
781 putc(clamp(lrint(r)), fp);
782 putc(clamp(lrint(g)), fp);
783 putc(clamp(lrint(b)), fp);
788 // For each coefficient, make some tables.
789 size_t extra_bits = 0;
790 for (unsigned i = 0; i < 64; ++i) {
793 for (unsigned y = 0; y < 8; ++y) {
794 for (unsigned x = 0; x < 8; ++x) {
795 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
796 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
799 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
800 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
801 unsigned short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
802 if (k >= ESCAPE_LIMIT) {
804 extra_bits += 12; // escape this one
806 ++s_luma.freqs[(k - 1) & (NUM_SYMS - 1)];
810 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
811 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
812 unsigned short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
813 unsigned short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
814 if (k_cb >= ESCAPE_LIMIT) {
816 extra_bits += 12; // escape this one
818 if (k_cr >= ESCAPE_LIMIT) {
820 extra_bits += 12; // escape this one
822 ++s_chroma.freqs[(k_cb - 1) & (NUM_SYMS - 1)];
823 ++s_chroma.freqs[(k_cr - 1) & (NUM_SYMS - 1)];
829 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
831 find_optimal_stream_assignment(0);
833 find_optimal_stream_assignment(64);
837 for (unsigned i = 0; i < 64; ++i) {
838 stats[i].freqs[NUM_SYMS - 1] /= 2; // zero, has no sign bits (yes, this is trickery)
839 stats[i].normalize_freqs(prob_scale);
840 stats[i].cum_freqs[NUM_SYMS] += stats[i].freqs[NUM_SYMS - 1];
841 stats[i].freqs[NUM_SYMS - 1] *= 2;
844 FILE *codedfp = fopen("coded.dat", "wb");
845 if (codedfp == nullptr) {
850 // TODO: rather gamma-k or something
851 for (unsigned i = 0; i < 64; ++i) {
852 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
855 printf("writing table %d\n", i);
856 for (unsigned j = 0; j < NUM_SYMS; ++j) {
857 write_varint(stats[i].freqs[j], codedfp);
861 RansEncoder rans_encoder;
863 size_t tot_bytes = 0;
866 for (unsigned y = 0; y < 8; ++y) {
867 for (unsigned x = 0; x < 8; ++x) {
868 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
869 rans_encoder.init_prob(s_luma);
872 std::vector<int> lens;
874 // need to reverse later
875 rans_encoder.clear();
876 size_t num_bytes = 0;
877 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; ++block_idx) {
878 unsigned yb = block_idx / WIDTH_BLOCKS;
879 unsigned xb = block_idx % WIDTH_BLOCKS;
881 int k = coeff_y[(yb * 8 + y) * WIDTH + (xb * 8 + x)];
882 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
883 rans_encoder.encode_coeff(k);
885 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
886 int l = rans_encoder.save_block(codedfp);
891 tot_bytes += num_bytes;
892 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
898 double avg_l = sum_l / lens.size();
900 double sum_sql = 0.0;
902 sum_sql += (l - avg_l) * (l - avg_l);
904 double stddev_l = sqrt(sum_sql / (lens.size() - 1));
905 printf("coeff %d: avg=%.2f bytes, stddev=%.2f bytes\n", y*8+x, avg_l, stddev_l);
910 for (unsigned y = 0; y < 8; ++y) {
911 for (unsigned x = 0; x < 8; ++x) {
912 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
913 rans_encoder.init_prob(s_chroma);
915 rans_encoder.clear();
916 size_t num_bytes = 0;
917 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
918 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
919 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
921 int k = coeff_cb[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
922 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
923 rans_encoder.encode_coeff(k);
925 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
926 num_bytes += rans_encoder.save_block(codedfp);
929 tot_bytes += num_bytes;
930 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
935 for (unsigned y = 0; y < 8; ++y) {
936 for (unsigned x = 0; x < 8; ++x) {
937 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
938 rans_encoder.init_prob(s_chroma);
940 rans_encoder.clear();
941 size_t num_bytes = 0;
942 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
943 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
944 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
946 int k = coeff_cr[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
947 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
948 rans_encoder.encode_coeff(k);
950 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
951 num_bytes += rans_encoder.save_block(codedfp);
954 tot_bytes += num_bytes;
955 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
959 printf("%ld bytes + %ld escape bits (%ld) = %ld total bytes\n",
960 tot_bytes - extra_bits / 8,