8 //#include "ryg_rans/rans64.h"
9 #include "ryg_rans/rans_byte.h"
10 #include "ryg_rans/renormalize.h"
17 #include <unordered_map>
21 #define WIDTH_BLOCKS (WIDTH/8)
22 #define WIDTH_BLOCKS_CHROMA (WIDTH/16)
23 #define HEIGHT_BLOCKS (HEIGHT/8)
24 #define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS)
25 #define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS)
28 #define ESCAPE_LIMIT (NUM_SYMS - 1)
29 #define BLOCKS_PER_STREAM 320
31 // If you set this to 1, the program will try to optimize the placement
32 // of coefficients to rANS probability distributions. This is randomized,
33 // so you might want to run it a few times.
34 #define FIND_OPTIMAL_STREAM_ASSIGNMENT 0
35 #define NUM_CLUSTERS 4
37 static constexpr uint32_t prob_bits = 12;
38 static constexpr uint32_t prob_scale = 1 << prob_bits;
42 void fdct_int32(short *const In);
43 void idct_int32(short *const In);
45 unsigned char rgb[WIDTH * HEIGHT * 3];
46 unsigned char pix_y[WIDTH * HEIGHT];
47 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
48 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
49 unsigned char full_pix_cb[WIDTH * HEIGHT];
50 unsigned char full_pix_cr[WIDTH * HEIGHT];
51 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
56 if (x > 255) return 255;
60 static const unsigned char std_luminance_quant_tbl[64] = {
62 16, 11, 10, 16, 24, 40, 51, 61,
63 12, 12, 14, 19, 26, 58, 60, 55,
64 14, 13, 16, 24, 40, 57, 69, 56,
65 14, 17, 22, 29, 51, 87, 80, 62,
66 18, 22, 37, 56, 68, 109, 103, 77,
67 24, 35, 55, 64, 81, 104, 113, 92,
68 49, 64, 78, 87, 103, 121, 120, 101,
69 72, 92, 95, 98, 112, 100, 103, 99
71 // ff_mpeg1_default_intra_matrix
72 8, 16, 19, 22, 26, 27, 29, 34,
73 16, 16, 22, 24, 27, 29, 34, 37,
74 19, 22, 26, 27, 29, 34, 34, 38,
75 22, 22, 26, 27, 29, 34, 37, 40,
76 22, 26, 27, 29, 32, 35, 40, 48,
77 26, 27, 29, 32, 35, 40, 48, 58,
78 26, 27, 29, 34, 38, 46, 56, 69,
79 27, 29, 35, 38, 46, 56, 69, 83
85 uint32_t freqs[NUM_SYMS];
86 uint32_t cum_freqs[NUM_SYMS + 1];
89 void calc_cum_freqs();
90 void normalize_freqs(uint32_t target_total);
93 void SymbolStats::clear()
95 for (int i=0; i < NUM_SYMS; i++)
99 void SymbolStats::calc_cum_freqs()
102 for (int i=0; i < NUM_SYMS; i++)
103 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
106 void SymbolStats::normalize_freqs(uint32_t target_total)
108 uint64_t real_freq[NUM_SYMS + 1]; // hack
110 assert(target_total >= NUM_SYMS);
113 uint32_t cur_total = cum_freqs[NUM_SYMS];
115 if (cur_total == 0) return;
117 double ideal_cost = 0.0;
118 for (int i = 1; i <= NUM_SYMS; i++)
120 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
121 if (real_freq[i] > 0)
122 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
125 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
127 // calculate updated freqs and make sure we didn't screw anything up
128 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
129 for (int i=0; i < NUM_SYMS; i++) {
131 assert(cum_freqs[i+1] == cum_freqs[i]);
133 assert(cum_freqs[i+1] > cum_freqs[i]);
136 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
139 double calc_cost = 0.0;
140 for (int i = 1; i <= NUM_SYMS; i++)
142 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
143 if (real_freq[i] > 0)
144 calc_cost -= real_freq[i] * log2(freq / double(target_total));
147 static double total_loss = 0.0;
148 total_loss += calc_cost - ideal_cost;
149 static double total_loss_with_dp = 0.0;
150 double optimal_cost = 0.0;
151 //total_loss_with_dp += optimal_cost - ideal_cost;
152 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
153 ideal_cost, optimal_cost,
154 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
157 SymbolStats stats[128];
159 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
160 // Distance from one stream to the other, based on a hacked-up K-L divergence.
161 float kl_dist[64][64];
164 const int luma_mapping[64] = {
165 0, 0, 1, 1, 2, 2, 3, 3,
166 0, 0, 1, 2, 2, 2, 3, 3,
167 1, 1, 2, 2, 2, 3, 3, 3,
168 1, 1, 2, 2, 2, 3, 3, 3,
169 1, 2, 2, 2, 2, 3, 3, 3,
170 2, 2, 2, 2, 3, 3, 3, 3,
171 2, 2, 3, 3, 3, 3, 3, 3,
172 3, 3, 3, 3, 3, 3, 3, 3,
174 const int chroma_mapping[64] = {
175 0, 1, 1, 2, 2, 2, 3, 3,
176 1, 1, 2, 2, 2, 3, 3, 3,
177 2, 2, 2, 2, 3, 3, 3, 3,
178 2, 2, 2, 3, 3, 3, 3, 3,
179 2, 3, 3, 3, 3, 3, 3, 3,
180 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3,
182 3, 3, 3, 3, 3, 3, 3, 3,
185 int pick_stats_for(int x, int y, bool is_chroma)
187 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
188 return y * 8 + x + is_chroma * 64;
191 return chroma_mapping[y * 8 + x] + 4;
193 return luma_mapping[y * 8 + x];
199 void write_varint(int x, FILE *fp)
202 putc((x & 0x7f) | 0x80, fp);
212 out_buf.reset(new uint8_t[out_max_size]);
216 void init_prob(SymbolStats &s)
218 for (int i = 0; i < NUM_SYMS; i++) {
219 //printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits + 1);
220 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits + 1);
222 sign_bias = s.cum_freqs[NUM_SYMS];
227 out_end = out_buf.get() + out_max_size;
228 ptr = out_end; // *end* of output buffer
232 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
234 RansEncFlush(&rans, &ptr);
235 //printf("post-flush = %08x\n", rans);
237 uint32_t num_rans_bytes = out_end - ptr;
239 if (num_rans_bytes == 4) {
241 memcpy(&block, ptr, 4);
243 if (block == last_block) {
244 write_varint(0, codedfp);
255 write_varint(num_rans_bytes, codedfp);
256 //fwrite(&num_rans_bytes, 1, 4, codedfp);
257 fwrite(ptr, 1, num_rans_bytes, codedfp);
259 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
264 //printf("Saving block: %d rANS bytes\n", num_rans_bytes);
265 return num_rans_bytes;
266 //return num_rans_bytes;
269 void encode_coeff(short signed_k)
271 //printf("encoding coeff %d (sym %d), rans before encoding = %08x\n", signed_k, ((abs(signed_k) - 1) & 255), rans);
272 unsigned short k = abs(signed_k);
273 if (k >= ESCAPE_LIMIT) {
274 // Put the coefficient as a 1/(2^12) symbol _before_
275 // the 255 coefficient, since the decoder will read the
276 // 255 coefficient first.
277 RansEncPut(&rans, &ptr, k, 1, prob_bits);
280 RansEncPutSymbol(&rans, &ptr, &esyms[(k - 1) & (NUM_SYMS - 1)]);
287 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
288 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
290 unique_ptr<uint8_t[]> out_buf;
294 RansEncSymbol esyms[NUM_SYMS];
297 uint32_t last_block = 0; // Not a valid 4-byte rANS block (?)
300 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
301 static constexpr double quant_scalefac = 4.0; // whatever?
303 static inline int quantize(int f, int coeff_idx)
305 if (coeff_idx == 0) {
306 return f / dc_scalefac;
312 const int w = std_luminance_quant_tbl[coeff_idx];
313 const int s = quant_scalefac;
314 int sign_f = (f > 0) ? 1 : -1;
315 return (32 * f + sign_f * w * s) / (2 * w * s);
318 static inline int unquantize(int qf, int coeff_idx)
320 if (coeff_idx == 0) {
321 return qf * dc_scalefac;
327 const int w = std_luminance_quant_tbl[coeff_idx];
328 const int s = quant_scalefac;
329 return (2 * qf * w * s) / 32;
332 void readpix(unsigned char *ptr, const char *filename)
334 FILE *fp = fopen(filename, "rb");
340 fseek(fp, 0, SEEK_END);
341 long len = ftell(fp);
342 assert(len >= WIDTH * HEIGHT * 3);
343 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
345 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
351 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
352 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
353 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
355 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
356 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
357 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
358 for (unsigned xb = 0; xb < WIDTH; ++xb) {
359 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
360 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
361 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
362 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
363 double cb = (b - y) * cb_fac + 128.0;
364 double cr = (r - y) * cr_fac + 128.0;
365 pix_y[(yb * WIDTH) + xb] = lrint(y);
366 temp_cb[(yb * WIDTH) + xb] = cb;
367 temp_cr[(yb * WIDTH) + xb] = cr;
368 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
369 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
373 // Simple 4:2:2 subsampling with left convention.
374 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
375 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
376 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
377 int c1 = yb * WIDTH + xb * 2;
378 int c2 = yb * WIDTH + xb * 2 + 1;
380 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
381 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
382 cb = std::min(std::max(cb, 0.0), 255.0);
383 cr = std::min(std::max(cr, 0.0), 255.0);
384 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
385 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
390 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
391 double find_best_assignment(const int *medoids, int *assignment)
393 double current_score = 0.0;
394 for (int i = 0; i < 64; ++i) {
395 int best_medoid = medoids[0];
396 float best_medoid_score = kl_dist[i][medoids[0]];
397 for (int j = 1; j < NUM_CLUSTERS; ++j) {
398 if (kl_dist[i][medoids[j]] < best_medoid_score) {
399 best_medoid = medoids[j];
400 best_medoid_score = kl_dist[i][medoids[j]];
403 assignment[i] = best_medoid;
404 current_score += best_medoid_score;
406 return current_score;
409 void find_optimal_stream_assignment(int base)
412 for (unsigned i = 0; i < 64; ++i) {
414 for (unsigned k = 0; k < NUM_SYMS; ++k) {
415 s += stats[i + base].freqs[k] + 0.5;
417 inv_sum[i] = 1.0 / s;
420 for (unsigned i = 0; i < 64; ++i) {
421 for (unsigned j = 0; j < 64; ++j) {
423 for (unsigned k = 0; k < NUM_SYMS; ++k) {
424 double p1 = (stats[i + base].freqs[k] + 0.5) * inv_sum[i];
425 double p2 = (stats[j + base].freqs[k] + 0.5) * inv_sum[j];
427 // K-L divergence is asymmetric; this is a hack.
428 d += p1 * log(p1 / p2);
429 d += p2 * log(p2 / p1);
432 //printf("%.3f ", d);
438 int medoids[64]; // only the first NUM_CLUSTERS matter
439 bool is_medoid[64] = { false };
440 std::iota(medoids, medoids + 64, 0);
441 std::random_device rd;
442 std::mt19937 g(rd());
443 std::shuffle(medoids, medoids + 64, g);
444 for (int i = 0; i < NUM_CLUSTERS; ++i) {
445 printf("%d ", medoids[i]);
446 is_medoid[medoids[i]] = true;
450 // assign each data point to the closest medoid
452 double current_score = find_best_assignment(medoids, assignment);
454 for (int i = 0; i < 1000; ++i) {
455 printf("iter %d\n", i);
456 bool any_changed = false;
457 for (int m = 0; m < NUM_CLUSTERS; ++m) {
458 for (int o = 0; o < 64; ++o) {
459 if (is_medoid[o]) continue;
460 int old_medoid = medoids[m];
463 int new_assignment[64];
464 double candidate_score = find_best_assignment(medoids, new_assignment);
466 if (candidate_score < current_score) {
467 current_score = candidate_score;
468 memcpy(assignment, new_assignment, sizeof(assignment));
470 is_medoid[old_medoid] = false;
471 is_medoid[medoids[m]] = true;
472 printf("%f: ", current_score);
473 for (int i = 0; i < 64; ++i) {
474 printf("%d ", assignment[i]);
479 medoids[m] = old_medoid;
483 if (!any_changed) break;
486 std::unordered_map<int, int> rmap;
487 for (int i = 0; i < 64; ++i) {
488 if (i % 8 == 0) printf("\n");
489 if (!rmap.count(assignment[i])) {
490 rmap.emplace(assignment[i], rmap.size());
492 printf("%d, ", rmap[assignment[i]]);
498 int main(int argc, char **argv)
501 readpix(rgb, argv[1]);
503 readpix(rgb, "color.pnm");
506 double sum_sq_err = 0.0;
507 //double last_cb_cfl_fac = 0.0;
508 //double last_cr_cfl_fac = 0.0;
510 int max_val_x[8] = {0}, min_val_x[8] = {0};
511 int max_val_y[8] = {0}, min_val_y[8] = {0};
513 // DCT and quantize luma
514 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
515 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
518 for (unsigned y = 0; y < 8; ++y) {
519 for (unsigned x = 0; x < 8; ++x) {
520 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
527 for (unsigned y = 0; y < 8; ++y) {
528 for (unsigned x = 0; x < 8; ++x) {
529 int coeff_idx = y * 8 + x;
530 int k = quantize(in_y[coeff_idx], coeff_idx);
531 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
533 max_val_x[x] = std::max(max_val_x[x], k);
534 min_val_x[x] = std::min(min_val_x[x], k);
535 max_val_y[y] = std::max(max_val_y[y], k);
536 min_val_y[y] = std::min(min_val_y[y], k);
538 // Store back for reconstruction / PSNR calculation
539 in_y[coeff_idx] = unquantize(k, coeff_idx);
545 for (unsigned y = 0; y < 8; ++y) {
546 for (unsigned x = 0; x < 8; ++x) {
547 int k = clamp(in_y[y * 8 + x]);
548 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
549 sum_sq_err += (*ptr - k) * (*ptr - k);
555 double mse = sum_sq_err / double(WIDTH * HEIGHT);
556 double psnr_db = 20 * log10(255.0 / sqrt(mse));
557 printf("psnr = %.2f dB\n", psnr_db);
559 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
561 // DCT and quantize chroma
562 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
563 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
564 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
566 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
567 printf("in blocks:\n");
568 for (unsigned y = 0; y < 8; ++y) {
569 for (unsigned x = 0; x < 8; ++x) {
570 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
574 for (unsigned x = 0; x < 8; ++x) {
575 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
582 for (unsigned y = 0; y < 8; ++y) {
583 for (unsigned x = 0; x < 4; ++x) {
584 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
585 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
588 in_y[y * 8 + x * 2 + 0] = a;
589 in_y[y * 8 + x * 2 + 1] = b;
593 printf("tf-ed block:\n");
594 for (unsigned y = 0; y < 8; ++y) {
595 for (unsigned x = 0; x < 8; ++x) {
596 short a = in_y[y * 8 + x];
602 // Read Y block with no tf switch (from reconstructed luma)
604 for (unsigned y = 0; y < 8; ++y) {
605 for (unsigned x = 0; x < 8; ++x) {
606 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
613 short in_cb[64], in_cr[64];
614 for (unsigned y = 0; y < 8; ++y) {
615 for (unsigned x = 0; x < 8; ++x) {
616 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
617 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
630 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
631 //double denom = (x1 * x1);
633 double y0 = in_cb[1];
634 double y1 = in_cb[8];
635 double y2 = in_cb[9];
636 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
637 //double cb_cfl_fac = (x1 * y1) / denom;
639 for (unsigned y = 0; y < 8; ++y) {
640 for (unsigned x = 0; x < 8; ++x) {
641 short a = in_y[y * 8 + x];
645 for (unsigned x = 0; x < 8; ++x) {
646 short a = in_cb[y * 8 + x];
651 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
652 in_y[1], in_y[8], in_y[9],
653 in_cb[1], in_cb[8], in_cb[9],
659 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
660 //double cr_cfl_fac = (x1 * y1) / denom;
661 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
662 cb_cfl_fac, in_cb[0] - in_y[0],
663 cr_cfl_fac, in_cr[0] - in_y[0]);
665 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
668 //last_cb_cfl_fac = cb_cfl_fac;
669 //last_cr_cfl_fac = cr_cfl_fac;
671 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
672 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
673 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
674 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
675 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
676 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
678 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
679 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
680 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
681 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
682 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
683 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
684 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
688 //in_cb[0] -= in_y[0];
689 //in_cr[0] -= in_y[0];
692 for (unsigned y = 0; y < 8; ++y) {
693 for (unsigned x = 0; x < 8; ++x) {
694 int coeff_idx = y * 8 + x;
695 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
696 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
697 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
698 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
700 // Store back for reconstruction / PSNR calculation
701 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
702 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
706 idct_int32(in_y); // DEBUG
710 for (unsigned y = 0; y < 8; ++y) {
711 for (unsigned x = 0; x < 8; ++x) {
712 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
713 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
715 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
716 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
721 last_cb_cfl_fac = cb_cfl_fac;
722 last_cr_cfl_fac = cr_cfl_fac;
728 printf("chroma_energy = %f, with_pred = %f\n",
729 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
732 // DC coefficient pred from the right to left (within each slice)
733 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; block_idx += BLOCKS_PER_STREAM) {
736 for (unsigned subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
737 unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS;
738 unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS;
739 int k = coeff_y[(yb * 8) * WIDTH + (xb * 8)];
741 coeff_y[(yb * 8) * WIDTH + (xb * 8)] = k - prev_k;
746 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; block_idx += BLOCKS_PER_STREAM) {
750 for (unsigned subblock_idx = BLOCKS_PER_STREAM; subblock_idx --> 0; ) {
751 unsigned yb = (block_idx + subblock_idx) / WIDTH_BLOCKS_CHROMA;
752 unsigned xb = (block_idx + subblock_idx) % WIDTH_BLOCKS_CHROMA;
753 int k_cb = coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)];
754 int k_cr = coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)];
756 coeff_cb[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cb - prev_k_cb;
757 coeff_cr[(yb * 8) * WIDTH/2 + (xb * 8)] = k_cr - prev_k_cr;
764 FILE *fp = fopen("reconstructed.pgm", "wb");
765 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
766 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
769 fp = fopen("reconstructed.pnm", "wb");
770 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
771 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
772 for (unsigned xb = 0; xb < WIDTH; ++xb) {
773 int y = pix_y[(yb * WIDTH) + xb];
775 int c0 = yb * (WIDTH/2) + xb/2;
777 cb = pix_cb[c0] - 128.0;
778 cr = pix_cr[c0] - 128.0;
780 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
781 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
782 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
785 double r = y + 1.5748 * cr;
786 double g = y - 0.1873 * cb - 0.4681 * cr;
787 double b = y + 1.8556 * cb;
789 putc(clamp(lrint(r)), fp);
790 putc(clamp(lrint(g)), fp);
791 putc(clamp(lrint(b)), fp);
796 // For each coefficient, make some tables.
797 size_t extra_bits = 0;
798 for (unsigned i = 0; i < 64; ++i) {
801 for (unsigned y = 0; y < 8; ++y) {
802 for (unsigned x = 0; x < 8; ++x) {
803 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
804 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
807 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
808 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
809 unsigned short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
810 if (k >= ESCAPE_LIMIT) {
812 extra_bits += 12; // escape this one
814 ++s_luma.freqs[(k - 1) & (NUM_SYMS - 1)];
818 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
819 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
820 unsigned short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
821 unsigned short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
822 if (k_cb >= ESCAPE_LIMIT) {
824 extra_bits += 12; // escape this one
826 if (k_cr >= ESCAPE_LIMIT) {
828 extra_bits += 12; // escape this one
830 ++s_chroma.freqs[(k_cb - 1) & (NUM_SYMS - 1)];
831 ++s_chroma.freqs[(k_cr - 1) & (NUM_SYMS - 1)];
837 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
839 find_optimal_stream_assignment(0);
841 find_optimal_stream_assignment(64);
845 for (unsigned i = 0; i < 64; ++i) {
846 stats[i].freqs[NUM_SYMS - 1] /= 2; // zero, has no sign bits (yes, this is trickery)
847 stats[i].normalize_freqs(prob_scale);
848 stats[i].cum_freqs[NUM_SYMS] += stats[i].freqs[NUM_SYMS - 1];
849 stats[i].freqs[NUM_SYMS - 1] *= 2;
852 FILE *codedfp = fopen("coded.dat", "wb");
853 if (codedfp == nullptr) {
858 // TODO: rather gamma-k or something
859 for (unsigned i = 0; i < 64; ++i) {
860 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
863 printf("writing table %d\n", i);
864 for (unsigned j = 0; j < NUM_SYMS; ++j) {
865 write_varint(stats[i].freqs[j], codedfp);
869 RansEncoder rans_encoder;
871 size_t tot_bytes = 0;
874 for (unsigned y = 0; y < 8; ++y) {
875 for (unsigned x = 0; x < 8; ++x) {
876 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
877 rans_encoder.init_prob(s_luma);
880 std::vector<int> lens;
882 // need to reverse later
883 rans_encoder.clear();
884 size_t num_bytes = 0;
885 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; ++block_idx) {
886 unsigned yb = block_idx / WIDTH_BLOCKS;
887 unsigned xb = block_idx % WIDTH_BLOCKS;
889 int k = coeff_y[(yb * 8 + y) * WIDTH + (xb * 8 + x)];
890 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
891 rans_encoder.encode_coeff(k);
893 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
894 int l = rans_encoder.save_block(codedfp);
899 tot_bytes += num_bytes;
900 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
906 double avg_l = sum_l / lens.size();
908 double sum_sql = 0.0;
910 sum_sql += (l - avg_l) * (l - avg_l);
912 double stddev_l = sqrt(sum_sql / (lens.size() - 1));
913 printf("coeff %d: avg=%.2f bytes, stddev=%.2f bytes\n", y*8+x, avg_l, stddev_l);
918 for (unsigned y = 0; y < 8; ++y) {
919 for (unsigned x = 0; x < 8; ++x) {
920 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
921 rans_encoder.init_prob(s_chroma);
923 rans_encoder.clear();
924 size_t num_bytes = 0;
925 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
926 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
927 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
929 int k = coeff_cb[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
930 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
931 rans_encoder.encode_coeff(k);
933 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
934 num_bytes += rans_encoder.save_block(codedfp);
937 tot_bytes += num_bytes;
938 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
943 for (unsigned y = 0; y < 8; ++y) {
944 for (unsigned x = 0; x < 8; ++x) {
945 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
946 rans_encoder.init_prob(s_chroma);
948 rans_encoder.clear();
949 size_t num_bytes = 0;
950 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
951 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
952 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
954 int k = coeff_cr[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
955 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
956 rans_encoder.encode_coeff(k);
958 if (block_idx % BLOCKS_PER_STREAM == (BLOCKS_PER_STREAM - 1) || block_idx == NUM_BLOCKS - 1) {
959 num_bytes += rans_encoder.save_block(codedfp);
962 tot_bytes += num_bytes;
963 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
967 printf("%ld bytes + %ld escape bits (%ld) = %ld total bytes\n",
968 tot_bytes - extra_bits / 8,
974 printf("Max coefficient ranges (as a function of x):\n\n");
975 for (unsigned x = 0; x < 8; ++x) {
976 int range = std::max(max_val_x[x], -min_val_x[x]);
977 printf(" [%4d, %4d] (%.2f bits)\n", min_val_x[x], max_val_x[x], log2(range * 2 + 1));
980 printf("Max coefficient ranges (as a function of y):\n\n");
981 for (unsigned y = 0; y < 8; ++y) {
982 int range = std::max(max_val_y[y], -min_val_y[y]);
983 printf(" [%4d, %4d] (%.2f bits)\n", min_val_y[y], max_val_y[y], log2(range * 2 + 1));