8 //#include "ryg_rans/rans64.h"
9 #include "ryg_rans/rans_byte.h"
10 #include "ryg_rans/renormalize.h"
17 #include <unordered_map>
22 #define ESCAPE_LIMIT (NUM_SYMS - 1)
24 // If you set this to 1, the program will try to optimize the placement
25 // of coefficients to rANS probability distributions. This is randomized,
26 // so you might want to run it a few times.
27 #define FIND_OPTIMAL_STREAM_ASSIGNMENT 0
28 #define NUM_CLUSTERS 4
30 static constexpr uint32_t prob_bits = 12;
31 static constexpr uint32_t prob_scale = 1 << prob_bits;
35 void fdct_int32(short *const In);
36 void idct_int32(short *const In);
38 unsigned char rgb[WIDTH * HEIGHT * 3];
39 unsigned char pix_y[WIDTH * HEIGHT];
40 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
41 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
42 unsigned char full_pix_cb[WIDTH * HEIGHT];
43 unsigned char full_pix_cr[WIDTH * HEIGHT];
44 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
49 if (x > 255) return 255;
53 static const unsigned char std_luminance_quant_tbl[64] = {
55 16, 11, 10, 16, 24, 40, 51, 61,
56 12, 12, 14, 19, 26, 58, 60, 55,
57 14, 13, 16, 24, 40, 57, 69, 56,
58 14, 17, 22, 29, 51, 87, 80, 62,
59 18, 22, 37, 56, 68, 109, 103, 77,
60 24, 35, 55, 64, 81, 104, 113, 92,
61 49, 64, 78, 87, 103, 121, 120, 101,
62 72, 92, 95, 98, 112, 100, 103, 99
64 // ff_mpeg1_default_intra_matrix
65 8, 16, 19, 22, 26, 27, 29, 34,
66 16, 16, 22, 24, 27, 29, 34, 37,
67 19, 22, 26, 27, 29, 34, 34, 38,
68 22, 22, 26, 27, 29, 34, 37, 40,
69 22, 26, 27, 29, 32, 35, 40, 48,
70 26, 27, 29, 32, 35, 40, 48, 58,
71 26, 27, 29, 34, 38, 46, 56, 69,
72 27, 29, 35, 38, 46, 56, 69, 83
78 uint32_t freqs[NUM_SYMS];
79 uint32_t cum_freqs[NUM_SYMS + 1];
82 void calc_cum_freqs();
83 void normalize_freqs(uint32_t target_total);
86 void SymbolStats::clear()
88 for (int i=0; i < NUM_SYMS; i++)
92 void SymbolStats::calc_cum_freqs()
95 for (int i=0; i < NUM_SYMS; i++)
96 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
99 void SymbolStats::normalize_freqs(uint32_t target_total)
101 uint64_t real_freq[NUM_SYMS + 1]; // hack
103 assert(target_total >= NUM_SYMS);
106 uint32_t cur_total = cum_freqs[NUM_SYMS];
108 if (cur_total == 0) return;
110 double ideal_cost = 0.0;
111 for (int i = 1; i <= NUM_SYMS; i++)
113 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
114 if (real_freq[i] > 0)
115 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
118 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
120 // calculate updated freqs and make sure we didn't screw anything up
121 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
122 for (int i=0; i < NUM_SYMS; i++) {
124 assert(cum_freqs[i+1] == cum_freqs[i]);
126 assert(cum_freqs[i+1] > cum_freqs[i]);
129 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
132 double calc_cost = 0.0;
133 for (int i = 1; i <= NUM_SYMS; i++)
135 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
136 if (real_freq[i] > 0)
137 calc_cost -= real_freq[i] * log2(freq / double(target_total));
140 static double total_loss = 0.0;
141 total_loss += calc_cost - ideal_cost;
142 static double total_loss_with_dp = 0.0;
143 double optimal_cost = 0.0;
144 //total_loss_with_dp += optimal_cost - ideal_cost;
145 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
146 ideal_cost, optimal_cost,
147 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
150 SymbolStats stats[128];
152 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
153 // Distance from one stream to the other, based on a hacked-up K-L divergence.
154 float kl_dist[64][64];
157 const int luma_mapping[64] = {
158 0, 0, 1, 1, 2, 2, 3, 3,
159 0, 0, 1, 2, 2, 2, 3, 3,
160 1, 1, 2, 2, 2, 3, 3, 3,
161 1, 1, 2, 2, 2, 3, 3, 3,
162 1, 2, 2, 2, 2, 3, 3, 3,
163 2, 2, 2, 2, 3, 3, 3, 3,
164 2, 2, 3, 3, 3, 3, 3, 3,
165 3, 3, 3, 3, 3, 3, 3, 3,
167 const int chroma_mapping[64] = {
168 0, 1, 1, 2, 2, 2, 3, 3,
169 1, 1, 2, 2, 2, 3, 3, 3,
170 2, 2, 2, 2, 3, 3, 3, 3,
171 2, 2, 2, 3, 3, 3, 3, 3,
172 2, 3, 3, 3, 3, 3, 3, 3,
173 3, 3, 3, 3, 3, 3, 3, 3,
174 3, 3, 3, 3, 3, 3, 3, 3,
175 3, 3, 3, 3, 3, 3, 3, 3,
178 int pick_stats_for(int x, int y, bool is_chroma)
180 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
181 return y * 8 + x + is_chroma * 64;
184 return chroma_mapping[y * 8 + x] + 4;
186 return luma_mapping[y * 8 + x];
192 void write_varint(int x, FILE *fp)
195 putc((x & 0x7f) | 0x80, fp);
205 out_buf.reset(new uint8_t[out_max_size]);
209 void init_prob(SymbolStats &s)
211 for (int i = 0; i < NUM_SYMS; i++) {
212 printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits + 1);
213 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits + 1);
215 sign_bias = s.cum_freqs[256];
220 out_end = out_buf.get() + out_max_size;
221 ptr = out_end; // *end* of output buffer
225 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
227 RansEncFlush(&rans, &ptr);
228 //printf("post-flush = %08x\n", rans);
230 uint32_t num_rans_bytes = out_end - ptr;
232 if (num_rans_bytes == 4) {
234 memcpy(&block, ptr, 4);
236 if (block == last_block) {
237 write_varint(0, codedfp);
248 write_varint(num_rans_bytes, codedfp);
249 //fwrite(&num_rans_bytes, 1, 4, codedfp);
250 fwrite(ptr, 1, num_rans_bytes, codedfp);
252 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
257 printf("Saving block: %d rANS bytes\n", num_rans_bytes);
258 return num_rans_bytes;
259 //return num_rans_bytes;
262 void encode_coeff(short signed_k)
264 //printf("encoding coeff %d (sym %d), rans before encoding = %08x\n", signed_k, ((abs(signed_k) - 1) & 255), rans);
265 unsigned short k = abs(signed_k);
266 if (k >= ESCAPE_LIMIT) {
267 // Put the coefficient as a 1/(2^12) symbol _before_
268 // the 255 coefficient, since the decoder will read the
269 // 255 coefficient first.
270 RansEncPut(&rans, &ptr, k, 1, prob_bits);
273 RansEncPutSymbol(&rans, &ptr, &esyms[(k - 1) & 255]);
280 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
281 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
283 unique_ptr<uint8_t[]> out_buf;
287 RansEncSymbol esyms[NUM_SYMS];
290 uint32_t last_block = 0; // Not a valid 4-byte rANS block (?)
293 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
294 static constexpr double quant_scalefac = 4.0; // whatever?
296 static inline int quantize(int f, int coeff_idx)
298 if (coeff_idx == 0) {
299 return f / dc_scalefac;
305 const int w = std_luminance_quant_tbl[coeff_idx];
306 const int s = quant_scalefac;
307 int sign_f = (f > 0) ? 1 : -1;
308 return (32 * f + sign_f * w * s) / (2 * w * s);
311 static inline int unquantize(int qf, int coeff_idx)
313 if (coeff_idx == 0) {
314 return qf * dc_scalefac;
320 const int w = std_luminance_quant_tbl[coeff_idx];
321 const int s = quant_scalefac;
322 return (2 * qf * w * s) / 32;
325 void readpix(unsigned char *ptr, const char *filename)
327 FILE *fp = fopen(filename, "rb");
333 fseek(fp, 0, SEEK_END);
334 long len = ftell(fp);
335 assert(len >= WIDTH * HEIGHT * 3);
336 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
338 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
344 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
345 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
346 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
348 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
349 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
350 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
351 for (unsigned xb = 0; xb < WIDTH; ++xb) {
352 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
353 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
354 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
355 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
356 double cb = (b - y) * cb_fac + 128.0;
357 double cr = (r - y) * cr_fac + 128.0;
358 pix_y[(yb * WIDTH) + xb] = lrint(y);
359 temp_cb[(yb * WIDTH) + xb] = cb;
360 temp_cr[(yb * WIDTH) + xb] = cr;
361 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
362 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
366 // Simple 4:2:2 subsampling with left convention.
367 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
368 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
369 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
370 int c1 = yb * WIDTH + xb * 2;
371 int c2 = yb * WIDTH + xb * 2 + 1;
373 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
374 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
375 cb = std::min(std::max(cb, 0.0), 255.0);
376 cr = std::min(std::max(cr, 0.0), 255.0);
377 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
378 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
383 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
384 double find_best_assignment(const int *medoids, int *assignment)
386 double current_score = 0.0;
387 for (int i = 0; i < 64; ++i) {
388 int best_medoid = medoids[0];
389 float best_medoid_score = kl_dist[i][medoids[0]];
390 for (int j = 1; j < NUM_CLUSTERS; ++j) {
391 if (kl_dist[i][medoids[j]] < best_medoid_score) {
392 best_medoid = medoids[j];
393 best_medoid_score = kl_dist[i][medoids[j]];
396 assignment[i] = best_medoid;
397 current_score += best_medoid_score;
399 return current_score;
402 void find_optimal_stream_assignment(int base)
405 for (unsigned i = 0; i < 64; ++i) {
407 for (unsigned k = 0; k < 256; ++k) {
408 s += stats[i + base].freqs[k] + 0.5;
410 inv_sum[i] = 1.0 / s;
413 for (unsigned i = 0; i < 64; ++i) {
414 for (unsigned j = 0; j < 64; ++j) {
416 for (unsigned k = 0; k < 256; ++k) {
417 double p1 = (stats[i + base].freqs[k] + 0.5) * inv_sum[i];
418 double p2 = (stats[j + base].freqs[k] + 0.5) * inv_sum[j];
420 // K-L divergence is asymmetric; this is a hack.
421 d += p1 * log(p1 / p2);
422 d += p2 * log(p2 / p1);
425 //printf("%.3f ", d);
431 int medoids[64]; // only the first NUM_CLUSTERS matter
432 bool is_medoid[64] = { false };
433 std::iota(medoids, medoids + 64, 0);
434 std::random_device rd;
435 std::mt19937 g(rd());
436 std::shuffle(medoids, medoids + 64, g);
437 for (int i = 0; i < NUM_CLUSTERS; ++i) {
438 printf("%d ", medoids[i]);
439 is_medoid[medoids[i]] = true;
443 // assign each data point to the closest medoid
445 double current_score = find_best_assignment(medoids, assignment);
447 for (int i = 0; i < 1000; ++i) {
448 printf("iter %d\n", i);
449 bool any_changed = false;
450 for (int m = 0; m < NUM_CLUSTERS; ++m) {
451 for (int o = 0; o < 64; ++o) {
452 if (is_medoid[o]) continue;
453 int old_medoid = medoids[m];
456 int new_assignment[64];
457 double candidate_score = find_best_assignment(medoids, new_assignment);
459 if (candidate_score < current_score) {
460 current_score = candidate_score;
461 memcpy(assignment, new_assignment, sizeof(assignment));
463 is_medoid[old_medoid] = false;
464 is_medoid[medoids[m]] = true;
465 printf("%f: ", current_score);
466 for (int i = 0; i < 64; ++i) {
467 printf("%d ", assignment[i]);
472 medoids[m] = old_medoid;
476 if (!any_changed) break;
479 std::unordered_map<int, int> rmap;
480 for (int i = 0; i < 64; ++i) {
481 if (i % 8 == 0) printf("\n");
482 if (!rmap.count(assignment[i])) {
483 rmap.emplace(assignment[i], rmap.size());
485 printf("%d, ", rmap[assignment[i]]);
491 int main(int argc, char **argv)
494 readpix(rgb, argv[1]);
496 readpix(rgb, "color.pnm");
499 double sum_sq_err = 0.0;
500 //double last_cb_cfl_fac = 0.0;
501 //double last_cr_cfl_fac = 0.0;
503 // DCT and quantize luma
504 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
505 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
508 for (unsigned y = 0; y < 8; ++y) {
509 for (unsigned x = 0; x < 8; ++x) {
510 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
517 for (unsigned y = 0; y < 8; ++y) {
518 for (unsigned x = 0; x < 8; ++x) {
519 int coeff_idx = y * 8 + x;
520 int k = quantize(in_y[coeff_idx], coeff_idx);
521 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
523 // Store back for reconstruction / PSNR calculation
524 in_y[coeff_idx] = unquantize(k, coeff_idx);
530 for (unsigned y = 0; y < 8; ++y) {
531 for (unsigned x = 0; x < 8; ++x) {
532 int k = clamp(in_y[y * 8 + x]);
533 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
534 sum_sq_err += (*ptr - k) * (*ptr - k);
540 double mse = sum_sq_err / double(WIDTH * HEIGHT);
541 double psnr_db = 20 * log10(255.0 / sqrt(mse));
542 printf("psnr = %.2f dB\n", psnr_db);
544 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
546 // DCT and quantize chroma
547 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
548 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
549 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
551 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
552 printf("in blocks:\n");
553 for (unsigned y = 0; y < 8; ++y) {
554 for (unsigned x = 0; x < 8; ++x) {
555 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
559 for (unsigned x = 0; x < 8; ++x) {
560 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
567 for (unsigned y = 0; y < 8; ++y) {
568 for (unsigned x = 0; x < 4; ++x) {
569 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
570 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
573 in_y[y * 8 + x * 2 + 0] = a;
574 in_y[y * 8 + x * 2 + 1] = b;
578 printf("tf-ed block:\n");
579 for (unsigned y = 0; y < 8; ++y) {
580 for (unsigned x = 0; x < 8; ++x) {
581 short a = in_y[y * 8 + x];
587 // Read Y block with no tf switch (from reconstructed luma)
589 for (unsigned y = 0; y < 8; ++y) {
590 for (unsigned x = 0; x < 8; ++x) {
591 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
598 short in_cb[64], in_cr[64];
599 for (unsigned y = 0; y < 8; ++y) {
600 for (unsigned x = 0; x < 8; ++x) {
601 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
602 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
615 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
616 //double denom = (x1 * x1);
618 double y0 = in_cb[1];
619 double y1 = in_cb[8];
620 double y2 = in_cb[9];
621 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
622 //double cb_cfl_fac = (x1 * y1) / denom;
624 for (unsigned y = 0; y < 8; ++y) {
625 for (unsigned x = 0; x < 8; ++x) {
626 short a = in_y[y * 8 + x];
630 for (unsigned x = 0; x < 8; ++x) {
631 short a = in_cb[y * 8 + x];
636 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
637 in_y[1], in_y[8], in_y[9],
638 in_cb[1], in_cb[8], in_cb[9],
644 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
645 //double cr_cfl_fac = (x1 * y1) / denom;
646 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
647 cb_cfl_fac, in_cb[0] - in_y[0],
648 cr_cfl_fac, in_cr[0] - in_y[0]);
650 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
653 //last_cb_cfl_fac = cb_cfl_fac;
654 //last_cr_cfl_fac = cr_cfl_fac;
656 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
657 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
658 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
659 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
660 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
661 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
663 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
664 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
665 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
666 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
667 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
668 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
669 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
673 //in_cb[0] -= in_y[0];
674 //in_cr[0] -= in_y[0];
677 for (unsigned y = 0; y < 8; ++y) {
678 for (unsigned x = 0; x < 8; ++x) {
679 int coeff_idx = y * 8 + x;
680 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
681 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
682 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
683 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
685 // Store back for reconstruction / PSNR calculation
686 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
687 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
691 idct_int32(in_y); // DEBUG
695 for (unsigned y = 0; y < 8; ++y) {
696 for (unsigned x = 0; x < 8; ++x) {
697 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
698 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
700 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
701 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
706 last_cb_cfl_fac = cb_cfl_fac;
707 last_cr_cfl_fac = cr_cfl_fac;
713 printf("chroma_energy = %f, with_pred = %f\n",
714 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
717 // DC coefficient pred from the right to left
718 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
719 for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) {
720 coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)];
724 FILE *fp = fopen("reconstructed.pgm", "wb");
725 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
726 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
729 fp = fopen("reconstructed.pnm", "wb");
730 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
731 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
732 for (unsigned xb = 0; xb < WIDTH; ++xb) {
733 int y = pix_y[(yb * WIDTH) + xb];
735 int c0 = yb * (WIDTH/2) + xb/2;
737 cb = pix_cb[c0] - 128.0;
738 cr = pix_cr[c0] - 128.0;
740 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
741 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
742 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
745 double r = y + 1.5748 * cr;
746 double g = y - 0.1873 * cb - 0.4681 * cr;
747 double b = y + 1.8556 * cb;
749 putc(clamp(lrint(r)), fp);
750 putc(clamp(lrint(g)), fp);
751 putc(clamp(lrint(b)), fp);
756 // For each coefficient, make some tables.
757 size_t extra_bits = 0;
758 for (unsigned i = 0; i < 64; ++i) {
761 for (unsigned y = 0; y < 8; ++y) {
762 for (unsigned x = 0; x < 8; ++x) {
763 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
764 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
767 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
768 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
769 unsigned short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
770 if (k >= ESCAPE_LIMIT) {
772 extra_bits += 12; // escape this one
774 ++s_luma.freqs[(k - 1) & 255];
778 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
779 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
780 unsigned short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
781 unsigned short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
782 if (k_cb >= ESCAPE_LIMIT) {
784 extra_bits += 12; // escape this one
786 if (k_cr >= ESCAPE_LIMIT) {
788 extra_bits += 12; // escape this one
790 ++s_chroma.freqs[(k_cb - 1) & 255];
791 ++s_chroma.freqs[(k_cr - 1) & 255];
797 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
799 find_optimal_stream_assignment(0);
801 find_optimal_stream_assignment(64);
805 for (unsigned i = 0; i < 64; ++i) {
806 stats[i].freqs[255] /= 2; // zero, has no sign bits (yes, this is trickery)
807 stats[i].normalize_freqs(prob_scale);
808 stats[i].cum_freqs[256] += stats[i].freqs[255];
809 stats[i].freqs[255] *= 2;
812 FILE *codedfp = fopen("coded.dat", "wb");
813 if (codedfp == nullptr) {
818 // TODO: rather gamma-k or something
819 for (unsigned i = 0; i < 64; ++i) {
820 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
823 printf("writing table %d\n", i);
824 for (unsigned j = 0; j < NUM_SYMS; ++j) {
825 write_varint(stats[i].freqs[j], codedfp);
829 RansEncoder rans_encoder;
831 size_t tot_bytes = 0;
834 for (unsigned y = 0; y < 8; ++y) {
835 for (unsigned x = 0; x < 8; ++x) {
836 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
837 rans_encoder.init_prob(s_luma);
840 std::vector<int> lens;
842 // need to reverse later
843 rans_encoder.clear();
844 size_t num_bytes = 0;
845 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
846 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
847 int k = coeff_y[(yb + y) * WIDTH + (xb + x)];
848 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
849 rans_encoder.encode_coeff(k);
852 int l = rans_encoder.save_block(codedfp);
857 if (HEIGHT % 16 != 0) {
858 num_bytes += rans_encoder.save_block(codedfp);
860 tot_bytes += num_bytes;
861 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
867 double avg_l = sum_l / lens.size();
869 double sum_sql = 0.0;
871 sum_sql += (l - avg_l) * (l - avg_l);
873 double stddev_l = sqrt(sum_sql / (lens.size() - 1));
874 printf("coeff %d: avg=%.2f bytes, stddev=%.2f bytes\n", y*8+x, avg_l, stddev_l);
879 for (unsigned y = 0; y < 8; ++y) {
880 for (unsigned x = 0; x < 8; ++x) {
881 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
882 rans_encoder.init_prob(s_chroma);
884 rans_encoder.clear();
885 size_t num_bytes = 0;
886 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
887 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
888 int k = coeff_cb[(yb + y) * WIDTH/2 + (xb + x)];
889 rans_encoder.encode_coeff(k);
892 num_bytes += rans_encoder.save_block(codedfp);
895 if (HEIGHT % 16 != 0) {
896 num_bytes += rans_encoder.save_block(codedfp);
898 tot_bytes += num_bytes;
899 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
904 for (unsigned y = 0; y < 8; ++y) {
905 for (unsigned x = 0; x < 8; ++x) {
906 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
907 rans_encoder.init_prob(s_chroma);
909 rans_encoder.clear();
910 size_t num_bytes = 0;
911 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
912 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
913 int k = coeff_cr[(yb + y) * WIDTH/2 + (xb + x)];
914 rans_encoder.encode_coeff(k);
917 num_bytes += rans_encoder.save_block(codedfp);
920 if (HEIGHT % 16 != 0) {
921 num_bytes += rans_encoder.save_block(codedfp);
923 tot_bytes += num_bytes;
924 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
928 printf("%ld bytes + %ld escape bits (%ld) = %ld total bytes\n",
929 tot_bytes - extra_bits / 8,