8 //#include "ryg_rans/rans64.h"
9 #include "ryg_rans/rans_byte.h"
10 #include "ryg_rans/renormalize.h"
17 #include <unordered_map>
22 #define ESCAPE_LIMIT (NUM_SYMS - 1)
24 // If you set this to 1, the program will try to optimize the placement
25 // of coefficients to rANS probability distributions. This is randomized,
26 // so you might want to run it a few times.
27 #define FIND_OPTIMAL_STREAM_ASSIGNMENT 0
28 #define NUM_CLUSTERS 4
30 static constexpr uint32_t prob_bits = 12;
31 static constexpr uint32_t prob_scale = 1 << prob_bits;
35 void fdct_int32(short *const In);
36 void idct_int32(short *const In);
38 unsigned char rgb[WIDTH * HEIGHT * 3];
39 unsigned char pix_y[WIDTH * HEIGHT];
40 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
41 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
42 unsigned char full_pix_cb[WIDTH * HEIGHT];
43 unsigned char full_pix_cr[WIDTH * HEIGHT];
44 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
49 if (x > 255) return 255;
53 static const unsigned char std_luminance_quant_tbl[64] = {
55 16, 11, 10, 16, 24, 40, 51, 61,
56 12, 12, 14, 19, 26, 58, 60, 55,
57 14, 13, 16, 24, 40, 57, 69, 56,
58 14, 17, 22, 29, 51, 87, 80, 62,
59 18, 22, 37, 56, 68, 109, 103, 77,
60 24, 35, 55, 64, 81, 104, 113, 92,
61 49, 64, 78, 87, 103, 121, 120, 101,
62 72, 92, 95, 98, 112, 100, 103, 99
64 // ff_mpeg1_default_intra_matrix
65 8, 16, 19, 22, 26, 27, 29, 34,
66 16, 16, 22, 24, 27, 29, 34, 37,
67 19, 22, 26, 27, 29, 34, 34, 38,
68 22, 22, 26, 27, 29, 34, 37, 40,
69 22, 26, 27, 29, 32, 35, 40, 48,
70 26, 27, 29, 32, 35, 40, 48, 58,
71 26, 27, 29, 34, 38, 46, 56, 69,
72 27, 29, 35, 38, 46, 56, 69, 83
78 uint32_t freqs[NUM_SYMS];
79 uint32_t cum_freqs[NUM_SYMS + 1];
82 void calc_cum_freqs();
83 void normalize_freqs(uint32_t target_total);
86 void SymbolStats::clear()
88 for (int i=0; i < NUM_SYMS; i++)
92 void SymbolStats::calc_cum_freqs()
95 for (int i=0; i < NUM_SYMS; i++)
96 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
99 void SymbolStats::normalize_freqs(uint32_t target_total)
101 uint64_t real_freq[NUM_SYMS + 1]; // hack
103 assert(target_total >= NUM_SYMS);
106 uint32_t cur_total = cum_freqs[NUM_SYMS];
108 if (cur_total == 0) return;
110 double ideal_cost = 0.0;
111 for (int i = 1; i <= NUM_SYMS; i++)
113 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
114 if (real_freq[i] > 0)
115 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
118 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
120 // calculate updated freqs and make sure we didn't screw anything up
121 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
122 for (int i=0; i < NUM_SYMS; i++) {
124 assert(cum_freqs[i+1] == cum_freqs[i]);
126 assert(cum_freqs[i+1] > cum_freqs[i]);
129 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
132 double calc_cost = 0.0;
133 for (int i = 1; i <= NUM_SYMS; i++)
135 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
136 if (real_freq[i] > 0)
137 calc_cost -= real_freq[i] * log2(freq / double(target_total));
140 static double total_loss = 0.0;
141 total_loss += calc_cost - ideal_cost;
142 static double total_loss_with_dp = 0.0;
143 double optimal_cost = 0.0;
144 //total_loss_with_dp += optimal_cost - ideal_cost;
145 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
146 ideal_cost, optimal_cost,
147 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
150 SymbolStats stats[128];
152 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
153 // Distance from one stream to the other, based on a hacked-up K-L divergence.
154 float kl_dist[64][64];
157 const int luma_mapping[64] = {
158 0, 0, 1, 1, 2, 2, 3, 3,
159 0, 0, 1, 2, 2, 2, 3, 3,
160 1, 1, 2, 2, 2, 3, 3, 3,
161 1, 1, 2, 2, 2, 3, 3, 3,
162 1, 2, 2, 2, 2, 3, 3, 3,
163 2, 2, 2, 2, 3, 3, 3, 3,
164 2, 2, 3, 3, 3, 3, 3, 3,
165 3, 3, 3, 3, 3, 3, 3, 3,
167 const int chroma_mapping[64] = {
168 0, 1, 1, 2, 2, 2, 3, 3,
169 1, 1, 2, 2, 2, 3, 3, 3,
170 2, 2, 2, 2, 3, 3, 3, 3,
171 2, 2, 2, 3, 3, 3, 3, 3,
172 2, 3, 3, 3, 3, 3, 3, 3,
173 3, 3, 3, 3, 3, 3, 3, 3,
174 3, 3, 3, 3, 3, 3, 3, 3,
175 3, 3, 3, 3, 3, 3, 3, 3,
178 int pick_stats_for(int x, int y, bool is_chroma)
180 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
181 return y * 8 + x + is_chroma * 64;
184 return chroma_mapping[y * 8 + x] + 4;
186 return luma_mapping[y * 8 + x];
192 void write_varint(int x, FILE *fp)
195 putc((x & 0x7f) | 0x80, fp);
205 out_buf.reset(new uint8_t[out_max_size]);
209 void init_prob(SymbolStats &s)
211 for (int i = 0; i < NUM_SYMS; i++) {
212 printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits + 1);
213 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits + 1);
215 sign_bias = s.cum_freqs[NUM_SYMS];
220 out_end = out_buf.get() + out_max_size;
221 ptr = out_end; // *end* of output buffer
225 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
227 RansEncFlush(&rans, &ptr);
228 //printf("post-flush = %08x\n", rans);
230 uint32_t num_rans_bytes = out_end - ptr;
232 if (num_rans_bytes == 4) {
234 memcpy(&block, ptr, 4);
236 if (block == last_block) {
237 write_varint(0, codedfp);
248 write_varint(num_rans_bytes, codedfp);
249 //fwrite(&num_rans_bytes, 1, 4, codedfp);
250 fwrite(ptr, 1, num_rans_bytes, codedfp);
252 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
257 printf("Saving block: %d rANS bytes\n", num_rans_bytes);
258 return num_rans_bytes;
259 //return num_rans_bytes;
262 void encode_coeff(short signed_k)
264 //printf("encoding coeff %d (sym %d), rans before encoding = %08x\n", signed_k, ((abs(signed_k) - 1) & 255), rans);
265 unsigned short k = abs(signed_k);
266 if (k >= ESCAPE_LIMIT) {
267 // Put the coefficient as a 1/(2^12) symbol _before_
268 // the 255 coefficient, since the decoder will read the
269 // 255 coefficient first.
270 RansEncPut(&rans, &ptr, k, 1, prob_bits);
273 RansEncPutSymbol(&rans, &ptr, &esyms[(k - 1) & (NUM_SYMS - 1)]);
280 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
281 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
283 unique_ptr<uint8_t[]> out_buf;
287 RansEncSymbol esyms[NUM_SYMS];
290 uint32_t last_block = 0; // Not a valid 4-byte rANS block (?)
293 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
294 static constexpr double quant_scalefac = 4.0; // whatever?
296 static inline int quantize(int f, int coeff_idx)
298 if (coeff_idx == 0) {
299 return f / dc_scalefac;
305 const int w = std_luminance_quant_tbl[coeff_idx];
306 const int s = quant_scalefac;
307 int sign_f = (f > 0) ? 1 : -1;
308 return (32 * f + sign_f * w * s) / (2 * w * s);
311 static inline int unquantize(int qf, int coeff_idx)
313 if (coeff_idx == 0) {
314 return qf * dc_scalefac;
320 const int w = std_luminance_quant_tbl[coeff_idx];
321 const int s = quant_scalefac;
322 return (2 * qf * w * s) / 32;
325 void readpix(unsigned char *ptr, const char *filename)
327 FILE *fp = fopen(filename, "rb");
333 fseek(fp, 0, SEEK_END);
334 long len = ftell(fp);
335 assert(len >= WIDTH * HEIGHT * 3);
336 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
338 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
344 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
345 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
346 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
348 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
349 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
350 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
351 for (unsigned xb = 0; xb < WIDTH; ++xb) {
352 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
353 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
354 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
355 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
356 double cb = (b - y) * cb_fac + 128.0;
357 double cr = (r - y) * cr_fac + 128.0;
358 pix_y[(yb * WIDTH) + xb] = lrint(y);
359 temp_cb[(yb * WIDTH) + xb] = cb;
360 temp_cr[(yb * WIDTH) + xb] = cr;
361 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
362 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
366 // Simple 4:2:2 subsampling with left convention.
367 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
368 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
369 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
370 int c1 = yb * WIDTH + xb * 2;
371 int c2 = yb * WIDTH + xb * 2 + 1;
373 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
374 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
375 cb = std::min(std::max(cb, 0.0), 255.0);
376 cr = std::min(std::max(cr, 0.0), 255.0);
377 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
378 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
383 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
384 double find_best_assignment(const int *medoids, int *assignment)
386 double current_score = 0.0;
387 for (int i = 0; i < 64; ++i) {
388 int best_medoid = medoids[0];
389 float best_medoid_score = kl_dist[i][medoids[0]];
390 for (int j = 1; j < NUM_CLUSTERS; ++j) {
391 if (kl_dist[i][medoids[j]] < best_medoid_score) {
392 best_medoid = medoids[j];
393 best_medoid_score = kl_dist[i][medoids[j]];
396 assignment[i] = best_medoid;
397 current_score += best_medoid_score;
399 return current_score;
402 void find_optimal_stream_assignment(int base)
405 for (unsigned i = 0; i < 64; ++i) {
407 for (unsigned k = 0; k < NUM_SYMS; ++k) {
408 s += stats[i + base].freqs[k] + 0.5;
410 inv_sum[i] = 1.0 / s;
413 for (unsigned i = 0; i < 64; ++i) {
414 for (unsigned j = 0; j < 64; ++j) {
416 for (unsigned k = 0; k < NUM_SYMS; ++k) {
417 double p1 = (stats[i + base].freqs[k] + 0.5) * inv_sum[i];
418 double p2 = (stats[j + base].freqs[k] + 0.5) * inv_sum[j];
420 // K-L divergence is asymmetric; this is a hack.
421 d += p1 * log(p1 / p2);
422 d += p2 * log(p2 / p1);
425 //printf("%.3f ", d);
431 int medoids[64]; // only the first NUM_CLUSTERS matter
432 bool is_medoid[64] = { false };
433 std::iota(medoids, medoids + 64, 0);
434 std::random_device rd;
435 std::mt19937 g(rd());
436 std::shuffle(medoids, medoids + 64, g);
437 for (int i = 0; i < NUM_CLUSTERS; ++i) {
438 printf("%d ", medoids[i]);
439 is_medoid[medoids[i]] = true;
443 // assign each data point to the closest medoid
445 double current_score = find_best_assignment(medoids, assignment);
447 for (int i = 0; i < 1000; ++i) {
448 printf("iter %d\n", i);
449 bool any_changed = false;
450 for (int m = 0; m < NUM_CLUSTERS; ++m) {
451 for (int o = 0; o < 64; ++o) {
452 if (is_medoid[o]) continue;
453 int old_medoid = medoids[m];
456 int new_assignment[64];
457 double candidate_score = find_best_assignment(medoids, new_assignment);
459 if (candidate_score < current_score) {
460 current_score = candidate_score;
461 memcpy(assignment, new_assignment, sizeof(assignment));
463 is_medoid[old_medoid] = false;
464 is_medoid[medoids[m]] = true;
465 printf("%f: ", current_score);
466 for (int i = 0; i < 64; ++i) {
467 printf("%d ", assignment[i]);
472 medoids[m] = old_medoid;
476 if (!any_changed) break;
479 std::unordered_map<int, int> rmap;
480 for (int i = 0; i < 64; ++i) {
481 if (i % 8 == 0) printf("\n");
482 if (!rmap.count(assignment[i])) {
483 rmap.emplace(assignment[i], rmap.size());
485 printf("%d, ", rmap[assignment[i]]);
491 int main(int argc, char **argv)
494 readpix(rgb, argv[1]);
496 readpix(rgb, "color.pnm");
499 double sum_sq_err = 0.0;
500 //double last_cb_cfl_fac = 0.0;
501 //double last_cr_cfl_fac = 0.0;
503 // DCT and quantize luma
504 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
505 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
508 for (unsigned y = 0; y < 8; ++y) {
509 for (unsigned x = 0; x < 8; ++x) {
510 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
517 for (unsigned y = 0; y < 8; ++y) {
518 for (unsigned x = 0; x < 8; ++x) {
519 int coeff_idx = y * 8 + x;
520 int k = quantize(in_y[coeff_idx], coeff_idx);
521 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
523 // Store back for reconstruction / PSNR calculation
524 in_y[coeff_idx] = unquantize(k, coeff_idx);
530 for (unsigned y = 0; y < 8; ++y) {
531 for (unsigned x = 0; x < 8; ++x) {
532 int k = clamp(in_y[y * 8 + x]);
533 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
534 sum_sq_err += (*ptr - k) * (*ptr - k);
540 double mse = sum_sq_err / double(WIDTH * HEIGHT);
541 double psnr_db = 20 * log10(255.0 / sqrt(mse));
542 printf("psnr = %.2f dB\n", psnr_db);
544 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
546 // DCT and quantize chroma
547 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
548 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
549 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
551 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
552 printf("in blocks:\n");
553 for (unsigned y = 0; y < 8; ++y) {
554 for (unsigned x = 0; x < 8; ++x) {
555 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
559 for (unsigned x = 0; x < 8; ++x) {
560 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
567 for (unsigned y = 0; y < 8; ++y) {
568 for (unsigned x = 0; x < 4; ++x) {
569 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
570 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
573 in_y[y * 8 + x * 2 + 0] = a;
574 in_y[y * 8 + x * 2 + 1] = b;
578 printf("tf-ed block:\n");
579 for (unsigned y = 0; y < 8; ++y) {
580 for (unsigned x = 0; x < 8; ++x) {
581 short a = in_y[y * 8 + x];
587 // Read Y block with no tf switch (from reconstructed luma)
589 for (unsigned y = 0; y < 8; ++y) {
590 for (unsigned x = 0; x < 8; ++x) {
591 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
598 short in_cb[64], in_cr[64];
599 for (unsigned y = 0; y < 8; ++y) {
600 for (unsigned x = 0; x < 8; ++x) {
601 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
602 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
615 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
616 //double denom = (x1 * x1);
618 double y0 = in_cb[1];
619 double y1 = in_cb[8];
620 double y2 = in_cb[9];
621 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
622 //double cb_cfl_fac = (x1 * y1) / denom;
624 for (unsigned y = 0; y < 8; ++y) {
625 for (unsigned x = 0; x < 8; ++x) {
626 short a = in_y[y * 8 + x];
630 for (unsigned x = 0; x < 8; ++x) {
631 short a = in_cb[y * 8 + x];
636 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
637 in_y[1], in_y[8], in_y[9],
638 in_cb[1], in_cb[8], in_cb[9],
644 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
645 //double cr_cfl_fac = (x1 * y1) / denom;
646 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
647 cb_cfl_fac, in_cb[0] - in_y[0],
648 cr_cfl_fac, in_cr[0] - in_y[0]);
650 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
653 //last_cb_cfl_fac = cb_cfl_fac;
654 //last_cr_cfl_fac = cr_cfl_fac;
656 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
657 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
658 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
659 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
660 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
661 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
663 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
664 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
665 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
666 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
667 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
668 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
669 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
673 //in_cb[0] -= in_y[0];
674 //in_cr[0] -= in_y[0];
677 for (unsigned y = 0; y < 8; ++y) {
678 for (unsigned x = 0; x < 8; ++x) {
679 int coeff_idx = y * 8 + x;
680 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
681 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
682 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
683 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
685 // Store back for reconstruction / PSNR calculation
686 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
687 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
691 idct_int32(in_y); // DEBUG
695 for (unsigned y = 0; y < 8; ++y) {
696 for (unsigned x = 0; x < 8; ++x) {
697 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
698 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
700 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
701 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
706 last_cb_cfl_fac = cb_cfl_fac;
707 last_cr_cfl_fac = cr_cfl_fac;
713 printf("chroma_energy = %f, with_pred = %f\n",
714 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
717 // DC coefficient pred from the right to left
718 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
719 for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) {
720 coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)];
723 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
724 for (unsigned xb = 0; xb < WIDTH/2 - 8; xb += 8) {
725 coeff_cb[yb * WIDTH/2 + xb] -= coeff_cb[yb * WIDTH/2 + (xb + 8)];
726 coeff_cr[yb * WIDTH/2 + xb] -= coeff_cr[yb * WIDTH/2 + (xb + 8)];
730 FILE *fp = fopen("reconstructed.pgm", "wb");
731 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
732 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
735 fp = fopen("reconstructed.pnm", "wb");
736 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
737 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
738 for (unsigned xb = 0; xb < WIDTH; ++xb) {
739 int y = pix_y[(yb * WIDTH) + xb];
741 int c0 = yb * (WIDTH/2) + xb/2;
743 cb = pix_cb[c0] - 128.0;
744 cr = pix_cr[c0] - 128.0;
746 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
747 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
748 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
751 double r = y + 1.5748 * cr;
752 double g = y - 0.1873 * cb - 0.4681 * cr;
753 double b = y + 1.8556 * cb;
755 putc(clamp(lrint(r)), fp);
756 putc(clamp(lrint(g)), fp);
757 putc(clamp(lrint(b)), fp);
762 // For each coefficient, make some tables.
763 size_t extra_bits = 0;
764 for (unsigned i = 0; i < 64; ++i) {
767 for (unsigned y = 0; y < 8; ++y) {
768 for (unsigned x = 0; x < 8; ++x) {
769 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
770 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
773 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
774 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
775 unsigned short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
776 if (k >= ESCAPE_LIMIT) {
778 extra_bits += 12; // escape this one
780 ++s_luma.freqs[(k - 1) & (NUM_SYMS - 1)];
784 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
785 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
786 unsigned short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
787 unsigned short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
788 if (k_cb >= ESCAPE_LIMIT) {
790 extra_bits += 12; // escape this one
792 if (k_cr >= ESCAPE_LIMIT) {
794 extra_bits += 12; // escape this one
796 ++s_chroma.freqs[(k_cb - 1) & (NUM_SYMS - 1)];
797 ++s_chroma.freqs[(k_cr - 1) & (NUM_SYMS - 1)];
803 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
805 find_optimal_stream_assignment(0);
807 find_optimal_stream_assignment(64);
811 for (unsigned i = 0; i < 64; ++i) {
812 stats[i].freqs[NUM_SYMS - 1] /= 2; // zero, has no sign bits (yes, this is trickery)
813 stats[i].normalize_freqs(prob_scale);
814 stats[i].cum_freqs[NUM_SYMS] += stats[i].freqs[NUM_SYMS - 1];
815 stats[i].freqs[NUM_SYMS - 1] *= 2;
818 FILE *codedfp = fopen("coded.dat", "wb");
819 if (codedfp == nullptr) {
824 // TODO: rather gamma-k or something
825 for (unsigned i = 0; i < 64; ++i) {
826 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
829 printf("writing table %d\n", i);
830 for (unsigned j = 0; j < NUM_SYMS; ++j) {
831 write_varint(stats[i].freqs[j], codedfp);
835 RansEncoder rans_encoder;
837 size_t tot_bytes = 0;
840 for (unsigned y = 0; y < 8; ++y) {
841 for (unsigned x = 0; x < 8; ++x) {
842 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
843 rans_encoder.init_prob(s_luma);
846 std::vector<int> lens;
848 // need to reverse later
849 rans_encoder.clear();
850 size_t num_bytes = 0;
851 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
852 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
853 int k = coeff_y[(yb + y) * WIDTH + (xb + x)];
854 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
855 rans_encoder.encode_coeff(k);
858 int l = rans_encoder.save_block(codedfp);
863 if (HEIGHT % 16 != 0) {
864 num_bytes += rans_encoder.save_block(codedfp);
866 tot_bytes += num_bytes;
867 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
873 double avg_l = sum_l / lens.size();
875 double sum_sql = 0.0;
877 sum_sql += (l - avg_l) * (l - avg_l);
879 double stddev_l = sqrt(sum_sql / (lens.size() - 1));
880 printf("coeff %d: avg=%.2f bytes, stddev=%.2f bytes\n", y*8+x, avg_l, stddev_l);
885 for (unsigned y = 0; y < 8; ++y) {
886 for (unsigned x = 0; x < 8; ++x) {
887 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
888 rans_encoder.init_prob(s_chroma);
890 rans_encoder.clear();
891 size_t num_bytes = 0;
892 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
893 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
894 int k = coeff_cb[(yb + y) * WIDTH/2 + (xb + x)];
895 rans_encoder.encode_coeff(k);
898 num_bytes += rans_encoder.save_block(codedfp);
901 if (HEIGHT % 16 != 0) {
902 num_bytes += rans_encoder.save_block(codedfp);
904 tot_bytes += num_bytes;
905 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
910 for (unsigned y = 0; y < 8; ++y) {
911 for (unsigned x = 0; x < 8; ++x) {
912 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
913 rans_encoder.init_prob(s_chroma);
915 rans_encoder.clear();
916 size_t num_bytes = 0;
917 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
918 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
919 int k = coeff_cr[(yb + y) * WIDTH/2 + (xb + x)];
920 rans_encoder.encode_coeff(k);
923 num_bytes += rans_encoder.save_block(codedfp);
926 if (HEIGHT % 16 != 0) {
927 num_bytes += rans_encoder.save_block(codedfp);
929 tot_bytes += num_bytes;
930 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
934 printf("%ld bytes + %ld escape bits (%ld) = %ld total bytes\n",
935 tot_bytes - extra_bits / 8,