7 //#include "ryg_rans/rans64.h"
8 #include "ryg_rans/rans_byte.h"
9 #include "ryg_rans/renormalize.h"
16 #define ESCAPE_LIMIT (NUM_SYMS - 1)
18 static constexpr uint32_t prob_bits = 12;
19 static constexpr uint32_t prob_scale = 1 << prob_bits;
23 void fdct_int32(short *const In);
24 void idct_int32(short *const In);
26 unsigned char rgb[WIDTH * HEIGHT * 3];
27 unsigned char pix_y[WIDTH * HEIGHT];
28 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
29 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
30 unsigned char full_pix_cb[WIDTH * HEIGHT];
31 unsigned char full_pix_cr[WIDTH * HEIGHT];
32 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
37 if (x > 255) return 255;
41 static const unsigned char std_luminance_quant_tbl[64] = {
43 16, 11, 10, 16, 24, 40, 51, 61,
44 12, 12, 14, 19, 26, 58, 60, 55,
45 14, 13, 16, 24, 40, 57, 69, 56,
46 14, 17, 22, 29, 51, 87, 80, 62,
47 18, 22, 37, 56, 68, 109, 103, 77,
48 24, 35, 55, 64, 81, 104, 113, 92,
49 49, 64, 78, 87, 103, 121, 120, 101,
50 72, 92, 95, 98, 112, 100, 103, 99
52 // ff_mpeg1_default_intra_matrix
53 8, 16, 19, 22, 26, 27, 29, 34,
54 16, 16, 22, 24, 27, 29, 34, 37,
55 19, 22, 26, 27, 29, 34, 34, 38,
56 22, 22, 26, 27, 29, 34, 37, 40,
57 22, 26, 27, 29, 32, 35, 40, 48,
58 26, 27, 29, 32, 35, 40, 48, 58,
59 26, 27, 29, 34, 38, 46, 56, 69,
60 27, 29, 35, 38, 46, 56, 69, 83
66 uint32_t freqs[NUM_SYMS];
67 uint32_t cum_freqs[NUM_SYMS + 1];
70 void count_freqs(uint8_t const* in, size_t nbytes);
71 void calc_cum_freqs();
72 void normalize_freqs(uint32_t target_total);
75 void SymbolStats::clear()
77 for (int i=0; i < NUM_SYMS; i++)
81 void SymbolStats::count_freqs(uint8_t const* in, size_t nbytes)
85 for (size_t i=0; i < nbytes; i++)
89 void SymbolStats::calc_cum_freqs()
92 for (int i=0; i < NUM_SYMS; i++)
93 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
96 static double cache[NUM_SYMS + 1][prob_scale + 1];
97 static double log2cache[prob_scale + 1];
98 static int64_t cachefill = 0;
100 double find_optimal_cost(const uint32_t *cum_freqs, int sym_to, int available_slots)
104 while (sym_to > 0 && cum_freqs[sym_to] == cum_freqs[sym_to - 1]) { --sym_to; }
105 if (cache[sym_to][available_slots] >= 0.0) {
106 //printf("CACHE: %d,%d\n", sym_to, available_slots);
107 return cache[sym_to][available_slots];
113 return cum_freqs[0] * log2cache[available_slots];
115 if (available_slots == 1) {
116 return cum_freqs[0] * log2cache[1] + find_optimal_cost(cum_freqs, sym_to - 1, 0);
119 // printf("UNCACHE: %d,%d\n", sym_to, available_slots);
121 // ok, test all possible options for the last symbol (TODO: save the choice)
122 double best_so_far = HUGE_VAL;
123 //for (int i = num_syms - 1; i < available_slots; ++i) {
124 double f = freqs[sym_to - 1];
125 for (int i = available_slots; i --> 0; ) {
126 double cost1 = f * log2cache[available_slots - i];
127 double cost2 = find_optimal_cost(freqs, sym_to - 1, i);
129 if (sym_to == 3 && available_slots == 838) {
130 printf("%d %f\n", i, cost1 + cost2);
132 if (cost1 + cost2 > best_so_far) {
135 best_so_far = cost1 + cost2;
138 // Minimize the number of total bits spent as a function of how many slots
139 // we assign to this symbol.
141 // The cost function is convex (I don't know how to prove it, but it makes
142 // intuitively a lot of sense). Find a reasonable guess and see what way
143 // we should search, then iterate until we either hit the end or we start
145 double f = cum_freqs[sym_to - 1] - cum_freqs[sym_to - 2];
146 double start = lrint(available_slots * f / cum_freqs[sym_to - 1]);
148 int x1 = std::max<int>(floor(start), 1);
151 double f1 = f * log2cache[x1] + find_optimal_cost(cum_freqs, sym_to - 1, available_slots - x1);
152 double f2 = f * log2cache[x2] + find_optimal_cost(cum_freqs, sym_to - 1, available_slots - x2);
154 int x, direction; // -1 or +1
155 double best_so_far = std::min(f1, f2);
156 if (isinf(f1) && isinf(f2)) {
157 // The cost isn't infinite due to the first term, so we need to go downwards
158 // to give the second term more room to breathe.
161 } else if (f1 < f2) {
169 //printf("[%d,%d] freq=%ld cumfreq=%d From %d and %d, chose %d [%f] and direction=%d\n",
170 // sym_to, available_slots, freqs[sym_to - 1], cum_freqs[sym_to - 1], x1, x2, x, best_so_far, direction);
172 while ((x + direction) > 0 && (x + direction) <= available_slots) {
174 double fn = f * log2cache[x] + find_optimal_cost(cum_freqs, sym_to - 1, available_slots - x);
175 // printf("[%d,%d] %d is %f\n", sym_to, available_slots, x, fn);
176 if (fn > best_so_far) {
182 if (++cachefill % 131072 == 0) {
183 // printf("%d,%d = %f (cachefill = %.2f%%)\n", sym_to, available_slots, best_so_far,
184 // 100.0 * (cachefill / double((NUM_SYMS + 1) * (prob_scale + 1))));
186 assert(best_so_far >= 0.0);
187 assert(cache[sym_to][available_slots] < 0.0);
188 cache[sym_to][available_slots] = best_so_far;
192 double find_optimal_cost(const uint32_t *cum_freqs, const uint64_t *freqs)
194 for (int j = 0; j <= NUM_SYMS; ++j) {
195 for (unsigned k = 0; k <= prob_scale; ++k) {
199 for (unsigned k = 0; k <= prob_scale; ++k) {
200 log2cache[k] = -log2(k * (1.0 / prob_scale));
201 //printf("log2cache[%d] = %f\n", k, log2cache[k]);
204 double ret = find_optimal_cost(cum_freqs, NUM_SYMS, prob_scale);
205 printf("Used %ld function invocations\n", cachefill);
209 void SymbolStats::normalize_freqs(uint32_t target_total)
211 uint64_t real_freq[NUM_SYMS + 1]; // hack
213 assert(target_total >= NUM_SYMS);
216 uint32_t cur_total = cum_freqs[NUM_SYMS];
218 if (cur_total == 0) return;
220 double ideal_cost = 0.0;
221 for (int i = 1; i <= NUM_SYMS; i++)
223 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
224 if (real_freq[i] > 0)
225 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
228 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
231 double optimal_cost = find_optimal_cost(cum_freqs + 1, real_freq + 1);
233 // resample distribution based on cumulative freqs
234 for (int i = 1; i <= NUM_SYMS; i++)
235 //cum_freqs[i] = ((uint64_t)target_total * cum_freqs[i])/cur_total;
236 cum_freqs[i] = lrint(cum_freqs[i] * double(target_total) / cur_total);
238 // if we nuked any non-0 frequency symbol to 0, we need to steal
239 // the range to make the frequency nonzero from elsewhere.
241 // this is not at all optimal, i'm just doing the first thing that comes to mind.
242 for (int i=0; i < NUM_SYMS; i++) {
243 if (freqs[i] && cum_freqs[i+1] == cum_freqs[i]) {
244 // symbol i was set to zero freq
246 // find best symbol to steal frequency from (try to steal from low-freq ones)
247 uint32_t best_freq = ~0u;
249 for (int j=0; j < NUM_SYMS; j++) {
250 uint32_t freq = cum_freqs[j+1] - cum_freqs[j];
251 if (freq > 1 && freq < best_freq) {
256 assert(best_steal != -1);
258 // and steal from it!
259 if (best_steal < i) {
260 for (int j = best_steal + 1; j <= i; j++)
263 assert(best_steal > i);
264 for (int j = i + 1; j <= best_steal; j++)
271 // calculate updated freqs and make sure we didn't screw anything up
272 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
273 for (int i=0; i < NUM_SYMS; i++) {
275 assert(cum_freqs[i+1] == cum_freqs[i]);
277 assert(cum_freqs[i+1] > cum_freqs[i]);
280 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
283 double calc_cost = 0.0;
284 for (int i = 1; i <= NUM_SYMS; i++)
286 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
287 if (real_freq[i] > 0)
288 calc_cost -= real_freq[i] * log2(freq / double(target_total));
291 static double total_loss = 0.0;
292 total_loss += calc_cost - ideal_cost;
293 static double total_loss_with_dp = 0.0;
294 double optimal_cost = 0.0;
295 //total_loss_with_dp += optimal_cost - ideal_cost;
296 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
297 ideal_cost, optimal_cost,
298 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
301 SymbolStats stats[64];
303 int pick_stats_for(int y, int x)
305 //return std::min<int>(hypot(x, y), 7);
306 return std::min<int>(x + y, 7);
307 //if (x + y >= 7) return 7;
311 if (y == 0 && x == 0) {
320 void write_varint(int x, FILE *fp)
323 putc((x & 0x7f) | 0x80, fp);
333 out_buf.reset(new uint8_t[out_max_size]);
334 sign_buf.reset(new uint8_t[max_num_sign]);
338 void init_prob(const SymbolStats &s)
340 for (int i = 0; i < NUM_SYMS; i++) {
341 printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits);
342 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits);
348 out_end = out_buf.get() + out_max_size;
349 sign_end = sign_buf.get() + max_num_sign;
350 ptr = out_end; // *end* of output buffer
351 sign_ptr = sign_end; // *end* of output buffer
356 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
358 RansEncFlush(&rans, &ptr);
359 //printf("post-flush = %08x\n", rans);
361 uint32_t num_rans_bytes = out_end - ptr;
362 write_varint(num_rans_bytes, codedfp);
363 //fwrite(&num_rans_bytes, 1, 4, codedfp);
364 fwrite(ptr, 1, num_rans_bytes, codedfp);
366 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
368 if (free_sign_bits > 0) {
369 *sign_ptr <<= free_sign_bits;
373 uint32_t num_sign_bytes = sign_end - sign_ptr;
374 write_varint((num_sign_bytes << 3) | free_sign_bits, codedfp);
375 fwrite(sign_ptr, 1, num_sign_bytes, codedfp);
380 printf("Saving block: %d rANS bytes, %d sign bytes\n", num_rans_bytes, num_sign_bytes);
381 return num_rans_bytes + num_sign_bytes;
382 //return num_rans_bytes;
385 void encode_coeff(short signed_k)
387 printf("encoding coeff %d\n", signed_k);
388 short k = abs(signed_k);
389 if (k >= ESCAPE_LIMIT) {
390 // Put the coefficient as a 1/(2^12) symbol _before_
391 // the 255 coefficient, since the decoder will read the
392 // 255 coefficient first.
393 RansEncPut(&rans, &ptr, k, 1, prob_bits);
398 if (free_sign_bits == 0) {
404 *sign_ptr |= (signed_k < 0);
407 RansEncPut(&rans, &ptr, (k < 0) ? prob_scale / 2 : 0, prob_scale / 2, prob_bits);
410 RansEncPutSymbol(&rans, &ptr, &esyms[k]);
414 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
415 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
417 unique_ptr<uint8_t[]> out_buf, sign_buf;
418 uint8_t *out_end, *sign_end;
419 uint8_t *ptr, *sign_ptr;
421 size_t free_sign_bits;
422 RansEncSymbol esyms[NUM_SYMS];
425 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
426 static constexpr double quant_scalefac = 4.0; // whatever?
428 static inline int quantize(int f, int coeff_idx)
430 if (coeff_idx == 0) {
431 return f / dc_scalefac;
437 const int w = std_luminance_quant_tbl[coeff_idx];
438 const int s = quant_scalefac;
439 int sign_f = (f > 0) ? 1 : -1;
440 return (32 * f + sign_f * w * s) / (2 * w * s);
443 static inline int unquantize(int qf, int coeff_idx)
445 if (coeff_idx == 0) {
446 return qf * dc_scalefac;
452 const int w = std_luminance_quant_tbl[coeff_idx];
453 const int s = quant_scalefac;
454 return (2 * qf * w * s) / 32;
457 void readpix(unsigned char *ptr, const char *filename)
459 FILE *fp = fopen(filename, "rb");
465 fseek(fp, 0, SEEK_END);
466 long len = ftell(fp);
467 assert(len >= WIDTH * HEIGHT * 3);
468 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
470 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
476 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
477 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
478 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
480 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
481 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
482 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
483 for (unsigned xb = 0; xb < WIDTH; ++xb) {
484 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
485 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
486 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
487 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
488 double cb = (b - y) * cb_fac + 128.0;
489 double cr = (r - y) * cr_fac + 128.0;
490 pix_y[(yb * WIDTH) + xb] = lrint(y);
491 temp_cb[(yb * WIDTH) + xb] = cb;
492 temp_cr[(yb * WIDTH) + xb] = cr;
493 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
494 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
498 // Simple 4:2:2 subsampling with left convention.
499 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
500 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
501 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
502 int c1 = yb * WIDTH + xb * 2;
503 int c2 = yb * WIDTH + xb * 2 + 1;
505 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
506 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
507 cb = std::min(std::max(cb, 0.0), 255.0);
508 cr = std::min(std::max(cr, 0.0), 255.0);
509 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
510 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
515 int main(int argc, char **argv)
518 readpix(rgb, argv[1]);
520 readpix(rgb, "color.pnm");
523 double sum_sq_err = 0.0;
524 //double last_cb_cfl_fac = 0.0;
525 //double last_cr_cfl_fac = 0.0;
527 // DCT and quantize luma
528 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
529 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
532 for (unsigned y = 0; y < 8; ++y) {
533 for (unsigned x = 0; x < 8; ++x) {
534 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
541 for (unsigned y = 0; y < 8; ++y) {
542 for (unsigned x = 0; x < 8; ++x) {
543 int coeff_idx = y * 8 + x;
544 int k = quantize(in_y[coeff_idx], coeff_idx);
545 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
547 // Store back for reconstruction / PSNR calculation
548 in_y[coeff_idx] = unquantize(k, coeff_idx);
554 for (unsigned y = 0; y < 8; ++y) {
555 for (unsigned x = 0; x < 8; ++x) {
556 int k = clamp(in_y[y * 8 + x]);
557 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
558 sum_sq_err += (*ptr - k) * (*ptr - k);
564 double mse = sum_sq_err / double(WIDTH * HEIGHT);
565 double psnr_db = 20 * log10(255.0 / sqrt(mse));
566 printf("psnr = %.2f dB\n", psnr_db);
568 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
570 // DCT and quantize chroma
571 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
572 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
573 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
575 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
576 printf("in blocks:\n");
577 for (unsigned y = 0; y < 8; ++y) {
578 for (unsigned x = 0; x < 8; ++x) {
579 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
583 for (unsigned x = 0; x < 8; ++x) {
584 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
591 for (unsigned y = 0; y < 8; ++y) {
592 for (unsigned x = 0; x < 4; ++x) {
593 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
594 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
597 in_y[y * 8 + x * 2 + 0] = a;
598 in_y[y * 8 + x * 2 + 1] = b;
602 printf("tf-ed block:\n");
603 for (unsigned y = 0; y < 8; ++y) {
604 for (unsigned x = 0; x < 8; ++x) {
605 short a = in_y[y * 8 + x];
611 // Read Y block with no tf switch (from reconstructed luma)
613 for (unsigned y = 0; y < 8; ++y) {
614 for (unsigned x = 0; x < 8; ++x) {
615 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
622 short in_cb[64], in_cr[64];
623 for (unsigned y = 0; y < 8; ++y) {
624 for (unsigned x = 0; x < 8; ++x) {
625 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
626 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
639 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
640 //double denom = (x1 * x1);
642 double y0 = in_cb[1];
643 double y1 = in_cb[8];
644 double y2 = in_cb[9];
645 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
646 //double cb_cfl_fac = (x1 * y1) / denom;
648 for (unsigned y = 0; y < 8; ++y) {
649 for (unsigned x = 0; x < 8; ++x) {
650 short a = in_y[y * 8 + x];
654 for (unsigned x = 0; x < 8; ++x) {
655 short a = in_cb[y * 8 + x];
660 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
661 in_y[1], in_y[8], in_y[9],
662 in_cb[1], in_cb[8], in_cb[9],
668 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
669 //double cr_cfl_fac = (x1 * y1) / denom;
670 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
671 cb_cfl_fac, in_cb[0] - in_y[0],
672 cr_cfl_fac, in_cr[0] - in_y[0]);
674 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
677 //last_cb_cfl_fac = cb_cfl_fac;
678 //last_cr_cfl_fac = cr_cfl_fac;
680 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
681 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
682 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
683 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
684 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
685 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
687 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
688 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
689 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
690 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
691 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
692 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
693 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
697 //in_cb[0] -= in_y[0];
698 //in_cr[0] -= in_y[0];
701 for (unsigned y = 0; y < 8; ++y) {
702 for (unsigned x = 0; x < 8; ++x) {
703 int coeff_idx = y * 8 + x;
704 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
705 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
706 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
707 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
709 // Store back for reconstruction / PSNR calculation
710 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
711 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
715 idct_int32(in_y); // DEBUG
719 for (unsigned y = 0; y < 8; ++y) {
720 for (unsigned x = 0; x < 8; ++x) {
721 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
722 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
724 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
725 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
730 last_cb_cfl_fac = cb_cfl_fac;
731 last_cr_cfl_fac = cr_cfl_fac;
737 printf("chroma_energy = %f, with_pred = %f\n",
738 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
741 // DC coefficient pred from the right to left
742 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
743 for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) {
744 coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)];
748 FILE *fp = fopen("reconstructed.pgm", "wb");
749 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
750 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
753 fp = fopen("reconstructed.pnm", "wb");
754 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
755 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
756 for (unsigned xb = 0; xb < WIDTH; ++xb) {
757 int y = pix_y[(yb * WIDTH) + xb];
759 int c0 = yb * (WIDTH/2) + xb/2;
761 cb = pix_cb[c0] - 128.0;
762 cr = pix_cr[c0] - 128.0;
764 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
765 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
766 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
769 double r = y + 1.5748 * cr;
770 double g = y - 0.1873 * cb - 0.4681 * cr;
771 double b = y + 1.8556 * cb;
773 putc(clamp(lrint(r)), fp);
774 putc(clamp(lrint(g)), fp);
775 putc(clamp(lrint(b)), fp);
780 // For each coefficient, make some tables.
781 size_t extra_bits = 0, sign_bits = 0;
782 for (unsigned i = 0; i < 64; ++i) {
785 for (unsigned y = 0; y < 8; ++y) {
786 for (unsigned x = 0; x < 8; ++x) {
787 SymbolStats &s_luma = stats[pick_stats_for(x, y)];
788 SymbolStats &s_chroma = stats[pick_stats_for(x, y) + 8]; // HACK
789 //SymbolStats &s_chroma = stats[pick_stats_for(x, y)];
792 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
793 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
794 short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
795 if (k >= ESCAPE_LIMIT) {
797 extra_bits += 12; // escape this one
799 if (k != 0) ++sign_bits;
804 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
805 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
806 short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
807 short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
808 if (k_cb >= ESCAPE_LIMIT) {
810 extra_bits += 12; // escape this one
812 if (k_cr >= ESCAPE_LIMIT) {
814 extra_bits += 12; // escape this one
816 if (k_cb != 0) ++sign_bits;
817 if (k_cr != 0) ++sign_bits;
818 ++s_chroma.freqs[k_cb];
819 ++s_chroma.freqs[k_cr];
824 for (unsigned i = 0; i < 64; ++i) {
825 stats[i].normalize_freqs(prob_scale);
828 FILE *codedfp = fopen("coded.dat", "wb");
829 if (codedfp == nullptr) {
834 // TODO: rather gamma-k or something
835 for (unsigned i = 0; i < 64; ++i) {
836 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
839 printf("writing table %d\n", i);
840 for (unsigned j = 0; j < NUM_SYMS; ++j) {
841 write_varint(stats[i].freqs[j], codedfp);
845 RansEncoder rans_encoder;
847 size_t tot_bytes = 0;
850 for (unsigned y = 0; y < 8; ++y) {
851 for (unsigned x = 0; x < 8; ++x) {
852 SymbolStats &s_luma = stats[pick_stats_for(x, y)];
853 rans_encoder.init_prob(s_luma);
857 // need to reverse later
858 rans_encoder.clear();
859 size_t num_bytes = 0;
860 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
861 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
862 int k = coeff_y[(yb + y) * WIDTH + (xb + x)];
863 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
864 rans_encoder.encode_coeff(k);
867 num_bytes += rans_encoder.save_block(codedfp);
870 if (HEIGHT % 16 != 0) {
871 num_bytes += rans_encoder.save_block(codedfp);
873 tot_bytes += num_bytes;
874 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
879 for (unsigned y = 0; y < 8; ++y) {
880 for (unsigned x = 0; x < 8; ++x) {
881 SymbolStats &s_chroma = stats[pick_stats_for(x, y) + 8];
882 //SymbolStats &s_chroma = stats[pick_stats_for(x, y)];
883 rans_encoder.init_prob(s_chroma);
885 rans_encoder.clear();
886 size_t num_bytes = 0;
887 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
888 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
889 int k = coeff_cb[(yb + y) * WIDTH/2 + (xb + x)];
890 rans_encoder.encode_coeff(k);
893 num_bytes += rans_encoder.save_block(codedfp);
896 if (HEIGHT % 16 != 0) {
897 num_bytes += rans_encoder.save_block(codedfp);
899 tot_bytes += num_bytes;
900 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
905 for (unsigned y = 0; y < 8; ++y) {
906 for (unsigned x = 0; x < 8; ++x) {
907 SymbolStats &s_chroma = stats[pick_stats_for(x, y) + 8];
908 //SymbolStats &s_chroma = stats[pick_stats_for(x, y)];
909 rans_encoder.init_prob(s_chroma);
911 rans_encoder.clear();
912 size_t num_bytes = 0;
913 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
914 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
915 int k = coeff_cr[(yb + y) * WIDTH/2 + (xb + x)];
916 rans_encoder.encode_coeff(k);
919 num_bytes += rans_encoder.save_block(codedfp);
922 if (HEIGHT % 16 != 0) {
923 num_bytes += rans_encoder.save_block(codedfp);
925 tot_bytes += num_bytes;
926 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
930 printf("%ld bytes + %ld sign bits (%ld) + %ld escape bits (%ld) = %ld total bytes\n",
931 tot_bytes - sign_bits / 8 - extra_bits / 8,