8 //#include "ryg_rans/rans64.h"
9 #include "ryg_rans/rans_byte.h"
10 #include "ryg_rans/renormalize.h"
17 #include <unordered_map>
21 #define WIDTH_BLOCKS (WIDTH/8)
22 #define WIDTH_BLOCKS_CHROMA (WIDTH/16)
23 #define HEIGHT_BLOCKS (HEIGHT/8)
24 #define NUM_BLOCKS (WIDTH_BLOCKS * HEIGHT_BLOCKS)
25 #define NUM_BLOCKS_CHROMA (WIDTH_BLOCKS_CHROMA * HEIGHT_BLOCKS)
27 #define ESCAPE_LIMIT (NUM_SYMS - 1)
29 // If you set this to 1, the program will try to optimize the placement
30 // of coefficients to rANS probability distributions. This is randomized,
31 // so you might want to run it a few times.
32 #define FIND_OPTIMAL_STREAM_ASSIGNMENT 0
33 #define NUM_CLUSTERS 4
35 static constexpr uint32_t prob_bits = 12;
36 static constexpr uint32_t prob_scale = 1 << prob_bits;
40 void fdct_int32(short *const In);
41 void idct_int32(short *const In);
43 unsigned char rgb[WIDTH * HEIGHT * 3];
44 unsigned char pix_y[WIDTH * HEIGHT];
45 unsigned char pix_cb[(WIDTH/2) * HEIGHT];
46 unsigned char pix_cr[(WIDTH/2) * HEIGHT];
47 unsigned char full_pix_cb[WIDTH * HEIGHT];
48 unsigned char full_pix_cr[WIDTH * HEIGHT];
49 short coeff_y[WIDTH * HEIGHT], coeff_cb[(WIDTH/2) * HEIGHT], coeff_cr[(WIDTH/2) * HEIGHT];
54 if (x > 255) return 255;
58 static const unsigned char std_luminance_quant_tbl[64] = {
60 16, 11, 10, 16, 24, 40, 51, 61,
61 12, 12, 14, 19, 26, 58, 60, 55,
62 14, 13, 16, 24, 40, 57, 69, 56,
63 14, 17, 22, 29, 51, 87, 80, 62,
64 18, 22, 37, 56, 68, 109, 103, 77,
65 24, 35, 55, 64, 81, 104, 113, 92,
66 49, 64, 78, 87, 103, 121, 120, 101,
67 72, 92, 95, 98, 112, 100, 103, 99
69 // ff_mpeg1_default_intra_matrix
70 8, 16, 19, 22, 26, 27, 29, 34,
71 16, 16, 22, 24, 27, 29, 34, 37,
72 19, 22, 26, 27, 29, 34, 34, 38,
73 22, 22, 26, 27, 29, 34, 37, 40,
74 22, 26, 27, 29, 32, 35, 40, 48,
75 26, 27, 29, 32, 35, 40, 48, 58,
76 26, 27, 29, 34, 38, 46, 56, 69,
77 27, 29, 35, 38, 46, 56, 69, 83
83 uint32_t freqs[NUM_SYMS];
84 uint32_t cum_freqs[NUM_SYMS + 1];
87 void calc_cum_freqs();
88 void normalize_freqs(uint32_t target_total);
91 void SymbolStats::clear()
93 for (int i=0; i < NUM_SYMS; i++)
97 void SymbolStats::calc_cum_freqs()
100 for (int i=0; i < NUM_SYMS; i++)
101 cum_freqs[i+1] = cum_freqs[i] + freqs[i];
104 void SymbolStats::normalize_freqs(uint32_t target_total)
106 uint64_t real_freq[NUM_SYMS + 1]; // hack
108 assert(target_total >= NUM_SYMS);
111 uint32_t cur_total = cum_freqs[NUM_SYMS];
113 if (cur_total == 0) return;
115 double ideal_cost = 0.0;
116 for (int i = 1; i <= NUM_SYMS; i++)
118 real_freq[i] = cum_freqs[i] - cum_freqs[i - 1];
119 if (real_freq[i] > 0)
120 ideal_cost -= real_freq[i] * log2(real_freq[i] / double(cur_total));
123 OptimalRenormalize(cum_freqs, NUM_SYMS, prob_scale);
125 // calculate updated freqs and make sure we didn't screw anything up
126 assert(cum_freqs[0] == 0 && cum_freqs[NUM_SYMS] == target_total);
127 for (int i=0; i < NUM_SYMS; i++) {
129 assert(cum_freqs[i+1] == cum_freqs[i]);
131 assert(cum_freqs[i+1] > cum_freqs[i]);
134 freqs[i] = cum_freqs[i+1] - cum_freqs[i];
137 double calc_cost = 0.0;
138 for (int i = 1; i <= NUM_SYMS; i++)
140 uint64_t freq = cum_freqs[i] - cum_freqs[i - 1];
141 if (real_freq[i] > 0)
142 calc_cost -= real_freq[i] * log2(freq / double(target_total));
145 static double total_loss = 0.0;
146 total_loss += calc_cost - ideal_cost;
147 static double total_loss_with_dp = 0.0;
148 double optimal_cost = 0.0;
149 //total_loss_with_dp += optimal_cost - ideal_cost;
150 printf("ideal cost = %.0f bits, DP cost = %.0f bits, calc cost = %.0f bits (loss = %.2f bytes, total loss = %.2f bytes, total loss with DP = %.2f bytes)\n",
151 ideal_cost, optimal_cost,
152 calc_cost, (calc_cost - ideal_cost) / 8.0, total_loss / 8.0, total_loss_with_dp / 8.0);
155 SymbolStats stats[128];
157 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
158 // Distance from one stream to the other, based on a hacked-up K-L divergence.
159 float kl_dist[64][64];
162 const int luma_mapping[64] = {
163 0, 0, 1, 1, 2, 2, 3, 3,
164 0, 0, 1, 2, 2, 2, 3, 3,
165 1, 1, 2, 2, 2, 3, 3, 3,
166 1, 1, 2, 2, 2, 3, 3, 3,
167 1, 2, 2, 2, 2, 3, 3, 3,
168 2, 2, 2, 2, 3, 3, 3, 3,
169 2, 2, 3, 3, 3, 3, 3, 3,
170 3, 3, 3, 3, 3, 3, 3, 3,
172 const int chroma_mapping[64] = {
173 0, 1, 1, 2, 2, 2, 3, 3,
174 1, 1, 2, 2, 2, 3, 3, 3,
175 2, 2, 2, 2, 3, 3, 3, 3,
176 2, 2, 2, 3, 3, 3, 3, 3,
177 2, 3, 3, 3, 3, 3, 3, 3,
178 3, 3, 3, 3, 3, 3, 3, 3,
179 3, 3, 3, 3, 3, 3, 3, 3,
180 3, 3, 3, 3, 3, 3, 3, 3,
183 int pick_stats_for(int x, int y, bool is_chroma)
185 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
186 return y * 8 + x + is_chroma * 64;
189 return chroma_mapping[y * 8 + x] + 4;
191 return luma_mapping[y * 8 + x];
197 void write_varint(int x, FILE *fp)
200 putc((x & 0x7f) | 0x80, fp);
210 out_buf.reset(new uint8_t[out_max_size]);
214 void init_prob(SymbolStats &s)
216 for (int i = 0; i < NUM_SYMS; i++) {
217 printf("%d: cumfreqs=%d freqs=%d prob_bits=%d\n", i, s.cum_freqs[i], s.freqs[i], prob_bits + 1);
218 RansEncSymbolInit(&esyms[i], s.cum_freqs[i], s.freqs[i], prob_bits + 1);
220 sign_bias = s.cum_freqs[NUM_SYMS];
225 out_end = out_buf.get() + out_max_size;
226 ptr = out_end; // *end* of output buffer
230 uint32_t save_block(FILE *codedfp) // Returns number of bytes.
232 RansEncFlush(&rans, &ptr);
233 //printf("post-flush = %08x\n", rans);
235 uint32_t num_rans_bytes = out_end - ptr;
237 if (num_rans_bytes == 4) {
239 memcpy(&block, ptr, 4);
241 if (block == last_block) {
242 write_varint(0, codedfp);
253 write_varint(num_rans_bytes, codedfp);
254 //fwrite(&num_rans_bytes, 1, 4, codedfp);
255 fwrite(ptr, 1, num_rans_bytes, codedfp);
257 //printf("first rANS bytes: %02x %02x %02x %02x %02x %02x %02x %02x\n", ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5], ptr[6], ptr[7]);
262 printf("Saving block: %d rANS bytes\n", num_rans_bytes);
263 return num_rans_bytes;
264 //return num_rans_bytes;
267 void encode_coeff(short signed_k)
269 //printf("encoding coeff %d (sym %d), rans before encoding = %08x\n", signed_k, ((abs(signed_k) - 1) & 255), rans);
270 unsigned short k = abs(signed_k);
271 if (k >= ESCAPE_LIMIT) {
272 // Put the coefficient as a 1/(2^12) symbol _before_
273 // the 255 coefficient, since the decoder will read the
274 // 255 coefficient first.
275 RansEncPut(&rans, &ptr, k, 1, prob_bits);
278 RansEncPutSymbol(&rans, &ptr, &esyms[(k - 1) & (NUM_SYMS - 1)]);
285 static constexpr size_t out_max_size = 32 << 20; // 32 MB.
286 static constexpr size_t max_num_sign = 1048576; // Way too big. And actually bytes.
288 unique_ptr<uint8_t[]> out_buf;
292 RansEncSymbol esyms[NUM_SYMS];
295 uint32_t last_block = 0; // Not a valid 4-byte rANS block (?)
298 static constexpr int dc_scalefac = 8; // Matches the FDCT's gain.
299 static constexpr double quant_scalefac = 4.0; // whatever?
301 static inline int quantize(int f, int coeff_idx)
303 if (coeff_idx == 0) {
304 return f / dc_scalefac;
310 const int w = std_luminance_quant_tbl[coeff_idx];
311 const int s = quant_scalefac;
312 int sign_f = (f > 0) ? 1 : -1;
313 return (32 * f + sign_f * w * s) / (2 * w * s);
316 static inline int unquantize(int qf, int coeff_idx)
318 if (coeff_idx == 0) {
319 return qf * dc_scalefac;
325 const int w = std_luminance_quant_tbl[coeff_idx];
326 const int s = quant_scalefac;
327 return (2 * qf * w * s) / 32;
330 void readpix(unsigned char *ptr, const char *filename)
332 FILE *fp = fopen(filename, "rb");
338 fseek(fp, 0, SEEK_END);
339 long len = ftell(fp);
340 assert(len >= WIDTH * HEIGHT * 3);
341 fseek(fp, len - WIDTH * HEIGHT * 3, SEEK_SET);
343 fread(ptr, 1, WIDTH * HEIGHT * 3, fp);
349 double coeff[3] = { 0.2126, 0.7152, 0.0722 }; // sum = 1.0
350 double cb_fac = 1.0 / (coeff[0] + coeff[1] + 1.0f - coeff[2]); // 0.539
351 double cr_fac = 1.0 / (1.0f - coeff[0] + coeff[1] + coeff[2]); // 0.635
353 unique_ptr<float[]> temp_cb(new float[WIDTH * HEIGHT]);
354 unique_ptr<float[]> temp_cr(new float[WIDTH * HEIGHT]);
355 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
356 for (unsigned xb = 0; xb < WIDTH; ++xb) {
357 int r = rgb[((yb * WIDTH) + xb) * 3 + 0];
358 int g = rgb[((yb * WIDTH) + xb) * 3 + 1];
359 int b = rgb[((yb * WIDTH) + xb) * 3 + 2];
360 double y = std::min(std::max(coeff[0] * r + coeff[1] * g + coeff[2] * b, 0.0), 255.0);
361 double cb = (b - y) * cb_fac + 128.0;
362 double cr = (r - y) * cr_fac + 128.0;
363 pix_y[(yb * WIDTH) + xb] = lrint(y);
364 temp_cb[(yb * WIDTH) + xb] = cb;
365 temp_cr[(yb * WIDTH) + xb] = cr;
366 full_pix_cb[(yb * WIDTH) + xb] = lrint(std::min(std::max(cb, 0.0), 255.0));
367 full_pix_cr[(yb * WIDTH) + xb] = lrint(std::min(std::max(cr, 0.0), 255.0));
371 // Simple 4:2:2 subsampling with left convention.
372 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
373 for (unsigned xb = 0; xb < WIDTH / 2; ++xb) {
374 int c0 = yb * WIDTH + std::max(int(xb) * 2 - 1, 0);
375 int c1 = yb * WIDTH + xb * 2;
376 int c2 = yb * WIDTH + xb * 2 + 1;
378 double cb = 0.25 * temp_cb[c0] + 0.5 * temp_cb[c1] + 0.25 * temp_cb[c2];
379 double cr = 0.25 * temp_cr[c0] + 0.5 * temp_cr[c1] + 0.25 * temp_cr[c2];
380 cb = std::min(std::max(cb, 0.0), 255.0);
381 cr = std::min(std::max(cr, 0.0), 255.0);
382 pix_cb[(yb * WIDTH/2) + xb] = lrint(cb);
383 pix_cr[(yb * WIDTH/2) + xb] = lrint(cr);
388 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
389 double find_best_assignment(const int *medoids, int *assignment)
391 double current_score = 0.0;
392 for (int i = 0; i < 64; ++i) {
393 int best_medoid = medoids[0];
394 float best_medoid_score = kl_dist[i][medoids[0]];
395 for (int j = 1; j < NUM_CLUSTERS; ++j) {
396 if (kl_dist[i][medoids[j]] < best_medoid_score) {
397 best_medoid = medoids[j];
398 best_medoid_score = kl_dist[i][medoids[j]];
401 assignment[i] = best_medoid;
402 current_score += best_medoid_score;
404 return current_score;
407 void find_optimal_stream_assignment(int base)
410 for (unsigned i = 0; i < 64; ++i) {
412 for (unsigned k = 0; k < NUM_SYMS; ++k) {
413 s += stats[i + base].freqs[k] + 0.5;
415 inv_sum[i] = 1.0 / s;
418 for (unsigned i = 0; i < 64; ++i) {
419 for (unsigned j = 0; j < 64; ++j) {
421 for (unsigned k = 0; k < NUM_SYMS; ++k) {
422 double p1 = (stats[i + base].freqs[k] + 0.5) * inv_sum[i];
423 double p2 = (stats[j + base].freqs[k] + 0.5) * inv_sum[j];
425 // K-L divergence is asymmetric; this is a hack.
426 d += p1 * log(p1 / p2);
427 d += p2 * log(p2 / p1);
430 //printf("%.3f ", d);
436 int medoids[64]; // only the first NUM_CLUSTERS matter
437 bool is_medoid[64] = { false };
438 std::iota(medoids, medoids + 64, 0);
439 std::random_device rd;
440 std::mt19937 g(rd());
441 std::shuffle(medoids, medoids + 64, g);
442 for (int i = 0; i < NUM_CLUSTERS; ++i) {
443 printf("%d ", medoids[i]);
444 is_medoid[medoids[i]] = true;
448 // assign each data point to the closest medoid
450 double current_score = find_best_assignment(medoids, assignment);
452 for (int i = 0; i < 1000; ++i) {
453 printf("iter %d\n", i);
454 bool any_changed = false;
455 for (int m = 0; m < NUM_CLUSTERS; ++m) {
456 for (int o = 0; o < 64; ++o) {
457 if (is_medoid[o]) continue;
458 int old_medoid = medoids[m];
461 int new_assignment[64];
462 double candidate_score = find_best_assignment(medoids, new_assignment);
464 if (candidate_score < current_score) {
465 current_score = candidate_score;
466 memcpy(assignment, new_assignment, sizeof(assignment));
468 is_medoid[old_medoid] = false;
469 is_medoid[medoids[m]] = true;
470 printf("%f: ", current_score);
471 for (int i = 0; i < 64; ++i) {
472 printf("%d ", assignment[i]);
477 medoids[m] = old_medoid;
481 if (!any_changed) break;
484 std::unordered_map<int, int> rmap;
485 for (int i = 0; i < 64; ++i) {
486 if (i % 8 == 0) printf("\n");
487 if (!rmap.count(assignment[i])) {
488 rmap.emplace(assignment[i], rmap.size());
490 printf("%d, ", rmap[assignment[i]]);
496 int main(int argc, char **argv)
499 readpix(rgb, argv[1]);
501 readpix(rgb, "color.pnm");
504 double sum_sq_err = 0.0;
505 //double last_cb_cfl_fac = 0.0;
506 //double last_cr_cfl_fac = 0.0;
508 // DCT and quantize luma
509 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
510 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
513 for (unsigned y = 0; y < 8; ++y) {
514 for (unsigned x = 0; x < 8; ++x) {
515 in_y[y * 8 + x] = pix_y[(yb + y) * WIDTH + (xb + x)];
522 for (unsigned y = 0; y < 8; ++y) {
523 for (unsigned x = 0; x < 8; ++x) {
524 int coeff_idx = y * 8 + x;
525 int k = quantize(in_y[coeff_idx], coeff_idx);
526 coeff_y[(yb + y) * WIDTH + (xb + x)] = k;
528 // Store back for reconstruction / PSNR calculation
529 in_y[coeff_idx] = unquantize(k, coeff_idx);
535 for (unsigned y = 0; y < 8; ++y) {
536 for (unsigned x = 0; x < 8; ++x) {
537 int k = clamp(in_y[y * 8 + x]);
538 uint8_t *ptr = &pix_y[(yb + y) * WIDTH + (xb + x)];
539 sum_sq_err += (*ptr - k) * (*ptr - k);
545 double mse = sum_sq_err / double(WIDTH * HEIGHT);
546 double psnr_db = 20 * log10(255.0 / sqrt(mse));
547 printf("psnr = %.2f dB\n", psnr_db);
549 //double chroma_energy = 0.0, chroma_energy_pred = 0.0;
551 // DCT and quantize chroma
552 //double last_cb_cfl_fac = 0.0, last_cr_cfl_fac = 0.0;
553 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
554 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
556 // TF switch: Two 8x8 luma blocks -> one 16x8 block, then drop high frequencies
557 printf("in blocks:\n");
558 for (unsigned y = 0; y < 8; ++y) {
559 for (unsigned x = 0; x < 8; ++x) {
560 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
564 for (unsigned x = 0; x < 8; ++x) {
565 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
572 for (unsigned y = 0; y < 8; ++y) {
573 for (unsigned x = 0; x < 4; ++x) {
574 short a = coeff_y[(yb + y) * WIDTH + (xb*2 + x)];
575 short b = coeff_y[(yb + y) * WIDTH + (xb*2 + x + 8)];
578 in_y[y * 8 + x * 2 + 0] = a;
579 in_y[y * 8 + x * 2 + 1] = b;
583 printf("tf-ed block:\n");
584 for (unsigned y = 0; y < 8; ++y) {
585 for (unsigned x = 0; x < 8; ++x) {
586 short a = in_y[y * 8 + x];
592 // Read Y block with no tf switch (from reconstructed luma)
594 for (unsigned y = 0; y < 8; ++y) {
595 for (unsigned x = 0; x < 8; ++x) {
596 in_y[y * 8 + x] = pix_y[(yb + y) * (WIDTH) + (xb + x) * 2];
603 short in_cb[64], in_cr[64];
604 for (unsigned y = 0; y < 8; ++y) {
605 for (unsigned x = 0; x < 8; ++x) {
606 in_cb[y * 8 + x] = pix_cb[(yb + y) * (WIDTH/2) + (xb + x)];
607 in_cr[y * 8 + x] = pix_cr[(yb + y) * (WIDTH/2) + (xb + x)];
620 double denom = (x0 * x0 + x1 * x1 + x2 * x2);
621 //double denom = (x1 * x1);
623 double y0 = in_cb[1];
624 double y1 = in_cb[8];
625 double y2 = in_cb[9];
626 double cb_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
627 //double cb_cfl_fac = (x1 * y1) / denom;
629 for (unsigned y = 0; y < 8; ++y) {
630 for (unsigned x = 0; x < 8; ++x) {
631 short a = in_y[y * 8 + x];
635 for (unsigned x = 0; x < 8; ++x) {
636 short a = in_cb[y * 8 + x];
641 printf("(%d,%d,%d) -> (%d,%d,%d) gives %f\n",
642 in_y[1], in_y[8], in_y[9],
643 in_cb[1], in_cb[8], in_cb[9],
649 double cr_cfl_fac = (x0 * y0 + x1 * y1 + x2 * y2) / denom;
650 //double cr_cfl_fac = (x1 * y1) / denom;
651 printf("cb CfL = %7.3f dc = %5d cr CfL = %7.3f dc = %d\n",
652 cb_cfl_fac, in_cb[0] - in_y[0],
653 cr_cfl_fac, in_cr[0] - in_y[0]);
655 if (denom == 0.0) { cb_cfl_fac = cr_cfl_fac = 0.0; }
658 //last_cb_cfl_fac = cb_cfl_fac;
659 //last_cr_cfl_fac = cr_cfl_fac;
661 for (unsigned coeff_idx = 1; coeff_idx < 64; ++coeff_idx) {
662 //printf("%2d: cb = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cb[coeff_idx], last_cb_cfl_fac, in_y[coeff_idx], last_cb_cfl_fac * in_y[coeff_idx]);
663 //printf("%2d: cr = %3d prediction = %f * %3d = %7.3f\n", coeff_idx, in_cr[coeff_idx], last_cr_cfl_fac, in_y[coeff_idx], last_cr_cfl_fac * in_y[coeff_idx]);
664 double cb_pred = last_cb_cfl_fac * in_y[coeff_idx];
665 chroma_energy += in_cb[coeff_idx] * in_cb[coeff_idx];
666 chroma_energy_pred += (in_cb[coeff_idx] - cb_pred) * (in_cb[coeff_idx] - cb_pred);
668 //in_cb[coeff_idx] -= lrint(last_cb_cfl_fac * in_y[coeff_idx]);
669 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
670 //in_cr[coeff_idx] -= lrint(last_cr_cfl_fac * in_y[coeff_idx]);
671 //in_cb[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
672 //in_cr[coeff_idx] = lrint(in_y[coeff_idx] * (1.0 / sqrt(2)));
673 //in_cb[coeff_idx] = lrint(in_y[coeff_idx]);
674 //in_cr[coeff_idx] = lrint(in_y[coeff_idx]);
678 //in_cb[0] -= in_y[0];
679 //in_cr[0] -= in_y[0];
682 for (unsigned y = 0; y < 8; ++y) {
683 for (unsigned x = 0; x < 8; ++x) {
684 int coeff_idx = y * 8 + x;
685 int k_cb = quantize(in_cb[coeff_idx], coeff_idx);
686 coeff_cb[(yb + y) * (WIDTH/2) + (xb + x)] = k_cb;
687 int k_cr = quantize(in_cr[coeff_idx], coeff_idx);
688 coeff_cr[(yb + y) * (WIDTH/2) + (xb + x)] = k_cr;
690 // Store back for reconstruction / PSNR calculation
691 in_cb[coeff_idx] = unquantize(k_cb, coeff_idx);
692 in_cr[coeff_idx] = unquantize(k_cr, coeff_idx);
696 idct_int32(in_y); // DEBUG
700 for (unsigned y = 0; y < 8; ++y) {
701 for (unsigned x = 0; x < 8; ++x) {
702 pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cb[y * 8 + x]);
703 pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = clamp(in_cr[y * 8 + x]);
705 // pix_cb[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
706 // pix_cr[(yb + y) * (WIDTH/2) + (xb + x)] = in_y[y * 8 + x];
711 last_cb_cfl_fac = cb_cfl_fac;
712 last_cr_cfl_fac = cr_cfl_fac;
718 printf("chroma_energy = %f, with_pred = %f\n",
719 chroma_energy / (WIDTH * HEIGHT), chroma_energy_pred / (WIDTH * HEIGHT));
722 // DC coefficient pred from the right to left
723 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
724 for (unsigned xb = 0; xb < WIDTH - 8; xb += 8) {
725 coeff_y[yb * WIDTH + xb] -= coeff_y[yb * WIDTH + (xb + 8)];
728 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
729 for (unsigned xb = 0; xb < WIDTH/2 - 8; xb += 8) {
730 coeff_cb[yb * WIDTH/2 + xb] -= coeff_cb[yb * WIDTH/2 + (xb + 8)];
731 coeff_cr[yb * WIDTH/2 + xb] -= coeff_cr[yb * WIDTH/2 + (xb + 8)];
735 FILE *fp = fopen("reconstructed.pgm", "wb");
736 fprintf(fp, "P5\n%d %d\n255\n", WIDTH, HEIGHT);
737 fwrite(pix_y, 1, WIDTH * HEIGHT, fp);
740 fp = fopen("reconstructed.pnm", "wb");
741 fprintf(fp, "P6\n%d %d\n255\n", WIDTH, HEIGHT);
742 for (unsigned yb = 0; yb < HEIGHT; ++yb) {
743 for (unsigned xb = 0; xb < WIDTH; ++xb) {
744 int y = pix_y[(yb * WIDTH) + xb];
746 int c0 = yb * (WIDTH/2) + xb/2;
748 cb = pix_cb[c0] - 128.0;
749 cr = pix_cr[c0] - 128.0;
751 int c1 = yb * (WIDTH/2) + std::min<int>(xb/2 + 1, WIDTH/2 - 1);
752 cb = 0.5 * (pix_cb[c0] + pix_cb[c1]) - 128.0;
753 cr = 0.5 * (pix_cr[c0] + pix_cr[c1]) - 128.0;
756 double r = y + 1.5748 * cr;
757 double g = y - 0.1873 * cb - 0.4681 * cr;
758 double b = y + 1.8556 * cb;
760 putc(clamp(lrint(r)), fp);
761 putc(clamp(lrint(g)), fp);
762 putc(clamp(lrint(b)), fp);
767 // For each coefficient, make some tables.
768 size_t extra_bits = 0;
769 for (unsigned i = 0; i < 64; ++i) {
772 for (unsigned y = 0; y < 8; ++y) {
773 for (unsigned x = 0; x < 8; ++x) {
774 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
775 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
778 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
779 for (unsigned xb = 0; xb < WIDTH; xb += 8) {
780 unsigned short k = abs(coeff_y[(yb + y) * WIDTH + (xb + x)]);
781 if (k >= ESCAPE_LIMIT) {
783 extra_bits += 12; // escape this one
785 ++s_luma.freqs[(k - 1) & (NUM_SYMS - 1)];
789 for (unsigned yb = 0; yb < HEIGHT; yb += 8) {
790 for (unsigned xb = 0; xb < WIDTH/2; xb += 8) {
791 unsigned short k_cb = abs(coeff_cb[(yb + y) * WIDTH/2 + (xb + x)]);
792 unsigned short k_cr = abs(coeff_cr[(yb + y) * WIDTH/2 + (xb + x)]);
793 if (k_cb >= ESCAPE_LIMIT) {
795 extra_bits += 12; // escape this one
797 if (k_cr >= ESCAPE_LIMIT) {
799 extra_bits += 12; // escape this one
801 ++s_chroma.freqs[(k_cb - 1) & (NUM_SYMS - 1)];
802 ++s_chroma.freqs[(k_cr - 1) & (NUM_SYMS - 1)];
808 #if FIND_OPTIMAL_STREAM_ASSIGNMENT
810 find_optimal_stream_assignment(0);
812 find_optimal_stream_assignment(64);
816 for (unsigned i = 0; i < 64; ++i) {
817 stats[i].freqs[NUM_SYMS - 1] /= 2; // zero, has no sign bits (yes, this is trickery)
818 stats[i].normalize_freqs(prob_scale);
819 stats[i].cum_freqs[NUM_SYMS] += stats[i].freqs[NUM_SYMS - 1];
820 stats[i].freqs[NUM_SYMS - 1] *= 2;
823 FILE *codedfp = fopen("coded.dat", "wb");
824 if (codedfp == nullptr) {
829 // TODO: rather gamma-k or something
830 for (unsigned i = 0; i < 64; ++i) {
831 if (stats[i].cum_freqs[NUM_SYMS] == 0) {
834 printf("writing table %d\n", i);
835 for (unsigned j = 0; j < NUM_SYMS; ++j) {
836 write_varint(stats[i].freqs[j], codedfp);
840 RansEncoder rans_encoder;
842 size_t tot_bytes = 0;
845 for (unsigned y = 0; y < 8; ++y) {
846 for (unsigned x = 0; x < 8; ++x) {
847 SymbolStats &s_luma = stats[pick_stats_for(x, y, false)];
848 rans_encoder.init_prob(s_luma);
851 std::vector<int> lens;
853 // need to reverse later
854 rans_encoder.clear();
855 size_t num_bytes = 0;
856 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS; ++block_idx) {
857 unsigned yb = block_idx / WIDTH_BLOCKS;
858 unsigned xb = block_idx % WIDTH_BLOCKS;
860 int k = coeff_y[(yb * 8 + y) * WIDTH + (xb * 8 + x)];
861 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
862 rans_encoder.encode_coeff(k);
864 if (block_idx % 320 == 319 || block_idx == NUM_BLOCKS - 1) {
865 int l = rans_encoder.save_block(codedfp);
870 tot_bytes += num_bytes;
871 printf("coeff %d Y': %ld bytes\n", y * 8 + x, num_bytes);
877 double avg_l = sum_l / lens.size();
879 double sum_sql = 0.0;
881 sum_sql += (l - avg_l) * (l - avg_l);
883 double stddev_l = sqrt(sum_sql / (lens.size() - 1));
884 printf("coeff %d: avg=%.2f bytes, stddev=%.2f bytes\n", y*8+x, avg_l, stddev_l);
889 for (unsigned y = 0; y < 8; ++y) {
890 for (unsigned x = 0; x < 8; ++x) {
891 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
892 rans_encoder.init_prob(s_chroma);
894 rans_encoder.clear();
895 size_t num_bytes = 0;
896 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
897 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
898 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
900 int k = coeff_cb[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
901 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
902 rans_encoder.encode_coeff(k);
904 if (block_idx % 320 == 319 || block_idx == NUM_BLOCKS - 1) {
905 num_bytes += rans_encoder.save_block(codedfp);
908 tot_bytes += num_bytes;
909 printf("coeff %d Cb: %ld bytes\n", y * 8 + x, num_bytes);
914 for (unsigned y = 0; y < 8; ++y) {
915 for (unsigned x = 0; x < 8; ++x) {
916 SymbolStats &s_chroma = stats[pick_stats_for(x, y, true)];
917 rans_encoder.init_prob(s_chroma);
919 rans_encoder.clear();
920 size_t num_bytes = 0;
921 for (unsigned block_idx = 0; block_idx < NUM_BLOCKS_CHROMA; ++block_idx) {
922 unsigned yb = block_idx / WIDTH_BLOCKS_CHROMA;
923 unsigned xb = block_idx % WIDTH_BLOCKS_CHROMA;
925 int k = coeff_cr[(yb * 8 + y) * WIDTH/2 + (xb * 8 + x)];
926 //printf("encoding coeff %d xb,yb=%d,%d: %d\n", y*8+x, xb, yb, k);
927 rans_encoder.encode_coeff(k);
929 if (block_idx % 320 == 319 || block_idx == NUM_BLOCKS - 1) {
930 num_bytes += rans_encoder.save_block(codedfp);
933 tot_bytes += num_bytes;
934 printf("coeff %d Cr: %ld bytes\n", y * 8 + x, num_bytes);
938 printf("%ld bytes + %ld escape bits (%ld) = %ld total bytes\n",
939 tot_bytes - extra_bits / 8,