git.sesse.net Git - fjl/blob - idct.c

   1 #include <math.h>
   2 #include <string.h>
   3 #include <stdlib.h>
   4
   5 #include "idct.h"
   6
   7 void* idct_reference_alloc(const uint32_t* quant_table)
   8 {
   9         uint32_t* qt_copy = (uint32_t*)malloc(DCTSIZE2 * sizeof(uint32_t));
  10         // FIXME: check for NULL return
  11
  12         memcpy(qt_copy, quant_table, DCTSIZE2 * sizeof(uint32_t));
  13
  14         return qt_copy;
  15 }
  16
  17 void idct_reference_free(void* userdata)
  18 {
  19         free(userdata);
  20 }
  21
  22 void idct_reference(const int16_t* input, const void* userdata, uint8_t* output)
  23 {
  24         const uint32_t* quant_table = (const uint32_t*)userdata;
  25         double temp[DCTSIZE2];
  26
  27         for (unsigned y = 0; y < 8; ++y) {
  28                 for (unsigned x = 0; x < 8; ++x) {
  29                         double acc = 0.0;
  30                         for (unsigned u = 0; u < 8; ++u) {
  31                                 for (unsigned v = 0; v < 8; ++v) {
  32                                         double c_u = (u == 0) ? 1/sqrt(2.0) : 1.0;
  33                                         double c_v = (v == 0) ? 1/sqrt(2.0) : 1.0;
  34                                         acc += c_u * c_v
  35                                                 * input[u * DCTSIZE + v] * quant_table[u * DCTSIZE + v]
  36                                                 * cos((2 * x + 1) * v * M_PI / 16.0)
  37                                                 * cos((2 * y + 1) * u * M_PI / 16.0);
  38                                 }
  39                         }
  40                         temp[y * DCTSIZE + x] = 0.25 * acc;
  41                 }
  42         }
  43
  44         for (unsigned y = 0; y < 8; ++y) {
  45                 for (unsigned x = 0; x < 8; ++x) {
  46                         double val = temp[y * DCTSIZE + x];
  47                         if (val < 0.0) {
  48                                 output[y * DCTSIZE + x] = 0;
  49                         } else if (val >= 255.0) {
  50                                 output[y * DCTSIZE + x] = 255;
  51                         } else {
  52                                 output[y * DCTSIZE + x] = (uint8_t)(val + 0.5);
  53                         }
  54                 }
  55         }
  56 }
  57
  58 // AA&N (Arai, Agui and Nakajima) floating-point IDCT.
  59 // This IDCT is based on the same DCT that libjpeg uses -- in fact, exactly the
  60 // same figure from the same book ("JPEG: Still Image Data Compression Standard",
  61 // page 52, figure 4-8). However, it is coded from scratch, and uses the
  62 // transposition method for converting DCT -> IDCT suggested in the book.
  63 // (libjpeg seems to use some other method that yields similar, but not
  64 // the same, code.)
  65
  66 // As this is generally meant as a reference and not useful code (we expect
  67 // a SIMD fixed-point algorithm to be used in most cases), it has not been
  68 // attempted significantly optimized. We assume the compiler will be smart
  69 // enough to do all the variable propagation for us anyway.
  70
  71 // Scale factors; 1.0 / (sqrt(2.0) * cos(k * M_PI / 16.0)), except for the first which is 1.
  72 static const double scalefac[] = {
  73         1.0, 0.7209598220069479, 0.765366864730180, 0.8504300947672564,
  74         1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502
  75 };
  76
  77 // Premultiply the scale factors and the overall 1/8 factor into the quantization
  78 // table entries (and convert to double).
  79 void* idct_float_alloc(const uint32_t* quant_table)
  80 {
  81         double* qt_copy = (double*)malloc(DCTSIZE2 * sizeof(double));
  82
  83         for (unsigned y = 0; y < DCTSIZE; ++y) {
  84                 for (unsigned x = 0; x < DCTSIZE; ++x) {
  85                         qt_copy[y * DCTSIZE + x] = 0.125 * quant_table[y * DCTSIZE + x] *
  86                                 scalefac[x] * scalefac[y];
  87                 }
  88         }
  89
  90         return qt_copy;
  91 }
  92
  93 void idct_float_free(void* userdata)
  94 {
  95         free(userdata);
  96 }
  97
  98 // 1D 8-point DCT.
  99 static inline void idct1d_float(double y0, double y1, double y2, double y3, double y4, double y5, double y6, double y7, double *x)
 100 {
 101         // constants
 102         static const double a1 = 0.7071067811865474;   // sqrt(2)
 103         static const double a2 = 0.5411961001461971;   // cos(3/8 pi) * sqrt(2)
 104         static const double a3 = a1;
 105         static const double a4 = 1.3065629648763766;   // cos(pi/8) * sqrt(2)
 106         static const double a5 = 0.5 * (a4 - a2);
 107
 108         // phase 1
 109         const double p1_0 = y0;
 110         const double p1_1 = y4;
 111         const double p1_2 = y2;
 112         const double p1_3 = y6;
 113         const double p1_4 = y5;
 114         const double p1_5 = y1;
 115         const double p1_6 = y7;
 116         const double p1_7 = y3;
 117
 118         // phase 2
 119         const double p2_0 = p1_0;
 120         const double p2_1 = p1_1;
 121         const double p2_2 = p1_2;
 122         const double p2_3 = p1_3;
 123         const double p2_4 = p1_4 - p1_7;
 124         const double p2_5 = p1_5 + p1_6;
 125         const double p2_6 = p1_5 - p1_6;
 126         const double p2_7 = p1_4 + p1_7;
 127
 128         // phase 3
 129         const double p3_0 = p2_0;
 130         const double p3_1 = p2_1;
 131         const double p3_2 = p2_2 - p2_3;
 132         const double p3_3 = p2_2 + p2_3;
 133         const double p3_4 = p2_4;
 134         const double p3_5 = p2_5 - p2_7;
 135         const double p3_6 = p2_6;
 136         const double p3_7 = p2_5 + p2_7;
 137
 138         // phase 4
 139         const double p4_0 = p3_0;
 140         const double p4_1 = p3_1;
 141         const double p4_2 = a1 * p3_2;
 142         const double p4_3 = p3_3;
 143         const double p4_4 = p3_4 * -a2 + (p3_4 + p3_6) * -a5;
 144         const double p4_5 = a3 * p3_5;
 145         const double p4_6 = p3_6 * a4 + (p3_4 + p3_6) * -a5;
 146         const double p4_7 = p3_7;
 147
 148         // phase 5
 149         const double p5_0 = p4_0 + p4_1;
 150         const double p5_1 = p4_0 - p4_1;
 151         const double p5_2 = p4_2;
 152         const double p5_3 = p4_2 + p4_3;
 153         const double p5_4 = p4_4;
 154         const double p5_5 = p4_5;
 155         const double p5_6 = p4_6;
 156         const double p5_7 = p4_7;
 157
 158         // phase 6
 159         const double p6_0 = p5_0 + p5_3;
 160         const double p6_1 = p5_1 + p5_2;
 161         const double p6_2 = p5_1 - p5_2;
 162         const double p6_3 = p5_0 - p5_3;
 163         const double p6_4 = -p5_4;
 164         const double p6_5 = p5_5 - p5_4;
 165         const double p6_6 = p5_5 + p5_6;
 166         const double p6_7 = p5_6 + p5_7;
 167
 168         // phase 7
 169         x[0] = p6_0 + p6_7;
 170         x[1] = p6_1 + p6_6;
 171         x[2] = p6_2 + p6_5;
 172         x[3] = p6_3 + p6_4;
 173         x[4] = p6_3 - p6_4;
 174         x[5] = p6_2 - p6_5;
 175         x[6] = p6_1 - p6_6;
 176         x[7] = p6_0 - p6_7;
 177 }
 178
 179 void idct_float(const int16_t* input, const void* userdata, uint8_t* output)
 180 {
 181         const double* quant_table = (const double*)userdata;
 182         double temp[DCTSIZE2];
 183
 184         // IDCT columns.
 185         for (unsigned x = 0; x < DCTSIZE; ++x) {
 186                 idct1d_float(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x],
 187                              input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x],
 188                              input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x],
 189                              input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x],
 190                              input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x],
 191                              input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x],
 192                              input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x],
 193                              input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x],
 194                              temp + x * DCTSIZE);
 195         }
 196
 197         // IDCT rows.
 198         for (unsigned y = 0; y < DCTSIZE; ++y) {
 199                 double temp2[DCTSIZE];
 200                 idct1d_float(temp[DCTSIZE * 0 + y],
 201                              temp[DCTSIZE * 1 + y],
 202                              temp[DCTSIZE * 2 + y],
 203                              temp[DCTSIZE * 3 + y],
 204                              temp[DCTSIZE * 4 + y],
 205                              temp[DCTSIZE * 5 + y],
 206                              temp[DCTSIZE * 6 + y],
 207                              temp[DCTSIZE * 7 + y],
 208                              temp2);
 209                 for (unsigned x = 0; x < DCTSIZE; ++x) {
 210                         const double val = temp2[x];
 211                         if (val < 0.0) {
 212                                 output[y * DCTSIZE + x] = 0;
 213                         } else if (val >= 255.0) {
 214                                 output[y * DCTSIZE + x] = 255;
 215                         } else {
 216                                 output[y * DCTSIZE + x] = (uint8_t)(val + 0.5);
 217                         }
 218                 }
 219         }
 220 }