git.sesse.net Git - fjl/blob - idct_imprecise_int.c

   1 #include <math.h>
   2 #include <string.h>
   3 #include <stdlib.h>
   4
   5 #include "idct.h"
   6
   7 #define TRUNCATE_BITS 11
   8 #define TRUNCATE_TABLE_SIZE (1 << TRUNCATE_BITS)
   9 #define TRUNCATE_TABLE_BIAS (1 << (TRUNCATE_BITS - 1))
  10
  11 struct idct_imprecise_int_userdata {
  12         int32_t qt_copy[DCTSIZE2];
  13         uint8_t truncate_table[TRUNCATE_TABLE_SIZE];
  14 };
  15
  16 #define PRECISION 12
  17 #define ROUND_BIAS (257LL << (PRECISION - 1))
  18
  19 #define FIX(x) ((int32_t)((x) * (1LL << PRECISION) + 0.5))
  20
  21 // Scale factors; 1.0 / (sqrt(2.0) * cos(k * M_PI / 16.0)), except for the first which is 1.
  22 static const double scalefac[] = {
  23         1.0, 0.7209598220069479, 0.765366864730180, 0.8504300947672564,
  24         1.0, 1.2727585805728336, 1.847759065022573, 3.6245097854115502
  25 };
  26
  27 // Premultiply the scale factors and the overall 1/8 factor into the quantization
  28 // table entries (and convert to fixed-point).
  29 void* idct_imprecise_int_alloc(const uint32_t* quant_table)
  30 {
  31         struct idct_imprecise_int_userdata* userdata = (struct idct_imprecise_int_userdata*)malloc(sizeof(struct idct_imprecise_int_userdata));
  32
  33         for (unsigned y = 0; y < DCTSIZE; ++y) {
  34                 for (unsigned x = 0; x < DCTSIZE; ++x) {
  35                         userdata->qt_copy[y * DCTSIZE + x] = FIX((1.0/DCTSIZE) * quant_table[y * DCTSIZE + x] *
  36                                 scalefac[x] * scalefac[y]);
  37                 }
  38         }
  39         for (unsigned i = 0; i < TRUNCATE_TABLE_SIZE; ++i) {
  40                 int source_val = i - TRUNCATE_TABLE_BIAS;
  41                 if (source_val < 0) {
  42                         userdata->truncate_table[i] = 0;
  43                 } else if (source_val >= 255) {
  44                         userdata->truncate_table[i] = 255;
  45                 } else {
  46                         userdata->truncate_table[i] = source_val;
  47                 }
  48         }
  49
  50         return userdata;
  51 }
  52
  53 void idct_imprecise_int_free(void* userdata)
  54 {
  55         free(userdata);
  56 }
  57
  58 // 1D 8-point DCT.
  59 static inline void idct1d_int(int32_t y0, int32_t y1, int32_t y2, int32_t y3, int32_t y4, int32_t y5, int32_t y6, int32_t y7, int32_t *x)
  60 {
  61         // constants
  62         static const int32_t a1 = FIX(0.7071067811865474);   // sqrt(2)
  63         static const int32_t a2 = FIX(0.5411961001461971);   // cos(3/8 pi) * sqrt(2)
  64         static const int32_t a3 = a1;
  65         static const int32_t a4 = FIX(1.3065629648763766);   // cos(pi/8) * sqrt(2)
  66         static const int32_t a5 = FIX(0.5 * (1.3065629648763766 - 0.5411961001461971));
  67
  68         // phase 1
  69         const int32_t p1_0 = y0;
  70         const int32_t p1_1 = y4;
  71         const int32_t p1_2 = y2;
  72         const int32_t p1_3 = y6;
  73         const int32_t p1_4 = y5;
  74         const int32_t p1_5 = y1;
  75         const int32_t p1_6 = y7;
  76         const int32_t p1_7 = y3;
  77
  78         // phase 2
  79         const int32_t p2_0 = p1_0;
  80         const int32_t p2_1 = p1_1;
  81         const int32_t p2_2 = p1_2;
  82         const int32_t p2_3 = p1_3;
  83         const int32_t p2_4 = p1_4 - p1_7;
  84         const int32_t p2_5 = p1_5 + p1_6;
  85         const int32_t p2_6 = p1_5 - p1_6;
  86         const int32_t p2_7 = p1_4 + p1_7;
  87
  88         // phase 3
  89         const int32_t p3_0 = p2_0;
  90         const int32_t p3_1 = p2_1;
  91         const int32_t p3_2 = p2_2 - p2_3;
  92         const int32_t p3_3 = p2_2 + p2_3;
  93         const int32_t p3_4 = p2_4;
  94         const int32_t p3_5 = p2_5 - p2_7;
  95         const int32_t p3_6 = p2_6;
  96         const int32_t p3_7 = p2_5 + p2_7;
  97
  98         // phase 4
  99         const int32_t p4_0 = p3_0;
 100         const int32_t p4_1 = p3_1;
 101         const int32_t p4_2 = (a1 * p3_2) >> PRECISION;
 102         const int32_t p4_3 = p3_3;
 103         const int32_t p4_4 = (p3_4 * -a2 + (p3_4 + p3_6) * -a5) >> PRECISION;
 104         const int32_t p4_5 = (a3 * p3_5) >> PRECISION;
 105         const int32_t p4_6 = (p3_6 * a4 + (p3_4 + p3_6) * -a5) >> PRECISION;
 106         const int32_t p4_7 = p3_7;
 107
 108         // phase 5
 109         const int32_t p5_0 = p4_0 + p4_1;
 110         const int32_t p5_1 = p4_0 - p4_1;
 111         const int32_t p5_2 = p4_2;
 112         const int32_t p5_3 = p4_2 + p4_3;
 113         const int32_t p5_4 = p4_4;
 114         const int32_t p5_5 = p4_5;
 115         const int32_t p5_6 = p4_6;
 116         const int32_t p5_7 = p4_7;
 117
 118         // phase 6
 119         const int32_t p6_0 = p5_0 + p5_3;
 120         const int32_t p6_1 = p5_1 + p5_2;
 121         const int32_t p6_2 = p5_1 - p5_2;
 122         const int32_t p6_3 = p5_0 - p5_3;
 123         const int32_t p6_4 = -p5_4;
 124         const int32_t p6_5 = p5_5 - p5_4;
 125         const int32_t p6_6 = p5_5 + p5_6;
 126         const int32_t p6_7 = p5_6 + p5_7;
 127
 128         // phase 7
 129         x[0] = p6_0 + p6_7;
 130         x[1] = p6_1 + p6_6;
 131         x[2] = p6_2 + p6_5;
 132         x[3] = p6_3 + p6_4;
 133         x[4] = p6_3 - p6_4;
 134         x[5] = p6_2 - p6_5;
 135         x[6] = p6_1 - p6_6;
 136         x[7] = p6_0 - p6_7;
 137 }
 138
 139 void idct_imprecise_int(const int16_t* input, const void* userdata, uint8_t* output)
 140 {
 141         const struct idct_imprecise_int_userdata* my_userdata = (const struct idct_imprecise_int_userdata*)userdata;
 142         const int32_t* quant_table = my_userdata->qt_copy;
 143         int32_t temp[DCTSIZE2];
 144
 145         // IDCT columns.
 146         for (unsigned x = 0; x < DCTSIZE; ++x) {
 147                 idct1d_int(input[DCTSIZE * 0 + x] * quant_table[DCTSIZE * 0 + x],
 148                            input[DCTSIZE * 1 + x] * quant_table[DCTSIZE * 1 + x],
 149                            input[DCTSIZE * 2 + x] * quant_table[DCTSIZE * 2 + x],
 150                            input[DCTSIZE * 3 + x] * quant_table[DCTSIZE * 3 + x],
 151                            input[DCTSIZE * 4 + x] * quant_table[DCTSIZE * 4 + x],
 152                            input[DCTSIZE * 5 + x] * quant_table[DCTSIZE * 5 + x],
 153                            input[DCTSIZE * 6 + x] * quant_table[DCTSIZE * 6 + x],
 154                            input[DCTSIZE * 7 + x] * quant_table[DCTSIZE * 7 + x],
 155                            temp + x * DCTSIZE);
 156         }
 157
 158         // IDCT rows.
 159         for (unsigned y = 0; y < DCTSIZE; ++y) {
 160                 int32_t temp2[DCTSIZE];
 161                 idct1d_int(temp[DCTSIZE * 0 + y],
 162                            temp[DCTSIZE * 1 + y],
 163                            temp[DCTSIZE * 2 + y],
 164                            temp[DCTSIZE * 3 + y],
 165                            temp[DCTSIZE * 4 + y],
 166                            temp[DCTSIZE * 5 + y],
 167                            temp[DCTSIZE * 6 + y],
 168                            temp[DCTSIZE * 7 + y],
 169                            temp2);
 170                 for (unsigned x = 0; x < DCTSIZE; ++x) {
 171                         const int32_t val = (temp2[x] + ROUND_BIAS + FIX(TRUNCATE_TABLE_BIAS)) >> PRECISION;
 172                         output[y * DCTSIZE + x] = my_userdata->truncate_table[val & ((1 << TRUNCATE_BITS)-1)];
 173                 }
 174         }
 175 }