/*
 * Copyright (C) 2016 foo86
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <stdint.h>
#include <stdlib.h>

#include "libavutil/common.h"
/*
 * Add adjacent input pairs: output[n] = input[2n] + input[2n + 1].
 *
 * Reads 2 * len values from input and writes len values to output.
 */
static void sum_a(const int *input, int *output, int len)
{
    const int *pair = input;
    int n;

    for (n = 0; n < len; n++, pair += 2)
        output[n] = pair[0] + pair[1];
}
/*
 * Add each even-indexed input to its left neighbour:
 * output[n] = input[2n] + input[2n - 1] for n >= 1.
 *
 * The n = 0 term has no left neighbour, so output[0] is just input[0].
 * Reads input[0 .. 2 * len - 2] and writes len values to output.
 * Nothing is written when len <= 0.
 */
static void sum_b(const int *input, int *output, int len)
{
    int i;

    if (len <= 0)
        return;

    /* Boundary term: input[-1] is treated as zero. */
    output[0] = input[0];
    for (i = 1; i < len; i++)
        output[i] = input[2 * i] + input[2 * i - 1];
}
/*
 * Keep only the even-indexed inputs: output[n] = input[2n].
 *
 * Reads input[0 .. 2 * len - 2] and writes len values to output.
 */
static void sum_c(const int *input, int *output, int len)
{
    int n = 0;

    while (n < len) {
        output[n] = input[n * 2];
        n++;
    }
}
/*
 * Add the two odd-indexed neighbours of each even position:
 * output[n] = input[2n - 1] + input[2n + 1] for n >= 1.
 *
 * The n = 0 term has no left neighbour, so output[0] is just input[1].
 * Reads input[1 .. 2 * len - 1] and writes len values to output.
 * Nothing is written when len <= 0.
 */
static void sum_d(const int *input, int *output, int len)
{
    int i;

    if (len <= 0)
        return;

    /* Boundary term: input[-1] is treated as zero. */
    output[0] = input[1];
    for (i = 1; i < len; i++)
        output[i] = input[2 * i - 1] + input[2 * i + 1];
}
/*
 * 8x8 fixed-point matrix multiply: each output[i] is the dot product of
 * row i of cos_mod with input[0..7], accumulated in 64 bits and then
 * reduced to int by norm23().
 *
 * NOTE(review): norm23() is declared outside this file; it presumably
 * removes the 23 fractional bits of the coefficients -- confirm.
 */
static void dct_a(const int *input, int *output)
{
    static const int cos_mod[8][8] = {
        { 8348215,  8027397,  7398092,  6484482,  5321677,  3954362,  2435084,   822227 },
        { 8027397,  5321677,   822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
        { 7398092,   822227, -6484482, -8027397, -2435084,  5321677,  8348215,  3954362 },
        { 6484482, -3954362, -8027397,   822227,  8348215,  2435084, -7398092, -5321677 },
        { 5321677, -7398092, -2435084,  8348215,  -822227, -8027397,  3954362,  6484482 },
        { 3954362, -8348215,  5321677,  2435084, -8027397,  6484482,   822227, -7398092 },
        { 2435084, -6484482,  8348215, -7398092,  3954362,   822227, -5321677,  8027397 },
        {  822227, -2435084,  3954362, -5321677,  6484482, -7398092,  8027397, -8348215 }
    };

    int i, j;

    for (i = 0; i < 8; i++) {
        /* Fresh 64-bit accumulator for every output row. */
        int64_t res = 0;
        for (j = 0; j < 8; j++)
            res += (int64_t)cos_mod[i][j] * input[j];
        output[i] = norm23(res);
    }
}
/*
 * 8-output fixed-point transform of input[0..7].
 *
 * input[0] enters every row with weight 1 << 23; input[1..7] are weighted
 * by the seven coefficients of the matching cos_mod row. The 64-bit sum is
 * reduced to int by norm23() (declared outside this file).
 */
static void dct_b(const int *input, int *output)
{
    static const int cos_mod[8][7] = {
        {  8227423,  7750063,  6974873,  5931642,  4660461,  3210181,  1636536 },
        {  6974873,  3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
        {  4660461, -3210181, -8227423, -5931642,  1636536,  7750063,  6974873 },
        {  1636536, -7750063, -4660461,  5931642,  6974873, -3210181, -8227423 },
        { -1636536, -7750063,  4660461,  5931642, -6974873, -3210181,  8227423 },
        { -4660461, -3210181,  8227423, -5931642, -1636536,  7750063, -6974873 },
        { -6974873,  3210181,  1636536, -5931642,  8227423, -7750063,  4660461 },
        { -8227423,  7750063, -6974873,  5931642, -4660461,  3210181, -1636536 }
    };

    int row;

    for (row = 0; row < 8; row++) {
        const int *coeff = cos_mod[row];
        int64_t acc = input[0] * (INT64_C(1) << 23);
        int col;

        for (col = 1; col < 8; col++)
            acc += (int64_t)coeff[col - 1] * input[col];

        output[row] = norm23(acc);
    }
}
/*
 * 16-point butterfly with per-output scaling.
 *
 * output[n]      = mul23(cos_mod[n],      input[n] + input[8 + n])   n = 0..7
 * output[15 - n] = mul23(cos_mod[15 - n], input[n] - input[8 + n])   n = 7..0
 *
 * mul23() is declared outside this file.
 */
static void mod_a(const int *input, int *output)
{
    static const int cos_mod[16] = {
          4199362,   4240198,   4323885,   4454708,
          4639772,   4890013,   5221943,   5660703,
         -6245623,  -7040975,  -8158494,  -9809974,
        -12450076, -17261920, -28585092, -85479984
    };

    const int *lo = input;
    const int *hi = input + 8;
    int n;

    /* Sums fill the lower half in ascending order. */
    for (n = 0; n < 8; n++)
        output[n] = mul23(cos_mod[n], lo[n] + hi[n]);

    /* Differences fill the upper half, walking the inputs backwards. */
    for (n = 7; n >= 0; n--)
        output[15 - n] = mul23(cos_mod[15 - n], lo[n] - hi[n]);
}
/*
 * 16-point butterfly that first scales the upper input half IN PLACE.
 *
 * input[8 + n] is overwritten with mul23(cos_mod[n], input[8 + n]); then
 * output[n]      = input[n] + input[8 + n]   n = 0..7
 * output[15 - n] = input[n] - input[8 + n]   n = 7..0
 *
 * mul23() is declared outside this file.
 */
static void mod_b(int *input, int *output)
{
    static const int cos_mod[8] = {
        4214598,  4383036,  4755871,  5425934,
        6611520,  8897610, 14448934, 42791536
    };

    int *hi = input + 8;
    int n;

    /* Scale the upper half; the caller's buffer is modified. */
    for (n = 0; n < 8; n++)
        hi[n] = mul23(cos_mod[n], hi[n]);

    for (n = 0; n < 8; n++)
        output[n] = input[n] + hi[n];

    for (n = 7; n >= 0; n--)
        output[15 - n] = input[n] - hi[n];
}
/*
 * 32-point butterfly with per-output scaling.
 *
 * output[n]      = mul23(cos_mod[n],      input[n] + input[16 + n])   n = 0..15
 * output[31 - n] = mul23(cos_mod[31 - n], input[n] - input[16 + n])   n = 15..0
 *
 * mul23() is declared outside this file.
 */
static void mod_c(const int *input, int *output)
{
    static const int cos_mod[32] = {
         1048892,  1051425,   1056522,   1064244,
         1074689,  1087987,   1104313,   1123884,
         1146975,  1173922,   1205139,   1241133,
         1282529,  1330095,   1384791,   1447815,
        -1520688, -1605358,  -1704360,  -1821051,
        -1959964, -2127368,  -2332183,  -2587535,
        -2913561, -3342802,  -3931480,  -4785806,
        -6133390, -8566050, -14253820, -42727120
    };

    const int *lo = input;
    const int *hi = input + 16;
    int n;

    for (n = 0; n < 16; n++)
        output[n] = mul23(cos_mod[n], lo[n] + hi[n]);

    for (n = 15; n >= 0; n--)
        output[31 - n] = mul23(cos_mod[31 - n], lo[n] - hi[n]);
}
/*
 * Replace each of the first len elements of input with clip23() of itself.
 * clip23() is declared outside this file.
 */
static void clp_v(int *input, int len)
{
    int left;

    for (left = len; left > 0; left--, input++)
        *input = clip23(*input);
}
/*
 * Fixed-point transform of 32 input values into 32 output values.
 *
 * When the summed input magnitude exceeds 0x400000 the input is divided
 * by 4 (with rounding) before the transform and the result is multiplied
 * by 4 again afterwards. Intermediate buffers are clipped with clp_v()
 * after every stage.
 *
 * output: 32 values written
 * input:  32 values read, not modified
 */
static void imdct_half_32(int32_t *output, const int32_t *input)
{
    int buf_a[32], buf_b[32];
    int i, k, mag, shift, round;

    /* The accumulator must start from zero before summing magnitudes. */
    mag = 0;
    for (i = 0; i < 32; i++)
        mag += abs(input[i]);

    shift = mag > 0x400000 ? 2 : 0;
    round = shift > 0 ? 1 << (shift - 1) : 0;

    for (i = 0; i < 32; i++)
        buf_a[i] = (input[i] + round) >> shift;

    /* 32 -> 2 x 16 */
    sum_a(buf_a, buf_b +  0, 16);
    sum_b(buf_a, buf_b + 16, 16);
    clp_v(buf_b, 32);

    /* 2 x 16 -> 4 x 8 */
    sum_a(buf_b +  0, buf_a +  0, 8);
    sum_b(buf_b +  0, buf_a +  8, 8);
    sum_c(buf_b + 16, buf_a + 16, 8);
    sum_d(buf_b + 16, buf_a + 24, 8);
    clp_v(buf_a, 32);

    /* 8-point transforms on each group */
    dct_a(buf_a +  0, buf_b +  0);
    dct_b(buf_a +  8, buf_b +  8);
    dct_b(buf_a + 16, buf_b + 16);
    dct_b(buf_a + 24, buf_b + 24);
    clp_v(buf_b, 32);

    /* 4 x 8 -> 2 x 16 */
    mod_a(buf_b +  0, buf_a +  0);
    mod_b(buf_b + 16, buf_a + 16);
    clp_v(buf_a, 32);

    /* 2 x 16 -> 32; leaves the result in buf_b for the final stage */
    mod_c(buf_a, buf_b);

    /* Undo the input pre-scaling. */
    for (i = 0; i < 32; i++)
        buf_b[i] = clip23(buf_b[i] * (1 << shift));

    /* Final butterfly between mirrored positions i and 31 - i. */
    for (i = 0, k = 31; i < 16; i++, k--) {
        output[     i] = clip23(buf_b[i] - buf_b[k]);
        output[16 + i] = clip23(buf_b[i] + buf_b[k]);
    }
}
/*
 * 32-point butterfly with per-output scaling (used by the 64-point path).
 *
 * output[n]      = mul23(cos_mod[n],      input[n] + input[16 + n])   n = 0..15
 * output[31 - n] = mul23(cos_mod[31 - n], input[n] - input[16 + n])   n = 15..0
 *
 * mul23() is declared outside this file.
 */
static void mod64_a(const int *input, int *output)
{
    static const int cos_mod[32] = {
          4195568,   4205700,   4226086,    4256977,
          4298755,   4351949,   4417251,    4495537,
          4587901,   4695690,   4820557,    4964534,
          5130115,   5320382,   5539164,    5791261,
         -6082752,  -6421430,  -6817439,   -7284203,
         -7839855,  -8509474,  -9328732,  -10350140,
        -11654242, -13371208, -15725922,  -19143224,
        -24533560, -34264200, -57015280, -170908480
    };

    const int *lo = input;
    const int *hi = input + 16;
    int n;

    for (n = 0; n < 16; n++)
        output[n] = mul23(cos_mod[n], lo[n] + hi[n]);

    for (n = 15; n >= 0; n--)
        output[31 - n] = mul23(cos_mod[31 - n], lo[n] - hi[n]);
}
/*
 * 32-point butterfly that first scales the upper input half IN PLACE.
 *
 * input[16 + n] is overwritten with mul23(cos_mod[n], input[16 + n]); then
 * output[n]      = input[n] + input[16 + n]   n = 0..15
 * output[31 - n] = input[n] - input[16 + n]   n = 15..0
 *
 * mul23() is declared outside this file.
 */
static void mod64_b(int *input, int *output)
{
    static const int cos_mod[16] = {
         4199362,  4240198,  4323885,  4454708,
         4639772,  4890013,  5221943,  5660703,
         6245623,  7040975,  8158494,  9809974,
        12450076, 17261920, 28585092, 85479984
    };

    int *hi = input + 16;
    int n;

    /* Scale the upper half; the caller's buffer is modified. */
    for (n = 0; n < 16; n++)
        hi[n] = mul23(cos_mod[n], hi[n]);

    for (n = 0; n < 16; n++)
        output[n] = input[n] + hi[n];

    for (n = 15; n >= 0; n--)
        output[31 - n] = input[n] - hi[n];
}
/*
 * 64-point butterfly with per-output scaling.
 *
 * output[n]      = mul23(cos_mod[n],      input[n] + input[32 + n])   n = 0..31
 * output[63 - n] = mul23(cos_mod[63 - n], input[n] - input[32 + n])   n = 31..0
 *
 * mul23() is declared outside this file.
 */
static void mod64_c(const int *input, int *output)
{
    static const int cos_mod[64] = {
          741511,    741958,    742853,    744199,
          746001,    748262,    750992,    754197,
          757888,    762077,    766777,    772003,
          777772,    784105,    791021,    798546,
          806707,    815532,    825054,    835311,
          846342,    858193,    870912,    884554,
          899181,    914860,    931667,    949686,
          969011,    989747,   1012012,   1035941,
        -1061684,  -1089412,  -1119320,  -1151629,
        -1186595,  -1224511,  -1265719,  -1310613,
        -1359657,  -1413400,  -1472490,  -1537703,
        -1609974,  -1690442,  -1780506,  -1881904,
        -1996824,  -2128058,  -2279225,  -2455101,
        -2662128,  -2909200,  -3208956,  -3579983,
        -4050785,  -4667404,  -5509372,  -6726913,
        -8641940, -12091426, -20144284, -60420720
    };

    const int *lo = input;
    const int *hi = input + 32;
    int n;

    for (n = 0; n < 32; n++)
        output[n] = mul23(cos_mod[n], lo[n] + hi[n]);

    for (n = 31; n >= 0; n--)
        output[63 - n] = mul23(cos_mod[63 - n], lo[n] - hi[n]);
}
/*
 * Fixed-point transform of 64 input values into 64 output values.
 *
 * When the summed input magnitude exceeds 0x400000 the input is divided
 * by 4 (with rounding) before the transform and the result is multiplied
 * by 4 again afterwards. Intermediate buffers are clipped with clp_v()
 * after every stage.
 *
 * output: 64 values written
 * input:  64 values read, not modified
 */
static void imdct_half_64(int32_t *output, const int32_t *input)
{
    int buf_a[64], buf_b[64];
    int i, k, mag, shift, round;

    /* The accumulator must start from zero before summing magnitudes. */
    mag = 0;
    for (i = 0; i < 64; i++)
        mag += abs(input[i]);

    shift = mag > 0x400000 ? 2 : 0;
    round = shift > 0 ? 1 << (shift - 1) : 0;

    for (i = 0; i < 64; i++)
        buf_a[i] = (input[i] + round) >> shift;

    /* 64 -> 2 x 32 */
    sum_a(buf_a, buf_b +  0, 32);
    sum_b(buf_a, buf_b + 32, 32);
    clp_v(buf_b, 64);

    /* 2 x 32 -> 4 x 16 */
    sum_a(buf_b +  0, buf_a +  0, 16);
    sum_b(buf_b +  0, buf_a + 16, 16);
    sum_c(buf_b + 32, buf_a + 32, 16);
    sum_d(buf_b + 32, buf_a + 48, 16);
    clp_v(buf_a, 64);

    /* 4 x 16 -> 8 x 8 */
    sum_a(buf_a +  0, buf_b +  0, 8);
    sum_b(buf_a +  0, buf_b +  8, 8);
    sum_c(buf_a + 16, buf_b + 16, 8);
    sum_d(buf_a + 16, buf_b + 24, 8);
    sum_c(buf_a + 32, buf_b + 32, 8);
    sum_d(buf_a + 32, buf_b + 40, 8);
    sum_c(buf_a + 48, buf_b + 48, 8);
    sum_d(buf_a + 48, buf_b + 56, 8);
    clp_v(buf_b, 64);

    /* 8-point transforms on each group */
    dct_a(buf_b +  0, buf_a +  0);
    dct_b(buf_b +  8, buf_a +  8);
    dct_b(buf_b + 16, buf_a + 16);
    dct_b(buf_b + 24, buf_a + 24);
    dct_b(buf_b + 32, buf_a + 32);
    dct_b(buf_b + 40, buf_a + 40);
    dct_b(buf_b + 48, buf_a + 48);
    dct_b(buf_b + 56, buf_a + 56);
    clp_v(buf_a, 64);

    /* 8 x 8 -> 4 x 16 */
    mod_a(buf_a +  0, buf_b +  0);
    mod_b(buf_a + 16, buf_b + 16);
    mod_b(buf_a + 32, buf_b + 32);
    mod_b(buf_a + 48, buf_b + 48);
    clp_v(buf_b, 64);

    /* 4 x 16 -> 2 x 32 */
    mod64_a(buf_b +  0, buf_a +  0);
    mod64_b(buf_b + 32, buf_a + 32);
    clp_v(buf_a, 64);

    /* 2 x 32 -> 64 */
    mod64_c(buf_a, buf_b);

    /* Undo the input pre-scaling. */
    for (i = 0; i < 64; i++)
        buf_b[i] = clip23(buf_b[i] * (1 << shift));

    /* Final butterfly between mirrored positions i and 63 - i. */
    for (i = 0, k = 63; i < 32; i++, k--) {
        output[     i] = clip23(buf_b[i] - buf_b[k]);
        output[32 + i] = clip23(buf_b[i] + buf_b[k]);
    }
}
358 av_cold void ff_dcadct_init(DCADCTContext *c)
360 c->imdct_half[0] = imdct_half_32;
361 c->imdct_half[1] = imdct_half_64;