]> git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil.c
* using static instead of extern
[ffmpeg] / libavcodec / dsputil.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Gerard Lantau.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include "avcodec.h"
22 #include "dsputil.h"
23 #include "simple_idct.h"
24
/*
 * Run-time selected DSP entry points.  dsputil_init() binds them to the C
 * implementations in this file (get_pixels_c, put_pixels_clamped_c, ...)
 * and then calls the platform init routines that are compiled in
 * (HAVE_MMX, ARCH_ARMV4L, HAVE_MLIB, ARCH_ALPHA).
 * NOTE(review): presumably those routines overwrite some of these pointers
 * with optimized versions -- confirm in the platform files.
 */
25 void (*ff_idct)(DCTELEM *block);
26 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
27 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
28 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
29
/*
 * Sum-of-absolute-differences functions over 16 pixel wide rows: full-pel,
 * horizontal half-pel (x2), vertical half-pel (y2) and diagonal (xy2).
 * Bound to the pix_abs16x16*_c functions by dsputil_init().
 */
30 op_pixels_abs_func pix_abs16x16;
31 op_pixels_abs_func pix_abs16x16_x2;
32 op_pixels_abs_func pix_abs16x16_y2;
33 op_pixels_abs_func pix_abs16x16_xy2;
34
/*
 * cropTbl: clamping table filled by dsputil_init().  After init,
 * cropTbl[MAX_NEG_CROP + v] is v clamped to 0..255 for every v in
 * [-MAX_NEG_CROP, 255 + MAX_NEG_CROP].
 * squareTbl: filled by dsputil_init() so that squareTbl[256 + d] == d * d
 * for d in [-256, 255].
 */
35 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
36 UINT32 squareTbl[512];
37
/*
 * Default quantization matrices, defined in another translation unit.
 * dsputil_init() passes both to block_permute() when a permuted IDCT is
 * in use, i.e. they are reordered in place.
 */
38 extern UINT16 default_intra_matrix[64];
39 extern UINT16 default_non_intra_matrix[64];
40
/*
 * Zigzag scan table: zigzag_direct[i] is the position (row * 8 + column)
 * inside the 8x8 block of the i-th coefficient in scan order.
 * The table is not constant: when a permuted IDCT is in use,
 * dsputil_init() rewrites every entry through block_permute_op().
 */
41 UINT8 zigzag_direct[64] = {
42     0, 1, 8, 16, 9, 2, 3, 10,
43     17, 24, 32, 25, 18, 11, 4, 5,
44     12, 19, 26, 33, 40, 48, 41, 34,
45     27, 20, 13, 6, 7, 14, 21, 28,
46     35, 42, 49, 56, 57, 50, 43, 36,
47     29, 22, 15, 23, 30, 37, 44, 51,
48     58, 59, 52, 45, 38, 31, 39, 46,
49     53, 60, 61, 54, 47, 55, 62, 63
50 };
51
52 /* inverse of the non-permuted zigzag_direct, plus 1, for the MMX quantizer:
   dsputil_init() stores inv_zigzag_direct16[zigzag_direct[i]] = i + 1
   before zigzag_direct is permuted */
53 UINT16 __align8 inv_zigzag_direct16[64];
54
55 /* copy of zigzag_direct taken by dsputil_init() before the IDCT
   permutation is applied, for the MMX quantizer */
56 UINT8 zigzag_direct_noperm[64];
57
/*
 * Alternate coefficient scan order that favours horizontal runs (the first
 * entries walk along the top row).  Entry i is the block position
 * (row * 8 + column) of the i-th scanned coefficient.
 * Like zigzag_direct, the table is rewritten in place by dsputil_init()
 * through block_permute_op() when a permuted IDCT is in use.
 */
58 UINT8 ff_alternate_horizontal_scan[64] = {
59     0,  1,  2,  3,  8,  9, 16, 17, 
60     10, 11,  4,  5,  6,  7, 15, 14,
61     13, 12, 19, 18, 24, 25, 32, 33, 
62     26, 27, 20, 21, 22, 23, 28, 29,
63     30, 31, 34, 35, 40, 41, 48, 49, 
64     42, 43, 36, 37, 38, 39, 44, 45,
65     46, 47, 50, 51, 56, 57, 58, 59, 
66     52, 53, 54, 55, 60, 61, 62, 63,
67 };
68
/*
 * Alternate coefficient scan order that favours vertical runs (the first
 * entries walk down the left column).  Entry i is the block position
 * (row * 8 + column) of the i-th scanned coefficient.
 * Rewritten in place by dsputil_init() through block_permute_op() when a
 * permuted IDCT is in use.
 */
69 UINT8 ff_alternate_vertical_scan[64] = {
70     0,  8, 16, 24,  1,  9,  2, 10, 
71     17, 25, 32, 40, 48, 56, 57, 49,
72     41, 33, 26, 18,  3, 11,  4, 12, 
73     19, 27, 34, 42, 50, 58, 35, 43,
74     51, 59, 20, 28,  5, 13,  6, 14, 
75     21, 29, 36, 44, 52, 60, 37, 45,
76     53, 61, 22, 30,  7, 15, 23, 31, 
77     38, 46, 54, 62, 39, 47, 55, 63,
78 };
79
80 /* Input permutation for the simple_idct_mmx.
   When SIMPLE_IDCT is defined and a permuted IDCT is selected,
   dsputil_init() copies this table into permutation[]. */
81 static UINT8 simple_mmx_permutation[64]={
82         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
83         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
84         0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
85         0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
86         0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
87         0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
88         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
89         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
90 };
91
92 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
   (reciprocal table for replacing a division by a multiply and a shift).
   The product does not fit in 32 bits, so it has to be formed in a 64-bit
   type before shifting.
   inverse[0] (0) and inverse[1] (2^32 - 1) lie outside the range covered by
   the guarantee above.  The remaining entries look like 2^32 / b rounded
   up (spot-checked, not verified for every b). */
93 UINT32 inverse[256]={
94          0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
95  536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
96  268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
97  178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
98  134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
99  107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
100   89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
101   76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
102   67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
103   59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
104   53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
105   48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
106   44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
107   41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
108   38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
109   35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
110   33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
111   31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
112   29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
113   28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
114   26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
115   25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
116   24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
117   23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
118   22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
119   21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
120   20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
121   19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
122   19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
123   18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
124   17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
125   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
126 };
127
128 /* used to skip zeros at the end:
   build_zigzag_end() sets zigzag_end[i] to 1 + the largest zigzag_direct
   entry among scan positions 0..i */
129 UINT8 zigzag_end[64];
130
/* Coefficient permutation selected by dsputil_init(): the identity when no
   permuted IDCT is used, otherwise simple_mmx_permutation (SIMPLE_IDCT
   builds) or a reordering of the three low index bits. */
131 UINT8 permutation[64];
132 //UINT8 invPermutation[64];
133
134 static void build_zigzag_end()
135 {
136     int lastIndex;
137     int lastIndexAfterPerm=0;
138     for(lastIndex=0; lastIndex<64; lastIndex++)
139     {
140         if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
141             lastIndexAfterPerm= zigzag_direct[lastIndex];
142         zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
143     }
144 }
145
146 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
147 {
148     DCTELEM *p;
149     const UINT8 *pix;
150     int i;
151
152     /* read the pixels */
153     p = block;
154     pix = pixels;
155     for(i=0;i<8;i++) {
156         p[0] = pix[0];
157         p[1] = pix[1];
158         p[2] = pix[2];
159         p[3] = pix[3];
160         p[4] = pix[4];
161         p[5] = pix[5];
162         p[6] = pix[6];
163         p[7] = pix[7];
164         pix += line_size;
165         p += 8;
166     }
167 }
168
169 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
170 {
171     const DCTELEM *p;
172     UINT8 *pix;
173     int i;
174     UINT8 *cm = cropTbl + MAX_NEG_CROP;
175     
176     /* read the pixels */
177     p = block;
178     pix = pixels;
179     for(i=0;i<8;i++) {
180         pix[0] = cm[p[0]];
181         pix[1] = cm[p[1]];
182         pix[2] = cm[p[2]];
183         pix[3] = cm[p[3]];
184         pix[4] = cm[p[4]];
185         pix[5] = cm[p[5]];
186         pix[6] = cm[p[6]];
187         pix[7] = cm[p[7]];
188         pix += line_size;
189         p += 8;
190     }
191 }
192
193 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
194 {
195     const DCTELEM *p;
196     UINT8 *pix;
197     int i;
198     UINT8 *cm = cropTbl + MAX_NEG_CROP;
199     
200     /* read the pixels */
201     p = block;
202     pix = pixels;
203     for(i=0;i<8;i++) {
204         pix[0] = cm[pix[0] + p[0]];
205         pix[1] = cm[pix[1] + p[1]];
206         pix[2] = cm[pix[2] + p[2]];
207         pix[3] = cm[pix[3] + p[3]];
208         pix[4] = cm[pix[4] + p[4]];
209         pix[5] = cm[pix[5] + p[5]];
210         pix[6] = cm[pix[6] + p[6]];
211         pix[7] = cm[pix[7] + p[7]];
212         pix += line_size;
213         p += 8;
214     }
215 }
216
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions working on
 * 8 pixel wide rows, plus a public table that holds them:
 *
 *   OPNAME_pixels      source pixel as is
 *   OPNAME_pixels_x2   avg2() of each pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2() of each pixel and the pixel one row below
 *   OPNAME_pixels_xy2  avg4() of each pixel, its right neighbour and the
 *                      two pixels below them
 *   OPNAME_pixels_tab  { _pixels, _pixels_x2, _pixels_y2, _pixels_xy2 }
 *
 * BTYPE is the destination element type, OP(dst, value) combines a source
 * value into a destination element and INCR is the amount added to the
 * destination pointer after each row.  The source pointer always advances
 * by line_size.
 *
 * avg2()/avg4() are picked up as defined at the point of instantiation.
 * The x2/xy2 variants read 9 source columns, the y2/xy2 variants read h + 1
 * source rows.  h must be at least 1: the row loops are do/while (--h).
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                          \
                                                                                \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels,                \
                              int line_size, int h)                             \
{                                                                               \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++)                                                 \
            OP(block[x], pixels[x]);                                            \
        pixels += line_size;                                                    \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels,             \
                                 int line_size, int h)                          \
{                                                                               \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++)                                                 \
            OP(block[x], avg2(pixels[x], pixels[x + 1]));                       \
        pixels += line_size;                                                    \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels,             \
                                 int line_size, int h)                          \
{                                                                               \
    const UINT8 *below = pixels + line_size;                                    \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++)                                                 \
            OP(block[x], avg2(pixels[x], below[x]));                            \
        pixels += line_size;                                                    \
        below += line_size;                                                     \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels,            \
                                  int line_size, int h)                         \
{                                                                               \
    const UINT8 *below = pixels + line_size;                                    \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++)                                                 \
            OP(block[x], avg4(pixels[x], pixels[x + 1],                         \
                              below[x], below[x + 1]));                         \
        pixels += line_size;                                                    \
        below += line_size;                                                     \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels,             \
                                 int line_size, int h) = {                      \
    OPNAME ## _pixels,                                                          \
    OPNAME ## _pixels_x2,                                                       \
    OPNAME ## _pixels_y2,                                                       \
    OPNAME ## _pixels_xy2,                                                      \
};
315
316
/* rounding primitives: averages of 2 or 4 values, halves rounded up.
   Every macro parameter is parenthesized so that an argument containing a
   low-precedence operator (|, &, ?:, ...) is still averaged as one value. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)

/* destination update operations handed to PIXOP as its OP argument:
   overwrite, rounded average with the destination, subtract */
#define op_put(a, b) (a) = (b)
#define op_avg(a, b) (a) = avg2(a, b)
#define op_sub(a, b) (a) -= (b)
324
/* Rounding variants on 8-bit destinations: each line generates
   <name>_pixels, <name>_pixels_x2, <name>_pixels_y2, <name>_pixels_xy2 and
   <name>_pixels_tab[4].  "put" overwrites the destination, "avg" averages
   it with the source; destination rows are line_size apart. */
325 PIXOP(UINT8, put, op_put, line_size)
326 PIXOP(UINT8, avg, op_avg, line_size)
327
/* "sub" subtracts the source from a DCTELEM block whose rows are 8
   elements apart (generates sub_pixels* and sub_pixels_tab[4]). */
328 PIXOP(DCTELEM, sub, op_sub, 8)
329
/* non-rounding primitives: avg2 truncates, avg4 adds 1 instead of 2 before
   the shift.  Every macro parameter is parenthesized so that an argument
   containing a low-precedence operator is still averaged as one value. */
#undef avg2
#undef avg4
#define avg2(a, b) (((a) + (b)) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 1) >> 2)
335
/* Same put/avg operations as the rounding instantiations, but expanded
   while the non-rounding avg2/avg4 are in effect; generates
   put_no_rnd_pixels*, avg_no_rnd_pixels* and the matching _pixels_tab[4]
   arrays. */
336 PIXOP(UINT8, put_no_rnd, op_put, line_size)
337 PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
338
/* motion estimation */

/* Back to the rounding averages for the pix_abs16x16* functions that
   follow.  Every macro parameter is parenthesized so that an argument
   containing a low-precedence operator is still averaged as one value. */
#undef avg2
#undef avg4
#define avg2(a, b) (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
345
346 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
347 {
348     int s, i;
349
350     s = 0;
351     for(i=0;i<h;i++) {
352         s += abs(pix1[0] - pix2[0]);
353         s += abs(pix1[1] - pix2[1]);
354         s += abs(pix1[2] - pix2[2]);
355         s += abs(pix1[3] - pix2[3]);
356         s += abs(pix1[4] - pix2[4]);
357         s += abs(pix1[5] - pix2[5]);
358         s += abs(pix1[6] - pix2[6]);
359         s += abs(pix1[7] - pix2[7]);
360         s += abs(pix1[8] - pix2[8]);
361         s += abs(pix1[9] - pix2[9]);
362         s += abs(pix1[10] - pix2[10]);
363         s += abs(pix1[11] - pix2[11]);
364         s += abs(pix1[12] - pix2[12]);
365         s += abs(pix1[13] - pix2[13]);
366         s += abs(pix1[14] - pix2[14]);
367         s += abs(pix1[15] - pix2[15]);
368         pix1 += line_size;
369         pix2 += line_size;
370     }
371     return s;
372 }
373
374 int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
375 {
376     int s, i;
377
378     s = 0;
379     for(i=0;i<h;i++) {
380         s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
381         s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
382         s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
383         s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
384         s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
385         s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
386         s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
387         s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
388         s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
389         s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
390         s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
391         s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
392         s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
393         s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
394         s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
395         s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
396         pix1 += line_size;
397         pix2 += line_size;
398     }
399     return s;
400 }
401
402 int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
403 {
404     int s, i;
405     UINT8 *pix3 = pix2 + line_size;
406
407     s = 0;
408     for(i=0;i<h;i++) {
409         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
410         s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
411         s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
412         s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
413         s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
414         s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
415         s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
416         s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
417         s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
418         s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
419         s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
420         s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
421         s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
422         s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
423         s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
424         s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
425         pix1 += line_size;
426         pix2 += line_size;
427         pix3 += line_size;
428     }
429     return s;
430 }
431
432 int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
433 {
434     int s, i;
435     UINT8 *pix3 = pix2 + line_size;
436
437     s = 0;
438     for(i=0;i<h;i++) {
439         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
440         s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
441         s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
442         s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
443         s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
444         s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
445         s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
446         s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
447         s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
448         s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
449         s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
450         s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
451         s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
452         s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
453         s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
454         s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
455         pix1 += line_size;
456         pix2 += line_size;
457         pix3 += line_size;
458     }
459     return s;
460 }
461
462 /* permute block according so that it corresponds to the MMX idct
463    order */
464 #ifdef SIMPLE_IDCT
465  /* general permutation, but perhaps slightly slower */
466 void block_permute(INT16 *block)
467 {
468         int i;
469         INT16 temp[64];
470
471         for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
472
473         for(i=0; i<64; i++) block[i] = temp[i];
474 }
475 #else
476
477 void block_permute(INT16 *block)
478 {
479     int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
480     int i;
481
482     for(i=0;i<8;i++) {
483         tmp1 = block[1];
484         tmp2 = block[2];
485         tmp3 = block[3];
486         tmp4 = block[4];
487         tmp5 = block[5];
488         tmp6 = block[6];
489         block[1] = tmp2;
490         block[2] = tmp4;
491         block[3] = tmp6;
492         block[4] = tmp1;
493         block[5] = tmp3;
494         block[6] = tmp5;
495         block += 8;
496     }
497 }
498 #endif
499
500 void dsputil_init(void)
501 {
502     int i, j;
503     int use_permuted_idct;
504
505     for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
506     for(i=0;i<MAX_NEG_CROP;i++) {
507         cropTbl[i] = 0;
508         cropTbl[i + MAX_NEG_CROP + 256] = 255;
509     }
510
511     for(i=0;i<512;i++) {
512         squareTbl[i] = (i - 256) * (i - 256);
513     }
514
515 #ifdef SIMPLE_IDCT
516     ff_idct = simple_idct;
517 #else
518     ff_idct = j_rev_dct;
519 #endif
520     get_pixels = get_pixels_c;
521     put_pixels_clamped = put_pixels_clamped_c;
522     add_pixels_clamped = add_pixels_clamped_c;
523
524     pix_abs16x16 = pix_abs16x16_c;
525     pix_abs16x16_x2 = pix_abs16x16_x2_c;
526     pix_abs16x16_y2 = pix_abs16x16_y2_c;
527     pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
528     av_fdct = jpeg_fdct_ifast;
529
530     use_permuted_idct = 1;
531
532 #ifdef HAVE_MMX
533     dsputil_init_mmx();
534 #endif
535 #ifdef ARCH_ARMV4L
536     dsputil_init_armv4l();
537 #endif
538 #ifdef HAVE_MLIB
539     dsputil_init_mlib();
540     use_permuted_idct = 0;
541 #endif
542 #ifdef ARCH_ALPHA
543     dsputil_init_alpha();
544     use_permuted_idct = 0;
545 #endif
546
547 #ifdef SIMPLE_IDCT
548     if(ff_idct == simple_idct) use_permuted_idct=0;
549 #endif
550
551     if(use_permuted_idct)
552 #ifdef SIMPLE_IDCT
553         for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
554 #else
555         for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
556 #endif
557     else
558         for(i=0; i<64; i++) permutation[i]=i;
559
560     for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
561     for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
562     
563     if (use_permuted_idct) {
564         /* permute for IDCT */
565         for(i=0;i<64;i++) {
566             j = zigzag_direct[i];
567             zigzag_direct[i] = block_permute_op(j);
568             j = ff_alternate_horizontal_scan[i];
569             ff_alternate_horizontal_scan[i] = block_permute_op(j);
570             j = ff_alternate_vertical_scan[i];
571             ff_alternate_vertical_scan[i] = block_permute_op(j);
572         }
573         block_permute(default_intra_matrix);
574         block_permute(default_non_intra_matrix);
575     }
576     
577     build_zigzag_end();
578 }