3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "libavutil/attributes.h"
29 #include "libavutil/internal.h"
31 #include "copy_block.h"
33 #include "simple_idct.h"
34 #include "mpegvideo.h"
/* Table of squares. The SSE routines in this file read it through a pointer
 * offset by 256 (ff_square_tab + 256), so a signed pixel difference d is
 * looked up at entry d + 256. The entries are filled at run time by
 * ff_dsputil_static_init() with (i - 256) * (i - 256). */
37 uint32_t ff_square_tab[512] = { 0, };
/**
 * Squared-error accumulation over a 4-pixel-wide strip, one row per loop
 * iteration for h iterations.
 * NOTE(review): the rest of the parameter list, the per-row pointer advance
 * and the return statement are not visible in this excerpt.
 */
39 static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
/* Bias the table pointer by 256 so negative differences index valid entries. */
43 uint32_t *sq = ff_square_tab + 256;
45 for (i = 0; i < h; i++) {
46 s += sq[pix1[0] - pix2[0]];
47 s += sq[pix1[1] - pix2[1]];
48 s += sq[pix1[2] - pix2[2]];
49 s += sq[pix1[3] - pix2[3]];
/**
 * Squared-error accumulation over an 8-pixel-wide strip, one row per loop
 * iteration for h iterations.
 * NOTE(review): the rest of the parameter list, the per-row pointer advance
 * and the return statement are not visible in this excerpt.
 */
56 static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
/* Bias the table pointer by 256 so negative differences index valid entries. */
60 uint32_t *sq = ff_square_tab + 256;
62 for (i = 0; i < h; i++) {
63 s += sq[pix1[0] - pix2[0]];
64 s += sq[pix1[1] - pix2[1]];
65 s += sq[pix1[2] - pix2[2]];
66 s += sq[pix1[3] - pix2[3]];
67 s += sq[pix1[4] - pix2[4]];
68 s += sq[pix1[5] - pix2[5]];
69 s += sq[pix1[6] - pix2[6]];
70 s += sq[pix1[7] - pix2[7]];
/**
 * Squared-error accumulation over a 16-pixel-wide strip, one row per loop
 * iteration for h iterations.
 * NOTE(review): the rest of the parameter list, the per-row pointer advance
 * and the return statement are not visible in this excerpt.
 */
77 static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
/* Bias the table pointer by 256 so negative differences index valid entries. */
81 uint32_t *sq = ff_square_tab + 256;
83 for (i = 0; i < h; i++) {
84 s += sq[pix1[0] - pix2[0]];
85 s += sq[pix1[1] - pix2[1]];
86 s += sq[pix1[2] - pix2[2]];
87 s += sq[pix1[3] - pix2[3]];
88 s += sq[pix1[4] - pix2[4]];
89 s += sq[pix1[5] - pix2[5]];
90 s += sq[pix1[6] - pix2[6]];
91 s += sq[pix1[7] - pix2[7]];
92 s += sq[pix1[8] - pix2[8]];
93 s += sq[pix1[9] - pix2[9]];
94 s += sq[pix1[10] - pix2[10]];
95 s += sq[pix1[11] - pix2[11]];
96 s += sq[pix1[12] - pix2[12]];
97 s += sq[pix1[13] - pix2[13]];
98 s += sq[pix1[14] - pix2[14]];
99 s += sq[pix1[15] - pix2[15]];
/**
 * Accumulate the absolute values of the 64 coefficients in block.
 *
 * @param block array of at least 64 int16_t coefficients
 */
107 static int sum_abs_dctelem_c(int16_t *block)
111 for (i = 0; i < 64; i++)
112 sum += FFABS(block[i]);
/* Rounded integer averages of two and of four values, used by the
 * pix_abs*_x2 / _y2 / _xy2 routines. Every argument is parenthesised so that
 * arguments containing lower-precedence operators (|, ?:, ...) expand as
 * intended; for the plain array-element arguments used in this file the
 * result is unchanged. Arguments are still evaluated exactly once each. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
/**
 * Sum of absolute differences between pix1 and pix2 over 16 columns,
 * one row per loop iteration for h iterations.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
119 static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
120 int line_size, int h)
124 for (i = 0; i < h; i++) {
125 s += abs(pix1[0] - pix2[0]);
126 s += abs(pix1[1] - pix2[1]);
127 s += abs(pix1[2] - pix2[2]);
128 s += abs(pix1[3] - pix2[3]);
129 s += abs(pix1[4] - pix2[4]);
130 s += abs(pix1[5] - pix2[5]);
131 s += abs(pix1[6] - pix2[6]);
132 s += abs(pix1[7] - pix2[7]);
133 s += abs(pix1[8] - pix2[8]);
134 s += abs(pix1[9] - pix2[9]);
135 s += abs(pix1[10] - pix2[10]);
136 s += abs(pix1[11] - pix2[11]);
137 s += abs(pix1[12] - pix2[12]);
138 s += abs(pix1[13] - pix2[13]);
139 s += abs(pix1[14] - pix2[14]);
140 s += abs(pix1[15] - pix2[15]);
/**
 * Sum of absolute differences between pix1 and the rounded average of each
 * pix2 pixel with its right-hand neighbour, over 16 columns per row.
 * Reads pix2[0..16], i.e. one column more than the block width.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
147 static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
148 int line_size, int h)
152 for (i = 0; i < h; i++) {
153 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
154 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
155 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
156 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
157 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
158 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
159 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
160 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
161 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
162 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
163 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
164 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
165 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
166 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
167 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
168 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
/**
 * Sum of absolute differences between pix1 and the rounded average of each
 * pix2 pixel with the pixel one line below it, over 16 columns per row.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
175 static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
176 int line_size, int h)
/* pix3 addresses the reference row directly below pix2. */
179 uint8_t *pix3 = pix2 + line_size;
181 for (i = 0; i < h; i++) {
182 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
183 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
184 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
185 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
186 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
187 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
188 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
189 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
190 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
191 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
192 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
193 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
194 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
195 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
196 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
197 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
/**
 * Sum of absolute differences between pix1 and the rounded average of the
 * 2x2 neighbourhood (pixel, right neighbour, and the two pixels below them)
 * in pix2, over 16 columns per row. Reads columns 0..16 of both reference
 * rows.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
205 static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
206 int line_size, int h)
/* pix3 addresses the reference row directly below pix2. */
209 uint8_t *pix3 = pix2 + line_size;
211 for (i = 0; i < h; i++) {
212 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
213 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
214 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
215 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
216 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
217 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
218 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
219 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
220 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
221 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
222 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
223 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
224 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
225 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
226 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
227 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
/**
 * Sum of absolute differences between pix1 and pix2 over 8 columns,
 * one row per loop iteration for h iterations.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
235 static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
236 int line_size, int h)
240 for (i = 0; i < h; i++) {
241 s += abs(pix1[0] - pix2[0]);
242 s += abs(pix1[1] - pix2[1]);
243 s += abs(pix1[2] - pix2[2]);
244 s += abs(pix1[3] - pix2[3]);
245 s += abs(pix1[4] - pix2[4]);
246 s += abs(pix1[5] - pix2[5]);
247 s += abs(pix1[6] - pix2[6]);
248 s += abs(pix1[7] - pix2[7]);
/**
 * Sum of absolute differences between pix1 and the rounded average of each
 * pix2 pixel with its right-hand neighbour, over 8 columns per row.
 * Reads pix2[0..8], i.e. one column more than the block width.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
255 static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
256 int line_size, int h)
260 for (i = 0; i < h; i++) {
261 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
262 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
263 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
264 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
265 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
266 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
267 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
268 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
/**
 * Sum of absolute differences between pix1 and the rounded average of each
 * pix2 pixel with the pixel one line below it, over 8 columns per row.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
275 static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
276 int line_size, int h)
/* pix3 addresses the reference row directly below pix2. */
279 uint8_t *pix3 = pix2 + line_size;
281 for (i = 0; i < h; i++) {
282 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
283 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
284 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
285 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
286 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
287 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
288 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
289 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
/**
 * Sum of absolute differences between pix1 and the rounded average of the
 * 2x2 neighbourhood (pixel, right neighbour, and the two pixels below them)
 * in pix2, over 8 columns per row. Reads columns 0..8 of both reference rows.
 * NOTE(review): the per-row pointer advance and the return statement are
 * not visible in this excerpt.
 */
297 static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
298 int line_size, int h)
/* pix3 addresses the reference row directly below pix2. */
301 uint8_t *pix3 = pix2 + line_size;
303 for (i = 0; i < h; i++) {
304 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
305 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
306 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
307 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
308 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
309 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
310 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
311 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
/**
 * Squared error over 16 columns combined with a weighted texture term.
 *
 * score1 is the plain sum of squared differences between s1 and s2.
 * score2 accumulates, over 15 column pairs, the difference between the
 * absolute 2x2 gradient of s1 and the absolute 2x2 gradient of s2.
 * The result is score1 + |score2| * weight.
 * NOTE(review): two return statements are visible (weight taken from
 * c->avctx->nsse_weight, or the constant 8); the condition selecting between
 * them and the per-row pointer advance are not visible in this excerpt.
 */
319 static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h)
321 int score1 = 0, score2 = 0, x, y;
323 for (y = 0; y < h; y++) {
324 for (x = 0; x < 16; x++)
325 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
/* Only 15 iterations: each step also reads column x + 1. */
327 for (x = 0; x < 15; x++)
328 score2 += FFABS(s1[x] - s1[x + stride] -
329 s1[x + 1] + s1[x + stride + 1]) -
330 FFABS(s2[x] - s2[x + stride] -
331 s2[x + 1] + s2[x + stride + 1]);
338 return score1 + FFABS(score2) * c->avctx->nsse_weight;
340 return score1 + FFABS(score2) * 8;
/**
 * Squared error over 8 columns combined with a weighted texture term.
 *
 * score1 is the plain sum of squared differences between s1 and s2.
 * score2 accumulates, over 7 column pairs, the difference between the
 * absolute 2x2 gradient of s1 and the absolute 2x2 gradient of s2.
 * The result is score1 + |score2| * weight.
 * NOTE(review): two return statements are visible (weight taken from
 * c->avctx->nsse_weight, or the constant 8); the condition selecting between
 * them and the per-row pointer advance are not visible in this excerpt.
 */
343 static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h)
345 int score1 = 0, score2 = 0, x, y;
347 for (y = 0; y < h; y++) {
348 for (x = 0; x < 8; x++)
349 score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
/* Only 7 iterations: each step also reads column x + 1. */
351 for (x = 0; x < 7; x++)
352 score2 += FFABS(s1[x] - s1[x + stride] -
353 s1[x + 1] + s1[x + stride + 1]) -
354 FFABS(s2[x] - s2[x + stride] -
355 s2[x + 1] + s2[x + stride + 1]);
362 return score1 + FFABS(score2) * c->avctx->nsse_weight;
364 return score1 + FFABS(score2) * 8;
/* Comparison callback taking the same leading parameters as the other
 * metrics in this file.
 * NOTE(review): presumably always returns 0, judging by the name; the body
 * is not visible in this excerpt — confirm. */
367 static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
/**
 * Fill the six-entry table cmp with comparison functions taken from c,
 * chosen by the low 8 bits of type.
 *
 * @param c    DSP context providing the candidate function tables
 * @param cmp  destination array of 6 entries; cleared before being filled
 * @param type comparison selector; only (type & 0xFF) is examined
 *
 * NOTE(review): only some of the switch arms are visible in this excerpt
 * (hadamard8_diff, dct_sad, dct264_sad, dct_max, quant_psnr); the case
 * labels themselves are not shown.
 */
373 void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type)
/* NOTE(review): the size is computed from sizeof(void *) although cmp holds
 * me_cmp_func entries; sizeof(*cmp) * 6 would track the element type. */
377 memset(cmp, 0, sizeof(void *) * 6);
379 for (i = 0; i < 6; i++) {
380 switch (type & 0xFF) {
385 cmp[i] = c->hadamard8_diff[i];
391 cmp[i] = c->dct_sad[i];
394 cmp[i] = c->dct264_sad[i];
397 cmp[i] = c->dct_max[i];
400 cmp[i] = c->quant_psnr[i];
/* Reports a selector that matched none of the handled cases (presumably the
 * default arm — the label is not visible here). */
429 av_log(NULL, AV_LOG_ERROR,
430 "internal error in cmp function selection\n");
/* Butterfly helpers used by the hadamard8_* functions.
 * BUTTERFLY2(o1, o2, i1, i2) and BUTTERFLY1(x, y): only the macro heads are
 * visible in this excerpt; presumably they produce sum/difference pairs
 * (into o1/o2, respectively in place) — confirm against the full file.
 * BUTTERFLYA(x, y) evaluates to |x + y| + |x - y|. */
435 #define BUTTERFLY2(o1, o2, i1, i2) \
439 #define BUTTERFLY1(x, y) \
448 #define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
/**
 * Butterfly-transform cost of the 8x8 difference block src - dst.
 *
 * The first loop processes each of the 8 rows: BUTTERFLY2 on adjacent pairs
 * of pixel differences, followed by two BUTTERFLY1 stages (distance 2, then
 * distance 4) within the row. The second loop processes each column with
 * BUTTERFLY1 at row distance 1 and 2, and folds the final distance-4 stage
 * into BUTTERFLYA, which adds |x + y| + |x - y| to sum.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
450 static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
451 uint8_t *src, int stride, int h)
453 int i, temp[64], sum = 0;
/* Row pass: temp[8 * i + k] holds the transformed differences of row i. */
457 for (i = 0; i < 8; i++) {
458 // FIXME: try pointer walks
459 BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
460 src[stride * i + 0] - dst[stride * i + 0],
461 src[stride * i + 1] - dst[stride * i + 1]);
462 BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
463 src[stride * i + 2] - dst[stride * i + 2],
464 src[stride * i + 3] - dst[stride * i + 3]);
465 BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
466 src[stride * i + 4] - dst[stride * i + 4],
467 src[stride * i + 5] - dst[stride * i + 5]);
468 BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
469 src[stride * i + 6] - dst[stride * i + 6],
470 src[stride * i + 7] - dst[stride * i + 7]);
472 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
473 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
474 BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
475 BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
477 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
478 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
479 BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
480 BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
/* Column pass: same stage pattern down column i, last stage summed. */
483 for (i = 0; i < 8; i++) {
484 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
485 BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
486 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
487 BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
489 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
490 BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
491 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
492 BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
494 sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
495 BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
496 BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
497 BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
/**
 * Butterfly-transform cost of the 8x8 block src itself (no reference block;
 * the second pointer argument is named dummy).
 *
 * Same stage pattern as the difference variant: a row pass (BUTTERFLY2 on
 * adjacent pixels, then BUTTERFLY1 at distance 2 and 4), then a column pass
 * whose last stage is accumulated through BUTTERFLYA. Afterwards
 * |temp[0] + temp[32]| is subtracted from sum (marked "-mean" in the code).
 * NOTE(review): the statement that receives the BUTTERFLYA expression in the
 * column pass and the return statement are not visible in this excerpt.
 */
502 static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
503 uint8_t *dummy, int stride, int h)
505 int i, temp[64], sum = 0;
/* Row pass: temp[8 * i + k] holds the transformed pixels of row i. */
509 for (i = 0; i < 8; i++) {
510 // FIXME: try pointer walks
511 BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
512 src[stride * i + 0], src[stride * i + 1]);
513 BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
514 src[stride * i + 2], src[stride * i + 3]);
515 BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
516 src[stride * i + 4], src[stride * i + 5]);
517 BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
518 src[stride * i + 6], src[stride * i + 7]);
520 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
521 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
522 BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
523 BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);
525 BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
526 BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
527 BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
528 BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
/* Column pass: same stage pattern down column i. */
531 for (i = 0; i < 8; i++) {
532 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
533 BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
534 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
535 BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);
537 BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
538 BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
539 BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
540 BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);
543 BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
544 + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
545 + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
546 + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
/* Remove the contribution of the first-column pair (0, 4) from the total. */
549 sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean
/**
 * Cost of the 8x8 difference block src1 - src2 measured as the sum of
 * absolute coefficient values.
 *
 * The pixel difference is written to a 16-byte-aligned 64-entry buffer via
 * s->pdsp.diff_pixels and the result of s->dsp.sum_abs_dctelem on that
 * buffer is returned.
 * NOTE(review): presumably a forward DCT is applied to temp between these
 * two calls; that line is not visible in this excerpt — confirm.
 */
554 static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
555 uint8_t *src2, int stride, int h)
557 LOCAL_ALIGNED_16(int16_t, temp, [64]);
561 s->pdsp.diff_pixels(temp, src1, src2, stride);
563 return s->dsp.sum_abs_dctelem(temp);
569 const int s07 = SRC(0) + SRC(7); \
570 const int s16 = SRC(1) + SRC(6); \
571 const int s25 = SRC(2) + SRC(5); \
572 const int s34 = SRC(3) + SRC(4); \
573 const int a0 = s07 + s34; \
574 const int a1 = s16 + s25; \
575 const int a2 = s07 - s34; \
576 const int a3 = s16 - s25; \
577 const int d07 = SRC(0) - SRC(7); \
578 const int d16 = SRC(1) - SRC(6); \
579 const int d25 = SRC(2) - SRC(5); \
580 const int d34 = SRC(3) - SRC(4); \
581 const int a4 = d16 + d25 + (d07 + (d07 >> 1)); \
582 const int a5 = d07 - d34 - (d25 + (d25 >> 1)); \
583 const int a6 = d07 + d34 - (d16 + (d16 >> 1)); \
584 const int a7 = d16 - d25 + (d34 + (d34 >> 1)); \
586 DST(1, a4 + (a7 >> 2)); \
587 DST(2, a2 + (a3 >> 1)); \
588 DST(3, a5 + (a6 >> 2)); \
590 DST(5, a6 - (a5 >> 2)); \
591 DST(6, (a2 >> 1) - a3); \
592 DST(7, (a4 >> 2) - a7); \
/**
 * Cost of the 8x8 difference block src1 - src2 using a two-pass 1-D
 * transform driven by the SRC/DST macros.
 *
 * Pass 1 defines SRC/DST to read and write dct[i][x] (row i, in place).
 * Pass 2 redefines SRC to read dct[x][i] (column i) and DST to add the
 * absolute value of each output to sum instead of storing it.
 * NOTE(review): the declarations of dct and sum, the loop bodies that expand
 * the 1-D transform macro, the #undef lines and the return statement are not
 * visible in this excerpt.
 */
595 static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
596 uint8_t *src2, int stride, int h)
601 s->pdsp.diff_pixels(dct[0], src1, src2, stride);
603 #define SRC(x) dct[i][x]
604 #define DST(x, v) dct[i][x] = v
605 for (i = 0; i < 8; i++)
610 #define SRC(x) dct[x][i]
611 #define DST(x, v) sum += FFABS(v)
612 for (i = 0; i < 8; i++)
/**
 * Largest absolute value among the 64 entries of the processed 8x8
 * difference block src1 - src2.
 *
 * NOTE(review): presumably a forward DCT is applied to temp between
 * diff_pixels and the maximum search; that line, the initialisation of sum
 * and the return statement are not visible in this excerpt — confirm.
 */
620 static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
621 uint8_t *src2, int stride, int h)
623 LOCAL_ALIGNED_16(int16_t, temp, [64]);
628 s->pdsp.diff_pixels(temp, src1, src2, stride);
/* Running maximum of |temp[i]|. */
631 for (i = 0; i < 64; i++)
632 sum = FFMAX(sum, FFABS(temp[i]));
/**
 * Squared error introduced by quantising the 8x8 difference block
 * src1 - src2.
 *
 * The difference block is saved in bak, then run through
 * s->fast_dct_quantize, s->dct_unquantize_inter and ff_simple_idct_8; the
 * squared differences between the round-tripped block and bak are summed.
 * temp is a single 128-entry buffer whose upper half serves as bak.
 * NOTE(review): the initialisation of sum and the return statement are not
 * visible in this excerpt.
 */
637 static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
638 uint8_t *src2, int stride, int h)
640 LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
641 int16_t *const bak = temp + 64;
647 s->pdsp.diff_pixels(temp, src1, src2, stride);
/* Keep an unmodified copy to compare against after the round trip. */
649 memcpy(bak, temp, 64 * sizeof(int16_t));
/* &i only receives the quantiser's last output argument; i is reused as the
 * loop counter below. */
651 s->block_last_index[0 /* FIXME */] =
652 s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
653 s->dct_unquantize_inter(s, temp, 0, s->qscale);
654 ff_simple_idct_8(temp); // FIXME
656 for (i = 0; i < 64; i++)
657 sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
/**
 * Combined rate and distortion estimate for coding the 8x8 difference
 * block src1 - src2.
 *
 * Both source blocks are first copied into local 8x8 buffers with stride 8.
 * The difference is quantised, the bit cost of the quantised coefficients
 * is accumulated from VLC length tables (the intra or the inter set), the
 * block is dequantised and added back onto lsrc2 via s->idsp.idct_add, and
 * the distortion is s->dsp.sse[1] between the reconstruction and lsrc1.
 * Returns distortion + ((bits * qscale^2 * 109 + 64) >> 7).
 * NOTE(review): the intra/inter condition, the run/level bookkeeping, the
 * out-of-range (escape) branches and the initialisation of bits and last
 * are not visible in this excerpt.
 */
662 static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
665 const uint8_t *scantable = s->intra_scantable.permutated;
666 LOCAL_ALIGNED_16(int16_t, temp, [64]);
667 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
668 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
669 int i, last, run, bits, level, distortion, start_i;
670 const int esc_length = s->ac_esc_length;
671 uint8_t *length, *last_length;
/* Work on packed local copies so that later calls can use stride 8. */
675 copy_block8(lsrc1, src1, 8, stride, 8);
676 copy_block8(lsrc2, src2, 8, stride, 8);
678 s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
680 s->block_last_index[0 /* FIXME */] =
682 s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
/* Intra table set; the DC coefficient is charged from the luma DC table,
 * indexed with a bias of 256. */
688 length = s->intra_ac_vlc_length;
689 last_length = s->intra_ac_vlc_last_length;
690 bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
/* Inter table set. */
693 length = s->inter_ac_vlc_length;
694 last_length = s->inter_ac_vlc_last_length;
697 if (last >= start_i) {
/* Coefficients before the last one, visited in scan order. */
699 for (i = start_i; i < last; i++) {
700 int j = scantable[i];
/* (level & ~127) == 0 holds exactly when level is in 0..127. */
705 if ((level & (~127)) == 0)
706 bits += length[UNI_AC_ENC_INDEX(run, level)];
/* Final coefficient: biased by 64 and charged from the "last" table. */
715 level = temp[i] + 64;
717 av_assert2(level - 64);
719 if ((level & (~127)) == 0) {
720 bits += last_length[UNI_AC_ENC_INDEX(run, level)];
/* Dequantise with the intra or the inter routine (selecting condition not
 * visible here). */
727 s->dct_unquantize_intra(s, temp, 0, s->qscale);
729 s->dct_unquantize_inter(s, temp, 0, s->qscale);
/* Reconstruct onto the local copy of src2 and measure against src1. */
732 s->idsp.idct_add(lsrc2, 8, temp);
734 distortion = s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
736 return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
/**
 * Bit-cost estimate for coding the 8x8 difference block src1 - src2.
 *
 * The difference is quantised with s->fast_dct_quantize and the cost of the
 * quantised coefficients is accumulated into bits from VLC length tables
 * (the intra or the inter set).
 * NOTE(review): the intra/inter condition, the run/level bookkeeping, the
 * out-of-range (escape) branches, the initialisation of bits and last, and
 * the return statement are not visible in this excerpt.
 */
739 static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
742 const uint8_t *scantable = s->intra_scantable.permutated;
743 LOCAL_ALIGNED_16(int16_t, temp, [64]);
744 int i, last, run, bits, level, start_i;
745 const int esc_length = s->ac_esc_length;
746 uint8_t *length, *last_length;
750 s->pdsp.diff_pixels(temp, src1, src2, stride);
752 s->block_last_index[0 /* FIXME */] =
754 s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
/* Intra table set; the DC coefficient is charged from the luma DC table,
 * indexed with a bias of 256. */
760 length = s->intra_ac_vlc_length;
761 last_length = s->intra_ac_vlc_last_length;
762 bits += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
/* Inter table set. */
765 length = s->inter_ac_vlc_length;
766 last_length = s->inter_ac_vlc_last_length;
769 if (last >= start_i) {
/* Coefficients before the last one, visited in scan order. */
771 for (i = start_i; i < last; i++) {
772 int j = scantable[i];
/* (level & ~127) == 0 holds exactly when level is in 0..127. */
777 if ((level & (~127)) == 0)
778 bits += length[UNI_AC_ENC_INDEX(run, level)];
/* Final coefficient: biased by 64 and charged from the "last" table. */
787 level = temp[i] + 64;
789 av_assert2(level - 64);
791 if ((level & (~127)) == 0)
792 bits += last_length[UNI_AC_ENC_INDEX(run, level)];
/* Generators for the vertical-SAD metrics.
 *
 * VSAD_INTRA(size) defines vsad_intra<size>_c: for rows y = 1 .. h - 1 it
 * adds |s[x] - s[x + stride]| for every column of a size-wide block, four
 * columns per inner iteration (so size must be a multiple of 4). The second
 * pointer argument is unused (named dummy).
 *
 * The following generator defines vsad<size>_c, which applies the same
 * vertical difference to the error signal s1 - s2:
 * |s1[x] - s2[x] - s1[x + stride] + s2[x + stride]|.
 *
 * NOTE(review): the #define line of the second generator, the per-row
 * pointer advances and the return statements are not visible in this
 * excerpt. */
800 #define VSAD_INTRA(size) \
801 static int vsad_intra ## size ## _c(MpegEncContext *c, \
802 uint8_t *s, uint8_t *dummy, \
805 int score = 0, x, y; \
807 for (y = 1; y < h; y++) { \
808 for (x = 0; x < size; x += 4) { \
809 score += FFABS(s[x] - s[x + stride]) + \
810 FFABS(s[x + 1] - s[x + stride + 1]) + \
811 FFABS(s[x + 2] - s[x + 2 + stride]) + \
812 FFABS(s[x + 3] - s[x + 3 + stride]); \
823 static int vsad ## size ## _c(MpegEncContext *c, \
824 uint8_t *s1, uint8_t *s2, \
827 int score = 0, x, y; \
829 for (y = 1; y < h; y++) { \
830 for (x = 0; x < size; x++) \
831 score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
/* Generators for the vertical squared-error metrics.
 *
 * SQ(a) squares its argument; note that the argument is evaluated twice.
 *
 * VSSE_INTRA(size) defines vsse_intra<size>_c: for rows y = 1 .. h - 1 it
 * adds SQ(s[x] - s[x + stride]) for every column of a size-wide block, four
 * columns per inner iteration (so size must be a multiple of 4). The second
 * pointer argument is unused (named dummy).
 *
 * The following generator defines vsse<size>_c, which applies the same
 * vertical difference to the error signal s1 - s2:
 * SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]).
 *
 * NOTE(review): the #define line of the second generator, the per-row
 * pointer advances and the return statements are not visible in this
 * excerpt. */
841 #define SQ(a) ((a) * (a))
842 #define VSSE_INTRA(size) \
843 static int vsse_intra ## size ## _c(MpegEncContext *c, \
844 uint8_t *s, uint8_t *dummy, \
847 int score = 0, x, y; \
849 for (y = 1; y < h; y++) { \
850 for (x = 0; x < size; x += 4) { \
851 score += SQ(s[x] - s[x + stride]) + \
852 SQ(s[x + 1] - s[x + stride + 1]) + \
853 SQ(s[x + 2] - s[x + stride + 2]) + \
854 SQ(s[x + 3] - s[x + stride + 3]); \
865 static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, \
868 int score = 0, x, y; \
870 for (y = 1; y < h; y++) { \
871 for (x = 0; x < size; x++) \
872 score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]); \
/* WRAPPER8_16_SQ(name8, name16) defines a 16-pixel-wide metric name16 on
 * top of an 8x8 metric name8 by adding the scores of name8 on the left half
 * (dst, src) and on the right half (dst + 8, src + 8).
 * NOTE(review): two such pairs of calls are visible; presumably the second
 * pair covers the lower 8 rows when h is 16, but the lines between the
 * pairs (pointer advance / condition) are not visible in this excerpt.
 *
 * The invocations that follow instantiate the 16-wide variants of the 8x8
 * metrics defined in this file. */
882 #define WRAPPER8_16_SQ(name8, name16) \
883 static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
888 score += name8(s, dst, src, stride, 8); \
889 score += name8(s, dst + 8, src + 8, stride, 8); \
893 score += name8(s, dst, src, stride, 8); \
894 score += name8(s, dst + 8, src + 8, stride, 8); \
899 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
900 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
901 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
903 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
905 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
906 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
907 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
908 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
910 /* init static data */
/**
 * Fill ff_square_tab so that entry i holds (i - 256)^2, i.e. the table is
 * centred on index 256 and covers differences from -256 to 255.
 */
911 av_cold void ff_dsputil_static_init(void)
915 for (i = 0; i < 512; i++)
916 ff_square_tab[i] = (i - 256) * (i - 256);
/**
 * Check that the compiler honours 16-byte alignment for stack variables.
 *
 * Declares a small LOCAL_ALIGNED_16 array and tests the low four bits of
 * its address; when they are non-zero and the build has MMX or AltiVec
 * support, an error is logged.
 * NOTE(review): did_fail is a function-level static, presumably used to log
 * only once; how it is updated and what the function returns are not
 * visible in this excerpt.
 */
919 int ff_check_alignment(void)
921 static int did_fail = 0;
922 LOCAL_ALIGNED_16(int, aligned, [4]);
/* Non-zero low bits mean the address is not a multiple of 16. */
924 if ((intptr_t)aligned & 15) {
926 #if HAVE_MMX || HAVE_ALTIVEC
927 av_log(NULL, AV_LOG_ERROR,
928 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
929 "and may be very slow or crash. This is not a bug in libavcodec,\n"
930 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
931 "Do not report crashes to FFmpeg developers.\n");
/**
 * Populate a DSPContext with the C reference implementations defined in
 * this file, then hand the context to the architecture-specific
 * initialisers.
 *
 * @param c     context whose function-pointer tables are filled in
 * @param avctx codec context, forwarded to the architecture initialisers
 *
 * In the tables filled here, index 0 receives the 16-wide function and
 * index 1 the 8-wide one; indices 4 and 5 of hadamard8_diff, vsad and vsse
 * receive the corresponding intra variants.
 */
940 av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
942 ff_check_alignment();
944 c->sum_abs_dctelem = sum_abs_dctelem_c;
/* pix_abs[size][variant]: variant 0 = plain, 1 = x2, 2 = y2, 3 = xy2. */
946 /* TODO [0] 16 [1] 8 */
947 c->pix_abs[0][0] = pix_abs16_c;
948 c->pix_abs[0][1] = pix_abs16_x2_c;
949 c->pix_abs[0][2] = pix_abs16_y2_c;
950 c->pix_abs[0][3] = pix_abs16_xy2_c;
951 c->pix_abs[1][0] = pix_abs8_c;
952 c->pix_abs[1][1] = pix_abs8_x2_c;
953 c->pix_abs[1][2] = pix_abs8_y2_c;
954 c->pix_abs[1][3] = pix_abs8_xy2_c;
/* Assigns <name>16_c to slot 0 and <name>8x8_c to slot 1 of c-><name>. */
956 #define SET_CMP_FUNC(name) \
957 c->name[0] = name ## 16_c; \
958 c->name[1] = name ## 8x8_c;
960 SET_CMP_FUNC(hadamard8_diff)
961 c->hadamard8_diff[4] = hadamard8_intra16_c;
962 c->hadamard8_diff[5] = hadamard8_intra8x8_c;
963 SET_CMP_FUNC(dct_sad)
964 SET_CMP_FUNC(dct_max)
966 SET_CMP_FUNC(dct264_sad)
968 c->sad[0] = pix_abs16_c;
969 c->sad[1] = pix_abs8_c;
973 SET_CMP_FUNC(quant_psnr)
976 c->vsad[0] = vsad16_c;
977 c->vsad[1] = vsad8_c;
978 c->vsad[4] = vsad_intra16_c;
979 c->vsad[5] = vsad_intra8_c;
980 c->vsse[0] = vsse16_c;
981 c->vsse[1] = vsse8_c;
982 c->vsse[4] = vsse_intra16_c;
983 c->vsse[5] = vsse_intra8_c;
984 c->nsse[0] = nsse16_c;
985 c->nsse[1] = nsse8_c;
986 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
987 ff_dsputil_init_dwt(c);
/* Architecture-specific initialisers; the conditions guarding each call are
 * not visible in this excerpt. */
991 ff_dsputil_init_alpha(c, avctx);
993 ff_dsputil_init_arm(c, avctx);
995 ff_dsputil_init_ppc(c, avctx);
997 ff_dsputil_init_x86(c, avctx);
/**
 * Unprefixed entry point that forwards both arguments to ff_dsputil_init().
 */
1000 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
1002 ff_dsputil_init(c, avctx);
/**
 * avpriv_-prefixed entry point that forwards both arguments to
 * ff_dsputil_init().
 */
1005 av_cold void avpriv_dsputil_init(DSPContext *c, AVCodecContext *avctx)
1007 ff_dsputil_init(c, avctx);