*
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
*
- * This library is free software; you can redistribute it and/or
+ * based upon some commented-out C code from mpeg2dec (idct_mmx.c
+ * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+ *
+ * Alpha optimizations by Måns Rullgård <mans@mansr.com>
+ * and Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
*
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * based upon some outcommented c code from mpeg2dec (idct_mmx.c
- * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
- *
- * Alpha optimiziations by Måns Rullgård <mru@users.sourceforge.net>
- * and Falk Hueffner <falk@debian.org>
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavcodec/dsputil.h"
+#include "dsputil_alpha.h"
#include "asm.h"
-#include "../dsputil.h"
-
-extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
- int line_size);
-extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
- int line_size);
// cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
// W4 is actually exactly 16384, but using 16383 works around
static inline int idct_row(DCTELEM *row)
{
int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
- uint64_t l, r;
+ uint64_t l, r, t2;
l = ldq(row);
r = ldq(row + 4);
if (l == 0 && r == 0)
return 0;
-
+
a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
if (((l & ~0xffffUL) | r) == 0) {
a0 >>= ROW_SHIFT;
- a0 = (uint16_t) a0;
- a0 |= a0 << 16;
- a0 |= a0 << 32;
-
- stq(a0, row);
- stq(a0, row + 4);
+ t2 = (uint16_t) a0;
+ t2 |= t2 << 16;
+ t2 |= t2 << 32;
+
+ stq(t2, row);
+ stq(t2, row + 4);
return 1;
}
b3 -= W5 * t;
}
-
+
t = extwl(r, 2); /* row[5] */
if (t) {
t = sextw(t);
{
int i;
uint64_t l, r;
- uint64_t *lcol = (uint64_t *) col;
for (i = 0; i < 8; ++i) {
- int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
+ int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
a0 *= W4;
- col[0] = a0 >> COL_SHIFT;
- ++col;
+ col[i] = a0 >> COL_SHIFT;
}
- l = lcol[0];
- r = lcol[1];
- lcol[ 2] = l; lcol[ 3] = r;
- lcol[ 4] = l; lcol[ 5] = r;
- lcol[ 6] = l; lcol[ 7] = r;
- lcol[ 8] = l; lcol[ 9] = r;
- lcol[10] = l; lcol[11] = r;
- lcol[12] = l; lcol[13] = r;
- lcol[14] = l; lcol[15] = r;
+ l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
+ stq(l, col + 2 * 4); stq(r, col + 3 * 4);
+ stq(l, col + 4 * 4); stq(r, col + 5 * 4);
+ stq(l, col + 6 * 4); stq(r, col + 7 * 4);
+ stq(l, col + 8 * 4); stq(r, col + 9 * 4);
+ stq(l, col + 10 * 4); stq(r, col + 11 * 4);
+ stq(l, col + 12 * 4); stq(r, col + 13 * 4);
+ stq(l, col + 14 * 4); stq(r, col + 15 * 4);
}
-void simple_idct_axp(DCTELEM *block)
+void ff_simple_idct_axp(DCTELEM *block)
{
int i;
if (rowsZero) {
idct_col2(block);
} else if (rowsConstant) {
- uint64_t *lblock = (uint64_t *) block;
-
idct_col(block);
for (i = 0; i < 8; i += 2) {
- uint64_t v = (uint16_t) block[i * 8];
- uint64_t w = (uint16_t) block[i * 8 + 8];
+ uint64_t v = (uint16_t) block[0];
+ uint64_t w = (uint16_t) block[8];
v |= v << 16;
w |= w << 16;
v |= v << 32;
w |= w << 32;
- lblock[0] = v;
- lblock[1] = v;
- lblock[2] = w;
- lblock[3] = w;
- lblock += 4;
+ stq(v, block + 0 * 4);
+ stq(v, block + 1 * 4);
+ stq(w, block + 2 * 4);
+ stq(w, block + 3 * 4);
+ block += 4 * 4;
}
} else {
for (i = 0; i < 8; i++)
}
}
/*
 * Calls the transform routine on block (simple_idct_axp before this change,
 * ff_simple_idct_axp after it), then hands the same block, together with
 * dest and line_size, to put_pixels_clamped_axp_p().
 * This change only renames the function and the transform it calls.
 */
-void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
- simple_idct_axp(block);
+ ff_simple_idct_axp(block);
put_pixels_clamped_axp_p(block, dest, line_size);
}
/*
 * Calls the transform routine on block (simple_idct_axp before this change,
 * ff_simple_idct_axp after it), then hands the same block, together with
 * dest and line_size, to add_pixels_clamped_axp_p().
 * This change only renames the function and the transform it calls.
 */
-void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
- simple_idct_axp(block);
+ ff_simple_idct_axp(block);
add_pixels_clamped_axp_p(block, dest, line_size);
}