* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
#include "libavcodec/dsputil.h"
-
-#include "gcc_fixes.h"
-
#include "dsputil_ppc.h"
#include "util_altivec.h"
+#include "types_altivec.h"
int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
sad = (vector unsigned int)vec_splat_u32(0);
- permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+ permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2
sum = (vector unsigned int)vec_splat_u32(0);
- permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+ permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
for (i = 0; i < h; i++) {
}
}
+
+/* Store zero_s16v over the 128 bytes at block, 16 bytes per vec_st.
+ * NOTE(review): vec_st presumably needs block 16-byte aligned -- confirm. */
+static void clear_block_altivec(DCTELEM *block) {
+    int offset;
+    LOAD_ZERO;
+
+    /* byte offsets 0, 16, ..., 112: one vector store per step */
+    for (offset = 0; offset < 128; offset += 16) {
+        vec_st(zero_s16v, offset, block);
+    }
+}
+
+
void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
register int i;
register vector unsigned char vdst, vsrc;
POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
{
register const vector signed short vprod1 =(const vector signed short)
- AVV( 1,-1, 1,-1, 1,-1, 1,-1);
+ { 1,-1, 1,-1, 1,-1, 1,-1 };
register const vector signed short vprod2 =(const vector signed short)
- AVV( 1, 1,-1,-1, 1, 1,-1,-1);
+ { 1, 1,-1,-1, 1, 1,-1,-1 };
register const vector signed short vprod3 =(const vector signed short)
- AVV( 1, 1, 1, 1,-1,-1,-1,-1);
+ { 1, 1, 1, 1,-1,-1,-1,-1 };
register const vector unsigned char perm1 = (const vector unsigned char)
- AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
- 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
+ {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
+ 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D};
register const vector unsigned char perm2 = (const vector unsigned char)
- AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
- 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
+ {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B};
register const vector unsigned char perm3 = (const vector unsigned char)
- AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
#define ONEITERBUTTERFLY(i, res) \
{ \
static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
int sum;
register vector signed short
- temp0 REG_v(v0),
- temp1 REG_v(v1),
- temp2 REG_v(v2),
- temp3 REG_v(v3),
- temp4 REG_v(v4),
- temp5 REG_v(v5),
- temp6 REG_v(v6),
- temp7 REG_v(v7);
+ temp0 __asm__ ("v0"),
+ temp1 __asm__ ("v1"),
+ temp2 __asm__ ("v2"),
+ temp3 __asm__ ("v3"),
+ temp4 __asm__ ("v4"),
+ temp5 __asm__ ("v5"),
+ temp6 __asm__ ("v6"),
+ temp7 __asm__ ("v7");
register vector signed short
- temp0S REG_v(v8),
- temp1S REG_v(v9),
- temp2S REG_v(v10),
- temp3S REG_v(v11),
- temp4S REG_v(v12),
- temp5S REG_v(v13),
- temp6S REG_v(v14),
- temp7S REG_v(v15);
- register const vector unsigned char vzero REG_v(v31)=
+ temp0S __asm__ ("v8"),
+ temp1S __asm__ ("v9"),
+ temp2S __asm__ ("v10"),
+ temp3S __asm__ ("v11"),
+ temp4S __asm__ ("v12"),
+ temp5S __asm__ ("v13"),
+ temp6S __asm__ ("v14"),
+ temp7S __asm__ ("v15");
+ register const vector unsigned char vzero __asm__ ("v31") =
(const vector unsigned char)vec_splat_u8(0);
{
- register const vector signed short vprod1 REG_v(v16)=
- (const vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
- register const vector signed short vprod2 REG_v(v17)=
- (const vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
- register const vector signed short vprod3 REG_v(v18)=
- (const vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
- register const vector unsigned char perm1 REG_v(v19)=
+ register const vector signed short vprod1 __asm__ ("v16") =
+ (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 };
+ register const vector signed short vprod2 __asm__ ("v17") =
+ (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 };
+ register const vector signed short vprod3 __asm__ ("v18") =
+ (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 };
+ register const vector unsigned char perm1 __asm__ ("v19") =
(const vector unsigned char)
- AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
- 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
- register const vector unsigned char perm2 REG_v(v20)=
+ {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
+ 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D};
+ register const vector unsigned char perm2 __asm__ ("v20") =
(const vector unsigned char)
- AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
- 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
- register const vector unsigned char perm3 REG_v(v21)=
+ {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+ 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B};
+ register const vector unsigned char perm3 __asm__ ("v21") =
(const vector unsigned char)
- AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-
-#define ONEITERBUTTERFLY(i, res1, res2) \
- { \
- register vector unsigned char src1 REG_v(v22), \
- src2 REG_v(v23), \
- dst1 REG_v(v24), \
- dst2 REG_v(v25), \
- srcO REG_v(v22), \
- dstO REG_v(v23); \
+ {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
+
+#define ONEITERBUTTERFLY(i, res1, res2) \
+ { \
+ register vector unsigned char src1 __asm__ ("v22"), \
+ src2 __asm__ ("v23"), \
+ dst1 __asm__ ("v24"), \
+ dst2 __asm__ ("v25"), \
+ srcO __asm__ ("v22"), \
+ dstO __asm__ ("v23"); \
\
- register vector signed short srcV REG_v(v24), \
- dstV REG_v(v25), \
- srcW REG_v(v26), \
- dstW REG_v(v27), \
- but0 REG_v(v28), \
- but0S REG_v(v29), \
- op1 REG_v(v30), \
- but1 REG_v(v22), \
- op1S REG_v(v23), \
- but1S REG_v(v24), \
- op2 REG_v(v25), \
- but2 REG_v(v26), \
- op2S REG_v(v27), \
- but2S REG_v(v28), \
- op3 REG_v(v29), \
- op3S REG_v(v30); \
+ register vector signed short srcV __asm__ ("v24"), \
+ dstV __asm__ ("v25"), \
+ srcW __asm__ ("v26"), \
+ dstW __asm__ ("v27"), \
+ but0 __asm__ ("v28"), \
+ but0S __asm__ ("v29"), \
+ op1 __asm__ ("v30"), \
+ but1 __asm__ ("v22"), \
+ op1S __asm__ ("v23"), \
+ but1S __asm__ ("v24"), \
+ op2 __asm__ ("v25"), \
+ but2 __asm__ ("v26"), \
+ op2S __asm__ ("v27"), \
+ but2S __asm__ ("v28"), \
+ op3 __asm__ ("v29"), \
+ op3S __asm__ ("v30"); \
\
src1 = vec_ld(stride * i, src); \
src2 = vec_ld((stride * i) + 16, src); \
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
+ c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
- if (ENABLE_VORBIS_DECODER)
+ if (CONFIG_VORBIS_DECODER)
c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
}