#include <unistd.h>
#include <math.h>
+#include "libavutil/common.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
-
-#ifndef MAX
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#endif
+#include "i386/idct_xvid.h"
#undef printf
#undef random
/* reference fdct/idct: each takes the coefficient block by pointer and returns
 * nothing, so the result is delivered through that block. idct is what the
 * visible algorithm table entries pass as their reference routine. */
extern void fdct(DCTELEM *block);
extern void idct(DCTELEM *block);
/* The two XVID prototypes below are removed by this change; presumably they
 * are now declared by "i386/idct_xvid.h", which the same change starts
 * including -- confirm against that header. */
-extern void ff_idct_xvid_mmx(DCTELEM *block);
-extern void ff_idct_xvid_mmx2(DCTELEM *block);
/* Invoked immediately before idct_mmx_init() in the setup code further down.
 * NOTE(review): the empty parameter list is an old-style declaration, not a
 * prototype; `(void)` would let the compiler check calls. */
extern void init_fdct();
extern void ff_mmx_idct(DCTELEM *data);
/*
 * Description of one transform implementation. The brace-enclosed entries
 * visible later in this file (e.g. {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct,
 * SSE2_PERM, MM_SSE2}) supply these members in declaration order.
 */
struct algo {
/* Label string for the entry; this change const-qualifies the pointee so
 * string literals can initialize it without discarding const. */
- char *name;
+ const char *name;
/* FDCT (0) or IDCT (1); the visible entries initialize it with the literal 1. */
enum { FDCT, IDCT } is_idct;
/* Implementation held by the entry (e.g. ff_idct_xvid_sse2). */
void (* func) (DCTELEM *block);
/* Reference routine paired with func (idct in the visible entries). */
void (* ref) (DCTELEM *block);
/* Coefficient ordering the implementation expects; this change adds SSE2_PERM.
 * Presumably this is the value tested as `form` when the input block is
 * permuted -- confirm at the call site. */
- enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
+ enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
/* Initialized from MM_* constants (MM_MMX, MM_MMXEXT, MM_SSE2) in the visible
 * entries; presumably the CPU feature required to run func -- confirm. */
int mm_support;
};
{"SIMPLE-MMX", 1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
{"XVID-MMX", 1, ff_idct_xvid_mmx, idct, NO_PERM, MM_MMX},
{"XVID-MMX2", 1, ff_idct_xvid_mmx2, idct, NO_PERM, MM_MMXEXT},
+ {"XVID-SSE2", 1, ff_idct_xvid_sse2, idct, SSE2_PERM, MM_SSE2},
#endif
#ifdef HAVE_ALTIVEC
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
/* Remap used for SSE2_PERM input: source coefficient i is stored at index
 * (i & 0x38) | idct_sse2_row_perm[i & 7], i.e. bits 3..5 of the index are kept
 * and only the low three bits are reordered through this table. */
+static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+
void idct_mmx_init(void)
{
int i;
}
}
/* Work buffers of 64 DCTELEMs each. block is filled from block1, either
 * copied straight or through the permutation selected by the format value.
 * This change raises block's alignment from 8 to 16 bytes (presumably because
 * the newly added SSE2 IDCT needs 16-byte aligned data -- confirm); block1
 * and block_org keep their 8-byte alignment. */
-static DCTELEM block[64] __attribute__ ((aligned (8)));
+static DCTELEM block[64] __attribute__ ((aligned (16)));
static DCTELEM block1[64] __attribute__ ((aligned (8)));
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
for(i=0;i<64;i++)
block[idct_simple_mmx_perm[i]] = block1[i];
+ } else if (form == SSE2_PERM) {
+ for(i=0; i<64; i++)
+ block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
} else {
for(i=0; i<64; i++)
block[i]= block1[i];
}
#endif
}
- for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
+ for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
#if 1 // dump systematic errors
for(i=0; i<64; i++){
init_fdct();
idct_mmx_init();
+
+#ifndef mm_flags
mm_flags = mm_support();
+#endif
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
for(i=0;i<MAX_NEG_CROP;i++) {