* mmx/mmx2/3dnow postprocess code.
*/
-#include "x86_cpu.h"
+#include "libavutil/x86_cpu.h"
#define ALIGN_MASK "$-8"
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
int numEq= 0, dcOk;
src+= stride*4; // src points to begin of the 8x8 Block
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t"
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
);
- asm volatile(
+ __asm__ volatile(
"lea (%2, %3), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
- asm volatile( //"movv %0 %1 %2\n\t"
+ __asm__ volatile( //"movv %0 %1 %2\n\t"
"movq %2, %%mm0 \n\t" // QP,..., QP
"pxor %%mm4, %%mm4 \n\t"
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
// FIXME rounding
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
"movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%"REG_a" \n\t"
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t" // 0
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
memcpy(tmp+8, src+stride*8, 8);
*/
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
#if 0 //slightly more accurate and slightly slower
"pxor %%mm7, %%mm7 \n\t" // 0
*/
#elif defined (HAVE_MMX)
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
"and "ALIGN_MASK", %%"REG_c" \n\t" // align
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
- asm volatile(
+ __asm__ volatile(
"pxor %%mm6, %%mm6 \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t"
"movq %2, %%mm0 \n\t"
f= (f + 8)>>4;
#ifdef DEBUG_DERING_THRESHOLD
- asm volatile("emms\n\t":);
+ __asm__ volatile("emms\n\t":);
{
static long long numPixels=0;
if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= 4*stride;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
// 0 1 2 3 4 5 6 7 8 9
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"pxor %%mm7, %%mm7 \n\t"
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*4;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"pxor %%mm7, %%mm7 \n\t"
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= 4*stride;
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
#ifdef HAVE_MMX
src+= 4*stride;
#ifdef HAVE_MMX2
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
);
#else // MMX without MMX2
- asm volatile(
+ __asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
*/
static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
{
- asm(
+ __asm__(
"lea (%0, %1), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
*/
static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
{
- asm(
+ __asm__(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a",%1,4), %%"REG_d" \n\t"
// 0 1 2 3 4 5 6 7 8 9
#ifndef HAVE_ALTIVEC
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
- uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
+ uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
{
// to save a register (FIXME do this outside of the loops)
- tempBluredPast[127]= maxNoise[0];
- tempBluredPast[128]= maxNoise[1];
- tempBluredPast[129]= maxNoise[2];
+ tempBlurredPast[127]= maxNoise[0];
+ tempBlurredPast[128]= maxNoise[1];
+ tempBlurredPast[129]= maxNoise[2];
#define FAST_L2_DIFF
//#define L1_DIFF //u should change the thresholds too if u try that one
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
- asm volatile(
+ __asm__ volatile(
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
"lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride
"4: \n\t"
- :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
+ :: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast)
: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
);
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
for(y=0; y<8; y++){
int x;
for(x=0; x<8; x++){
- int ref= tempBlured[ x + y*stride ];
+ int ref= tempBlurred[ x + y*stride ];
int cur= src[ x + y*stride ];
int d1=ref - cur;
// if(x==0 || x==7) d1+= d1>>1;
i=d;
d= (
4*d
- +(*(tempBluredPast-256))
- +(*(tempBluredPast-1))+ (*(tempBluredPast+1))
- +(*(tempBluredPast+256))
+ +(*(tempBlurredPast-256))
+ +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))
+ +(*(tempBlurredPast+256))
+4)>>3;
- *tempBluredPast=i;
-// ((*tempBluredPast)*3 + d + 2)>>2;
+ *tempBlurredPast=i;
+// ((*tempBlurredPast)*3 + d + 2)>>2;
/*
Switch between
for(y=0; y<8; y++){
int x;
for(x=0; x<8; x++){
- int ref= tempBlured[ x + y*stride ];
+ int ref= tempBlurred[ x + y*stride ];
int cur= src[ x + y*stride ];
- tempBlured[ x + y*stride ]=
+ tempBlurred[ x + y*stride ]=
src[ x + y*stride ]=
(ref + cur + 1)>>1;
}
for(y=0; y<8; y++){
int x;
for(x=0; x<8; x++){
- tempBlured[ x + y*stride ]= src[ x + y*stride ];
+ tempBlurred[ x + y*stride ]= src[ x + y*stride ];
}
}
}
for(y=0; y<8; y++){
int x;
for(x=0; x<8; x++){
- int ref= tempBlured[ x + y*stride ];
+ int ref= tempBlurred[ x + y*stride ];
int cur= src[ x + y*stride ];
- tempBlured[ x + y*stride ]=
+ tempBlurred[ x + y*stride ]=
src[ x + y*stride ]=
(ref*7 + cur + 4)>>3;
}
for(y=0; y<8; y++){
int x;
for(x=0; x<8; x++){
- int ref= tempBlured[ x + y*stride ];
+ int ref= tempBlurred[ x + y*stride ];
int cur= src[ x + y*stride ];
- tempBlured[ x + y*stride ]=
+ tempBlurred[ x + y*stride ]=
src[ x + y*stride ]=
(ref*3 + cur + 2)>>2;
}
int64_t sums[10*8*2];
src+= step*3; // src points to begin of the 8x8 Block
//START_TIMER
- asm volatile(
+ __asm__ volatile(
"movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t"
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
);
- asm volatile(
+ __asm__ volatile(
"lea (%2, %3), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
long offset= -8*step;
int64_t *temp_sums= sums;
- asm volatile(
+ __asm__ volatile(
"movq %2, %%mm0 \n\t" // QP,..., QP
"pxor %%mm4, %%mm4 \n\t"
src+= step; // src points to begin of the 8x8 Block
- asm volatile(
+ __asm__ volatile(
"movq %4, %%mm6 \n\t"
"pcmpeqb %%mm5, %%mm5 \n\t"
"pxor %%mm6, %%mm5 \n\t"
if(eq_mask != -1LL){
uint8_t *temp_src= src;
- asm volatile(
+ __asm__ volatile(
"pxor %%mm7, %%mm7 \n\t"
"lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
"and "ALIGN_MASK", %%"REG_c" \n\t" // align
#endif
if(levelFix){
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
"movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
"lea (%2,%4), %%"REG_a" \n\t"
#endif //HAVE_MMX
}else{
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"lea (%0,%2), %%"REG_a" \n\t"
"lea (%1,%3), %%"REG_d" \n\t"
static inline void RENAME(duplicate)(uint8_t src[], int stride)
{
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"movq (%0), %%mm0 \n\t"
"add %1, %0 \n\t"
"movq %%mm0, (%0) \n\t"
prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
*/
- asm(
+ __asm__(
"mov %4, %%"REG_a" \n\t"
"shr $2, %%"REG_a" \n\t"
"and $6, %%"REG_a" \n\t"
}
c.QP= QP;
#ifdef HAVE_MMX
- asm volatile(
+ __asm__ volatile(
"movd %1, %%mm7 \n\t"
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
*/
- asm(
+ __asm__(
"mov %4, %%"REG_a" \n\t"
"shr $2, %%"REG_a" \n\t"
"and $6, %%"REG_a" \n\t"
if(mode & TEMP_NOISE_FILTER)
{
RENAME(tempNoiseReducer)(dstBlock-8, stride,
- c.tempBlured[isColor] + y*dstStride + x,
- c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
+ c.tempBlurred[isColor] + y*dstStride + x,
+ c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
c.ppMode.maxTmpNoise);
}
}
if((mode & TEMP_NOISE_FILTER)){
RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
- c.tempBlured[isColor] + y*dstStride + x,
- c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
+ c.tempBlurred[isColor] + y*dstStride + x,
+ c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
c.ppMode.maxTmpNoise);
}
}*/
}
#ifdef HAVE_3DNOW
- asm volatile("femms");
+ __asm__ volatile("femms");
#elif defined (HAVE_MMX)
- asm volatile("emms");
+ __asm__ volatile("emms");
#endif
#ifdef DEBUG_BRIGHTNESS