]> git.sesse.net Git - ffmpeg/blobdiff - postproc/postprocess_template.c
warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
[ffmpeg] / postproc / postprocess_template.c
index f24eccf2cecd243d2c09b964975e19d5c97ca52c..a75c9b4dc895f35eeb30ad1742b147c6f2f7b2ad 100644 (file)
@@ -56,8 +56,9 @@ asm volatile(
                "leal (%1, %2), %%eax                           \n\t"
 //     0       1       2       3       4       5       6       7       8       9
 //     %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
-               "movq %3, %%mm7                                 \n\t" // mm7 = 0x7F
-               "movq %4, %%mm6                                 \n\t" // mm6 = 0x7D
+               "movq %3, %%mm7                                 \n\t" 
+               "movq %4, %%mm6                                 \n\t" 
+
                "movq (%1), %%mm0                               \n\t"
                "movq (%%eax), %%mm1                            \n\t"
                "psubb %%mm1, %%mm0                             \n\t" // mm0 = differnece
@@ -119,7 +120,7 @@ asm volatile(
 #endif
                "movd %%mm0, %0                                 \n\t"
                : "=r" (numEq)
-               : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold)
+               : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
                : "%eax"
                );
        numEq= (-numEq) &0xFF;
@@ -150,6 +151,7 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c
                );
        return isOk==0;
 #else
+#if 1
        int x;
        const int QP= c->QP;
        src+= stride*3;
@@ -158,8 +160,26 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
        }
 
+       return 1;
+#else
+       int x;
+       const int QP= c->QP;
+       src+= stride*4;
+       for(x=0; x<BLOCK_SIZE; x++)
+       {
+               int min=255;
+               int max=0;
+               int y;
+               for(y=0; y<8; y++){
+                       int v= src[x + y*stride];
+                       if(v>max) max=v;
+                       if(v<min) min=v;
+               }
+               if(max-min > 2*QP) return 0;
+       }
        return 1;
 #endif
+#endif
 }
 
 /**
@@ -872,11 +892,8 @@ src-=8;
 */
 #elif defined (HAVE_MMX)
        src+= stride*4;
-
        asm volatile(
                "pxor %%mm7, %%mm7                              \n\t"
-               "leal (%0, %1), %%eax                           \n\t"
-               "leal (%%eax, %1, 4), %%edx                     \n\t"
                "leal -40(%%esp), %%ecx                         \n\t" // make space for 4 8-byte vars
                "andl $0xFFFFFFF8, %%ecx                        \n\t" // align
 //     0       1       2       3       4       5       6       7
@@ -888,12 +905,13 @@ src-=8;
                "punpcklbw %%mm7, %%mm0                         \n\t" // low part of line 0
                "punpckhbw %%mm7, %%mm1                         \n\t" // high part of line 0
 
-               "movq (%%eax), %%mm2                            \n\t"
+               "movq (%0, %1), %%mm2                           \n\t"
+               "leal (%0, %1, 2), %%eax                        \n\t"
                "movq %%mm2, %%mm3                              \n\t"
                "punpcklbw %%mm7, %%mm2                         \n\t" // low part of line 1
                "punpckhbw %%mm7, %%mm3                         \n\t" // high part of line 1
 
-               "movq (%%eax, %1), %%mm4                        \n\t"
+               "movq (%%eax), %%mm4                            \n\t"
                "movq %%mm4, %%mm5                              \n\t"
                "punpcklbw %%mm7, %%mm4                         \n\t" // low part of line 2
                "punpckhbw %%mm7, %%mm5                         \n\t" // high part of line 2
@@ -910,7 +928,7 @@ src-=8;
                "psubw %%mm2, %%mm0                             \n\t" // 2L0 - 5L1 + 5L2
                "psubw %%mm3, %%mm1                             \n\t" // 2H0 - 5H1 + 5H2
 
-               "movq (%%eax, %1, 2), %%mm2                     \n\t"
+               "movq (%%eax, %1), %%mm2                        \n\t"
                "movq %%mm2, %%mm3                              \n\t"
                "punpcklbw %%mm7, %%mm2                         \n\t" // L3
                "punpckhbw %%mm7, %%mm3                         \n\t" // H3
@@ -922,7 +940,7 @@ src-=8;
                "movq %%mm0, (%%ecx)                            \n\t" // 2L0 - 5L1 + 5L2 - 2L3
                "movq %%mm1, 8(%%ecx)                           \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
-               "movq (%0, %1, 4), %%mm0                        \n\t"
+               "movq (%%eax, %1, 2), %%mm0                     \n\t"
                "movq %%mm0, %%mm1                              \n\t"
                "punpcklbw %%mm7, %%mm0                         \n\t" // L4
                "punpckhbw %%mm7, %%mm1                         \n\t" // H4
@@ -936,12 +954,13 @@ src-=8;
                "psubw %%mm2, %%mm4                             \n\t" // 2L2 - L3 + L4
                "psubw %%mm3, %%mm5                             \n\t" // 2H2 - H3 + H4
 
+               "leal (%%eax, %1), %0                           \n\t"
                "psllw $2, %%mm2                                \n\t" // 4L3 - 4L4
                "psllw $2, %%mm3                                \n\t" // 4H3 - 4H4
                "psubw %%mm2, %%mm4                             \n\t" // 2L2 - 5L3 + 5L4
                "psubw %%mm3, %%mm5                             \n\t" // 2H2 - 5H3 + 5H4
 //50 opcodes so far
-               "movq (%%edx), %%mm2                            \n\t"
+               "movq (%0, %1, 2), %%mm2                        \n\t"
                "movq %%mm2, %%mm3                              \n\t"
                "punpcklbw %%mm7, %%mm2                         \n\t" // L5
                "punpckhbw %%mm7, %%mm3                         \n\t" // H5
@@ -950,10 +969,10 @@ src-=8;
                "psubw %%mm2, %%mm4                             \n\t" // 2L2 - 5L3 + 5L4 - 2L5
                "psubw %%mm3, %%mm5                             \n\t" // 2H2 - 5H3 + 5H4 - 2H5
 
-               "movq (%%edx, %1), %%mm6                        \n\t"
+               "movq (%%eax, %1, 4), %%mm6                     \n\t"
                "punpcklbw %%mm7, %%mm6                         \n\t" // L6
                "psubw %%mm6, %%mm2                             \n\t" // L5 - L6
-               "movq (%%edx, %1), %%mm6                        \n\t"
+               "movq (%%eax, %1, 4), %%mm6                     \n\t"
                "punpckhbw %%mm7, %%mm6                         \n\t" // H6
                "psubw %%mm6, %%mm3                             \n\t" // H5 - H6
 
@@ -967,7 +986,7 @@ src-=8;
                "psubw %%mm2, %%mm0                             \n\t" // 2L4 - 5L5 + 5L6
                "psubw %%mm3, %%mm1                             \n\t" // 2H4 - 5H5 + 5H6
 
-               "movq (%%edx, %1, 2), %%mm2                     \n\t"
+               "movq (%0, %1, 4), %%mm2                        \n\t"
                "movq %%mm2, %%mm3                              \n\t"
                "punpcklbw %%mm7, %%mm2                         \n\t" // L7
                "punpckhbw %%mm7, %%mm3                         \n\t" // H7
@@ -1090,16 +1109,16 @@ src-=8;
                "psubw %%mm6, %%mm4                             \n\t"
                "psubw %%mm7, %%mm5                             \n\t"
                "packsswb %%mm5, %%mm4                          \n\t"
-               "movq (%%eax, %1, 2), %%mm0                     \n\t"
+               "movq (%0), %%mm0                               \n\t"
                "paddb   %%mm4, %%mm0                           \n\t"
-               "movq %%mm0, (%%eax, %1, 2)                     \n\t"
-               "movq (%0, %1, 4), %%mm0                        \n\t"
+               "movq %%mm0, (%0)                               \n\t"
+               "movq (%0, %1), %%mm0                           \n\t"
                "psubb %%mm4, %%mm0                             \n\t"
-               "movq %%mm0, (%0, %1, 4)                        \n\t"
+               "movq %%mm0, (%0, %1)                           \n\t"
 
-               :
-               : "r" (src), "r" (stride), "m" (c->pQPb)
-               : "%eax", "%edx", "%ecx"
+               : "+r" (src)
+               : "r" (stride), "m" (c->pQPb)
+               : "%eax", "%ecx"
        );
 #else
        const int l1= stride;
@@ -2640,21 +2659,25 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
        int QPCorrecture= 256*256;
 
        int copyAhead;
+#ifdef HAVE_MMX
+       int i;
+#endif
 
        //FIXME remove
        uint64_t * const yHistogram= c.yHistogram;
        uint8_t * const tempSrc= c.tempSrc;
        uint8_t * const tempDst= c.tempDst;
-
-       c.dcOffset= c.ppMode.maxDcDiff;
-       c.dcThreshold= c.ppMode.maxDcDiff*2 + 1;
+       const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
 
 #ifdef HAVE_MMX
-       c.mmxDcOffset= 0x7F - c.dcOffset;
-       c.mmxDcThreshold= 0x7F - c.dcThreshold;
-
-       c.mmxDcOffset*= 0x0101010101010101LL;
-       c.mmxDcThreshold*= 0x0101010101010101LL;
+       for(i=0; i<32; i++){
+               int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
+               int threshold= offset*2 + 1;
+               c.mmxDcOffset[i]= 0x7F - offset;
+               c.mmxDcThreshold[i]= 0x7F - threshold;
+               c.mmxDcOffset[i]*= 0x0101010101010101LL;
+               c.mmxDcThreshold[i]*= 0x0101010101010101LL;
+       }
 #endif
 
        if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
@@ -2690,7 +2713,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
 //             printf("\n\n");
 
                /* we allways get a completly black picture first */
-               maxClipped= (uint64_t)(sum * maxClippedThreshold);
+               maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
 
                clipped= sum;
                for(black=255; black>0; black--)
@@ -2814,11 +2837,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
                uint8_t *tempBlock1= c.tempBlocks;
                uint8_t *tempBlock2= c.tempBlocks + 8;
 #endif
-#ifdef ARCH_X86
-               int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride];
-               int QPDelta= isColor ? (-1) : 1<<31;
-               int QPFrac= 1<<30;
-#endif
+               int8_t *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride];
+               int8_t *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth];
                int QP=0;
                /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
                   if not than use a temporary buffer */
@@ -2855,28 +2875,19 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
 #ifdef HAVE_MMX
                        uint8_t *tmpXchg;
 #endif
-#ifdef ARCH_X86
-                       QP= *QPptr;
-                       asm volatile(
-                               "addl %2, %1            \n\t"
-                               "sbbl %%eax, %%eax      \n\t"
-                               "shll $2, %%eax         \n\t"
-                               "subl %%eax, %0         \n\t"
-                               : "+r" (QPptr), "+m" (QPFrac)
-                               : "r" (QPDelta)
-                               : "%eax"
-                       );
-#else
-                       QP= isColor ?
-                                QPs[(y>>3)*QPStride + (x>>3)]:
-                                QPs[(y>>4)*QPStride + (x>>4)];
-#endif
-                       if(!isColor)
+                       if(isColor)
+                       {
+                               QP= QPptr[x>>3];
+                               c.nonBQP= nonBQPptr[x>>3];
+                       }
+                       else
                        {
+                               QP= QPptr[x>>4];
                                QP= (QP* QPCorrecture + 256*128)>>16;
+                               c.nonBQP= nonBQPptr[x>>4];
+                               c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
                                yHistogram[ srcBlock[srcStride*12 + 4] ]++;
                        }
-//printf("%d ", QP);
                        c.QP= QP;
 #ifdef HAVE_MMX
                        asm volatile(