swr: fix compilation failure with old versions of gcc.

[ffmpeg] / libswscale / swscale.c
diff --git a/libswscale/swscale.c b/libswscale/swscale.c

index a98a4bb90fbf99d4b216d8a7c2a24032adc55b69..38856d9c37a7e444543262113d7c062785725399 100644 (file)
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1,20 +1,20 @@
  /*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
   *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
   *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
@@ -27,6 +27,7 @@
  #include "swscale.h"
  #include "swscale_internal.h"
  #include "rgb2rgb.h"
+#include "libavutil/avassert.h"
  #include "libavutil/intreadwrite.h"
  #include "libavutil/cpu.h"
  #include "libavutil/avutil.h"
@@ -47,6 +48,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
  DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
  {  64, 64, 64, 64, 64, 64, 64, 64 };
  
+
  static av_always_inline void fillPlane(uint8_t* plane, int stride,
                                         int width, int height,
                                         int y, uint8_t val)
@@ -69,6 +71,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
      int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
      int sh = bits - 4;
  
+    if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
+        sh= 9;
+
      for (i = 0; i < dstW; i++) {
          int j;
          int srcPos = filterPos[i];
@@ -90,6 +95,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
      const uint16_t *src = (const uint16_t *) _src;
      int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  
+    if(sh<15)
+        sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
      for (i = 0; i < dstW; i++) {
          int j;
          int srcPos = filterPos[i];
@@ -204,7 +212,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
      int i;
      int32_t *dst = (int32_t *) _dst;
      for (i = 0; i < width; i++)
-        dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
+        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
  }
  
  static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
@@ -218,6 +226,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
          dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
          xpos+=xInc;
      }
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+        dst[i] = src[srcW-1]*128;
  }
  
  // *** horizontal scale Y line to temp buffer
@@ -228,12 +238,12 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                       uint8_t *formatConvBuffer,
                                       uint32_t *pal, int isAlpha)
  {
-    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
      void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
      const uint8_t *src = src_in[isAlpha ? 3 : 0];
  
      if (toYV12) {
-        toYV12(formatConvBuffer, src, srcW, pal);
+        toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
          src= formatConvBuffer;
      } else if (c->readLumPlanar && !isAlpha) {
          c->readLumPlanar(formatConvBuffer, src_in, srcW);
@@ -263,6 +273,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
          dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
          xpos+=xInc;
      }
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+        dst1[i] = src1[srcW-1]*128;
+        dst2[i] = src2[srcW-1]*128;
+    }
  }
  
  static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
@@ -273,12 +287,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
  {
      const uint8_t *src1 = src_in[1], *src2 = src_in[2];
      if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
-        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
+        c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
          src1= formatConvBuffer;
          src2= buf2;
      } else if (c->readChrPlanar) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
          c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
          src1= formatConvBuffer;
          src2= buf2;
@@ -316,8 +330,6 @@ static int swScale(SwsContext *c, const uint8_t* src[],
      int32_t *vChrFilterPos= c->vChrFilterPos;
      int32_t *hLumFilterPos= c->hLumFilterPos;
      int32_t *hChrFilterPos= c->hChrFilterPos;
-    int16_t *vLumFilter= c->vLumFilter;
-    int16_t *vChrFilter= c->vChrFilter;
      int16_t *hLumFilter= c->hLumFilter;
      int16_t *hChrFilter= c->hChrFilter;
      int32_t *lumMmxFilter= c->lumMmxFilter;
@@ -337,13 +349,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
      const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
      int lastDstY;
      uint32_t *pal=c->pal_yuv;
+    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
+
      yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
      yuv2planarX_fn yuv2planeX = c->yuv2planeX;
      yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
      yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
      yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
      yuv2packedX_fn yuv2packedX = c->yuv2packedX;
-    int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
  
      /* vars which will change and which we need to store back in the context */
      int dstY= c->dstY;
@@ -373,7 +386,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
      DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
                     vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
  
-    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
+    if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
          static int warnedAlready=0; //FIXME move this into the context perhaps
          if (flags & SWS_PRINT_INFO && !warnedAlready) {
              av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
@@ -382,6 +395,18 @@ static int swScale(SwsContext *c, const uint8_t* src[],
          }
      }
  
+    if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
+        || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
+        || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
+    ) {
+        static int warnedAlready=0;
+        int cpu_flags = av_get_cpu_flags();
+        if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
+            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
+            warnedAlready=1;
+        }
+    }
+
      /* Note the user might start scaling the picture in the middle so this
         will not get executed. This is not really intended but works
         currently, so people might do it. */
@@ -406,6 +431,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
              dst[2] + dstStride[2] * chrDstY,
              (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
          };
+        int use_mmx_vfilter= c->use_mmx_vfilter;
  
          const int firstLumSrcY= FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); //First line needed as input
          const int firstLumSrcY2= FFMAX(1 - vLumFilterSize, vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]);
@@ -503,85 +529,66 @@ static int swScale(SwsContext *c, const uint8_t* src[],
              // hmm looks like we can't use MMX here without overwriting this array's tail
              ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX,  &yuv2nv12cX,
                                       &yuv2packed1, &yuv2packed2, &yuv2packedX);
+            use_mmx_vfilter= 0;
          }
  
          {
-            const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
-            const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-
-            if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
-                const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
-                int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
-                for (i = 0; i < neg;            i++)
-                    tmpY[i] = lumSrcPtr[neg];
-                for (     ; i < end;            i++)
-                    tmpY[i] = lumSrcPtr[i];
-                for (     ; i < vLumFilterSize; i++)
-                    tmpY[i] = tmpY[i-1];
-                lumSrcPtr = tmpY;
-
-                if (alpSrcPtr) {
-                    const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
-                    for (i = 0; i < neg;            i++)
-                        tmpA[i] = alpSrcPtr[neg];
-                    for (     ; i < end;            i++)
-                        tmpA[i] = alpSrcPtr[i];
-                    for (     ; i < vLumFilterSize; i++)
-                        tmpA[i] = tmpA[i - 1];
-                    alpSrcPtr = tmpA;
-                }
-            }
-            if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
-                const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
-                              **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
-                int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
-                for (i = 0; i < neg;            i++) {
-                    tmpU[i] = chrUSrcPtr[neg];
-                    tmpV[i] = chrVSrcPtr[neg];
-                }
-                for (     ; i < end;            i++) {
-                    tmpU[i] = chrUSrcPtr[i];
-                    tmpV[i] = chrVSrcPtr[i];
-                }
-                for (     ; i < vChrFilterSize; i++) {
-                    tmpU[i] = tmpU[i - 1];
-                    tmpV[i] = tmpV[i - 1];
-                }
-                chrUSrcPtr = tmpU;
-                chrVSrcPtr = tmpV;
-            }
+            const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+            const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+            const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+            int16_t *vLumFilter= c->vLumFilter;
+            int16_t *vChrFilter= c->vChrFilter;
  
              if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
                  const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
  
+                vLumFilter +=    dstY * vLumFilterSize;
+                vChrFilter += chrDstY * vChrFilterSize;
+
+//                 av_assert0(use_mmx_vfilter != (
+//                                yuv2planeX == yuv2planeX_10BE_c
+//                             || yuv2planeX == yuv2planeX_10LE_c
+//                             || yuv2planeX == yuv2planeX_9BE_c
+//                             || yuv2planeX == yuv2planeX_9LE_c
+//                             || yuv2planeX == yuv2planeX_16BE_c
+//                             || yuv2planeX == yuv2planeX_16LE_c
+//                             || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
+
+                if(use_mmx_vfilter){
+                    vLumFilter= c->lumMmxFilter;
+                    vChrFilter= c->chrMmxFilter;
+                }
+
                  if (vLumFilterSize == 1) {
                      yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
                  } else {
-                    yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
+                    yuv2planeX(vLumFilter, vLumFilterSize,
                                 lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
                  }
  
                  if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
                      if (yuv2nv12cX) {
-                        yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
+                        yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
                      } else if (vChrFilterSize == 1) {
                          yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
                          yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
                      } else {
-                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
+                        yuv2planeX(vChrFilter, vChrFilterSize,
                                     chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
-                        yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
-                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
+                        yuv2planeX(vChrFilter, vChrFilterSize,
+                                   chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
                      }
                  }
  
                  if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
+                    if(use_mmx_vfilter){
+                        vLumFilter= c->alpMmxFilter;
+                    }
                      if (vLumFilterSize == 1) {
                          yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
                      } else {
-                        yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
+                        yuv2planeX(vLumFilter, vLumFilterSize,
                                     alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
                      }
                  }
@@ -643,6 +650,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
  
      ff_sws_init_input_funcs(c);
  
+
      if (c->srcBpc == 8) {
          if (c->dstBpc <= 10) {
              c->hyScale = c->hcScale = hScale8To15_c;