swscale: make filterPos 32bit.

author Ronald S. Bultje <rsbultje@gmail.com>

Mon, 5 Mar 2012 20:26:42 +0000 (12:26 -0800)

committer Ronald S. Bultje <rsbultje@gmail.com>

Tue, 6 Mar 2012 18:47:41 +0000 (10:47 -0800)
author Ronald S. Bultje <rsbultje@gmail.com>
Mon, 5 Mar 2012 20:26:42 +0000 (12:26 -0800)
committer Ronald S. Bultje <rsbultje@gmail.com>
Tue, 6 Mar 2012 18:47:41 +0000 (10:47 -0800)
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c

index 3041053096257312880146920f3b39ae49c1a2df..5537707bd0cb79b0c60439400c1b1ad856a4f139 100644 (file)
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
  
  static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
                                  const uint8_t *src, const int16_t *filter,
-                                const int16_t *filterPos, int filterSize)
+                                const int32_t *filterPos, int filterSize)
  {
      register int i;
      DECLARE_ALIGNED(16, int, tempo)[4];
diff --git a/libswscale/swscale.c b/libswscale/swscale.c

index bd909bf16302e951ca97ad4f2bdb97da7e430559..a98a4bb90fbf99d4b216d8a7c2a24032adc55b69 100644 (file)
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -61,7 +61,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
  
  static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                             const int16_t *filter,
-                           const int16_t *filterPos, int filterSize)
+                           const int32_t *filterPos, int filterSize)
  {
      int i;
      int32_t *dst = (int32_t *) _dst;
@@ -84,7 +84,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
  
  static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                             const int16_t *filter,
-                           const int16_t *filterPos, int filterSize)
+                           const int32_t *filterPos, int filterSize)
  {
      int i;
      const uint16_t *src = (const uint16_t *) _src;
@@ -105,7 +105,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
  
  // bilinear / bicubic scaling
  static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
-                          const int16_t *filter, const int16_t *filterPos,
+                          const int16_t *filter, const int32_t *filterPos,
                            int filterSize)
  {
      int i;
@@ -123,7 +123,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *
  }
  
  static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
-                          const int16_t *filter, const int16_t *filterPos,
+                          const int16_t *filter, const int32_t *filterPos,
                            int filterSize)
  {
      int i;
@@ -224,7 +224,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
  static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                       const uint8_t *src_in[4], int srcW, int xInc,
                                       const int16_t *hLumFilter,
-                                     const int16_t *hLumFilterPos, int hLumFilterSize,
+                                     const int32_t *hLumFilterPos, int hLumFilterSize,
                                       uint8_t *formatConvBuffer,
                                       uint32_t *pal, int isAlpha)
  {
@@ -268,7 +268,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
  static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                       const uint8_t *src_in[4],
                                       int srcW, int xInc, const int16_t *hChrFilter,
-                                     const int16_t *hChrFilterPos, int hChrFilterSize,
+                                     const int32_t *hChrFilterPos, int hChrFilterSize,
                                       uint8_t *formatConvBuffer, uint32_t *pal)
  {
      const uint8_t *src1 = src_in[1], *src2 = src_in[2];
@@ -312,10 +312,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
      const int chrXInc= c->chrXInc;
      const enum PixelFormat dstFormat= c->dstFormat;
      const int flags= c->flags;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
-    int16_t *hLumFilterPos= c->hLumFilterPos;
-    int16_t *hChrFilterPos= c->hChrFilterPos;
+    int32_t *vLumFilterPos= c->vLumFilterPos;
+    int32_t *vChrFilterPos= c->vChrFilterPos;
+    int32_t *hLumFilterPos= c->hLumFilterPos;
+    int32_t *hChrFilterPos= c->hChrFilterPos;
      int16_t *vLumFilter= c->vLumFilter;
      int16_t *vChrFilter= c->vChrFilter;
      int16_t *hLumFilter= c->hLumFilter;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h

index bc36826ea23b82f486b62a0ac8e4f63a1df17e6e..29fe4ff2c6b21ccff6c87e3bb9d676d0d1df9c14 100644 (file)
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -295,10 +295,10 @@ typedef struct SwsContext {
      int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
      int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
      int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
-    int16_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
-    int16_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
-    int16_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
-    int16_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
+    int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+    int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
+    int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
+    int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
      int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
      int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
      int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
@@ -508,10 +508,10 @@ typedef struct SwsContext {
      /** @{ */
      void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
                      const uint8_t *src, const int16_t *filter,
-                    const int16_t *filterPos, int filterSize);
+                    const int32_t *filterPos, int filterSize);
      void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
                      const uint8_t *src, const int16_t *filter,
-                    const int16_t *filterPos, int filterSize);
+                    const int32_t *filterPos, int filterSize);
      /** @} */
  
      /// Color range conversion function for luma plane if needed.
diff --git a/libswscale/utils.c b/libswscale/utils.c

index 7c89a70953a829ccd3450a7c402ec93ada721781..51dd33111707adc926d59400b5282a2229c47dc8 100644 (file)
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -180,7 +180,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
                                           dist-1.0);
  }
  
-static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
+static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc,
                        int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
                        SwsVector *srcFilter, SwsVector *dstFilter, double param[2], int is_horizontal)
  {
@@ -196,7 +196,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
      emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
  
      // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end
-    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail);
+    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail);
  
      if (FFABS(xInc - 0x10000) <10) { // unscaled
          int i;
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm

index 0d367f7a14e6085172d133c8d4ee1bc63c91ffdf..2387d0266b31173ea26ffdaa96f87887b28c5512 100644 (file)
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -38,7 +38,7 @@ SECTION .text
  ;                               (SwsContext *c, int{16,32}_t *dst,
  ;                                int dstW, const uint{8,16}_t *src,
  ;                                const int16_t *filter,
-;                                const int16_t *filterPos, int filterSize);
+;                                const int32_t *filterPos, int filterSize);
  ;
  ; Scale one horizontal line. Input is either 8-bits width or 16-bits width
  ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
@@ -53,6 +53,9 @@ SECTION .text
  cglobal hscale%1to%2_%4_%5, %6, 7, %7
  %if ARCH_X86_64
      movsxd        r2, r2d
+%define mov32 movsxd
+%else ; x86-32
+%define mov32 mov
  %endif ; x86-64
  %if %2 == 19
  %if mmsize == 8 ; mmx
@@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
  %else ; %2 == 19
      lea           r1, [r1+r2*(4>>r2shr)]
  %endif ; %2 == 15/19
-    lea           r5, [r5+r2*(2>>r2shr)]
+    lea           r5, [r5+r2*(4>>r2shr)]
      neg           r2
  
  .loop:
  %if %3 == 4 ; filterSize == 4 scaling
      ; load 2x4 or 4x4 source pixels into m0/m1
-    movsx         r0, word [r5+r2*2+0]   ; filterPos[0]
-    movsx         r6, word [r5+r2*2+2]   ; filterPos[1]
+    mov32         r0, dword [r5+r2*4+0]  ; filterPos[0]
+    mov32         r6, dword [r5+r2*4+4]  ; filterPos[1]
      movlh         m0, [r3+r0*srcmul]     ; src[filterPos[0] + {0,1,2,3}]
  %if mmsize == 8
      movlh         m1, [r3+r6*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
@@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
  %else ; %1 == 8
      movd          m4, [r3+r6*srcmul]     ; src[filterPos[1] + {0,1,2,3}]
  %endif
-    movsx         r0, word [r5+r2*2+4]   ; filterPos[2]
-    movsx         r6, word [r5+r2*2+6]   ; filterPos[3]
+    mov32         r0, dword [r5+r2*4+8]  ; filterPos[2]
+    mov32         r6, dword [r5+r2*4+12] ; filterPos[3]
      movlh         m1, [r3+r0*srcmul]     ; src[filterPos[2] + {0,1,2,3}]
  %if %1 > 8
      movhps        m1, [r3+r6*srcmul]     ; src[filterPos[3] + {0,1,2,3}]
@@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
  %endif ; mmx/sse2/ssse3/sse4
  %else ; %3 == 8, i.e. filterSize == 8 scaling
      ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
-    movsx         r0, word [r5+r2*1+0]   ; filterPos[0]
-    movsx         r6, word [r5+r2*1+2]   ; filterPos[1]
+    mov32         r0, dword [r5+r2*2+0]  ; filterPos[0]
+    mov32         r6, dword [r5+r2*2+4]  ; filterPos[1]
      movbh         m0, [r3+ r0   *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
  %if mmsize == 8
      movbh         m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
@@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
      movbh         m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
  %else ; mmsize == 16
      movbh         m1, [r3+ r6   *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
-    movsx         r0, word [r5+r2*1+4]   ; filterPos[2]
-    movsx         r6, word [r5+r2*1+6]   ; filterPos[3]
+    mov32         r0, dword [r5+r2*2+8]  ; filterPos[2]
+    mov32         r6, dword [r5+r2*2+12] ; filterPos[3]
      movbh         m4, [r3+ r0   *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
      movbh         m5, [r3+ r6   *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
  %endif ; mmsize == 8/16
@@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
  %define r1x     r1
  %define filter2 r6m
  %endif ; x86-32/64
-    lea           r5, [r5+r2*2]
+    lea           r5, [r5+r2*4]
  %if %2 == 15
      lea           r1, [r1+r2*2]
  %else ; %2 == 19
@@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
      neg           r2
  
  .loop:
-    movsx         r0, word [r5+r2*2+0]   ; filterPos[0]
-    movsx        r1x, word [r5+r2*2+2]   ; filterPos[1]
+    mov32         r0, dword [r5+r2*4+0]  ; filterPos[0]
+    mov32        r1x, dword [r5+r2*4+4]  ; filterPos[1]
      ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
      pxor          m4, m4
      pxor          m5, m5
@@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
      jl .innerloop
  
  %ifidn %4, X4
-    movsx        r1x, word [r5+r2*2+2]   ; filterPos[1]
+    mov32        r1x, dword [r5+r2*4+4]  ; filterPos[1]
      movlh         m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0]
      sub          r1x, r6                   ; and first 4 srcpx of dstpx[1]
  %if %1 > 8
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c

index 64d5f0fc9d5de19b06cae001f9fe675064646f91..99b32623cbdfa8d4dba433d5919b3456b9011c57 100644 (file)
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -93,8 +93,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
      int16_t **alpPixBuf= c->alpPixBuf;
      const int vLumBufSize= c->vLumBufSize;
      const int vChrBufSize= c->vChrBufSize;
-    int16_t *vLumFilterPos= c->vLumFilterPos;
-    int16_t *vChrFilterPos= c->vChrFilterPos;
+    int32_t *vLumFilterPos= c->vLumFilterPos;
+    int32_t *vChrFilterPos= c->vChrFilterPos;
      int16_t *vLumFilter= c->vLumFilter;
      int16_t *vChrFilter= c->vChrFilter;
      int32_t *lumMmxFilter= c->lumMmxFilter;
@@ -204,7 +204,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt(
                                                  SwsContext *c, int16_t *data, \
                                                  int dstW, const uint8_t *src, \
                                                  const int16_t *filter, \
-                                                const int16_t *filterPos, int filterSize)
+                                                const int32_t *filterPos, int filterSize)
  
  #define SCALE_FUNCS(filter_n, opt) \
      SCALE_FUNC(filter_n,  8, 15, opt); \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c

index 4db3fb3086eff87368c0214caa4b54b1ac61e35f..ad2b32f27b734a3d26bef1b8e3e4cfc7087e94e5 100644 (file)
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1376,7 +1376,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
                                   int dstWidth, const uint8_t *src,
                                   int srcW, int xInc)
  {
-    int16_t *filterPos = c->hLumFilterPos;
+    int32_t *filterPos = c->hLumFilterPos;
      int16_t *filter    = c->hLumFilter;
      void    *mmx2FilterCode= c->lumMmx2FilterCode;
      int i;
@@ -1472,7 +1472,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
                                   int dstWidth, const uint8_t *src1,
                                   const uint8_t *src2, int srcW, int xInc)
  {
-    int16_t *filterPos = c->hChrFilterPos;
+    int32_t *filterPos = c->hChrFilterPos;
      int16_t *filter    = c->hChrFilter;
      void    *mmx2FilterCode= c->chrMmx2FilterCode;
      int i;
author	Ronald S. Bultje <rsbultje@gmail.com>
	Mon, 5 Mar 2012 20:26:42 +0000 (12:26 -0800)
committer	Ronald S. Bultje <rsbultje@gmail.com>
	Tue, 6 Mar 2012 18:47:41 +0000 (10:47 -0800)
libswscale/ppc/swscale_altivec.c		patch \| blob \| history
libswscale/swscale.c		patch \| blob \| history
libswscale/swscale_internal.h		patch \| blob \| history
libswscale/utils.c		patch \| blob \| history
libswscale/x86/scale.asm		patch \| blob \| history
libswscale/x86/swscale_mmx.c		patch \| blob \| history
libswscale/x86/swscale_template.c		patch \| blob \| history