%define GLOBAL
%endif
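+; FDEC_STRIDE is the fixed row stride of the per-macroblock decoded-pixel
+; buffer (p_fdec), so predictors no longer need a runtime i_stride argument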
+%assign FDEC_STRIDE 32
%include "amd64inc.asm"
-%macro SAVE_0_1 1
- movq [%1] , mm0
- movq [%1 + 8] , mm1
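+; store an 8x8 block: %1 in rows 0-3, %2 in rows 4-7
+; (parm1q points one row above the block, hence offsets 1..8)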
+%macro STORE8x8 2
+ movq [parm1q + 1*FDEC_STRIDE], %1
+ movq [parm1q + 2*FDEC_STRIDE], %1
+ movq [parm1q + 3*FDEC_STRIDE], %1
+ movq [parm1q + 4*FDEC_STRIDE], %1
+ movq [parm1q + 5*FDEC_STRIDE], %2
+ movq [parm1q + 6*FDEC_STRIDE], %2
+ movq [parm1q + 7*FDEC_STRIDE], %2
+ movq [parm1q + 8*FDEC_STRIDE], %2
%endmacro
-%macro SAVE_0_0 1
- movq [%1] , mm0
- movq [%1 + 8] , mm0
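+; store a 16x16 block, 4 rows per iteration: %1 in the left 8 columns,
+; %2 in the right 8; clobbers eax and advances parm1q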
+%macro STORE16x16 2
+ mov eax, 4
+ALIGN 4
+%%loop:
+ movq [parm1q + 1*FDEC_STRIDE], %1
+ movq [parm1q + 2*FDEC_STRIDE], %1
+ movq [parm1q + 3*FDEC_STRIDE], %1
+ movq [parm1q + 4*FDEC_STRIDE], %1
+ movq [parm1q + 1*FDEC_STRIDE + 8], %2
+ movq [parm1q + 2*FDEC_STRIDE + 8], %2
+ movq [parm1q + 3*FDEC_STRIDE + 8], %2
+ movq [parm1q + 4*FDEC_STRIDE + 8], %2
+ dec eax
+ lea parm1q, [parm1q + 4*FDEC_STRIDE]
+ jnz %%loop
+ nop
%endmacro
%endmacro
%macro PRED8x8_LOAD_TOP 0
- sub parm1q, parm2q
+ sub parm1q, FDEC_STRIDE
- and parm3d, 12
+ and parm2d, 12
movq mm1, [parm1q-1]
movq mm2, [parm1q+1]
- cmp parm3d, byte 8
+ cmp parm2d, byte 8
jge .have_topleft
mov al, [parm1q]
mov ah, [parm1q]
pinsrw mm1, eax, 0
.have_topleft:
- and parm3d, byte 4
+ and parm2d, byte 4
jne .have_topright
mov al, [parm1q+7]
mov ah, [parm1q+7]
;-----------------------------------------------------------------------------
;
-; void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors )
+; void predict_8x8_v_mmxext( uint8_t *src, int i_neighbors )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_v_mmxext:
PRED8x8_LOAD_TOP
-
- lea rax, [parm2q + 2*parm2q]
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2*parm2q], mm0 ; 1
- movq [parm1q + 4*parm2q], mm0 ; 3
- movq [parm1q + 8*parm2q], mm0 ; 7
- add parm1q, rax
- movq [parm1q], mm0 ; 2
- movq [parm1q + 2*parm2q], mm0 ; 4
- movq [parm1q + rax ], mm0 ; 5
- movq [parm1q + 4*parm2q], mm0 ; 6
-
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left );
+; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_neighbors, uint8_t *pix_left );
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_dc_core_mmxext:
- movq mm1, [parm4q-1]
- movq mm2, [parm4q+1]
- PRED8x8_LOWPASS mm4, [parm4q]
+ movq mm1, [parm3q-1]
+ movq mm2, [parm3q+1]
+ PRED8x8_LOWPASS mm4, [parm3q]
PRED8x8_LOAD_TOP
pshufw mm0, mm0, 0
packuswb mm0, mm0
- lea rax, [parm2q + 2*parm2q]
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2*parm2q], mm0 ; 1
- movq [parm1q + 4*parm2q], mm0 ; 3
- movq [parm1q + 8*parm2q], mm0 ; 7
- add parm1q, rax
- movq [parm1q], mm0 ; 2
- movq [parm1q + 2*parm2q], mm0 ; 4
- movq [parm1q + rax ], mm0 ; 5
- movq [parm1q + 4*parm2q], mm0 ; 6
-
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
+; void predict_8x8c_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_v_mmx :
- sub parm1q, parm2q ; esi <-- line -1
-
- movq mm0, [parm1q]
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2 * parm2q], mm0 ; 1
- movq [parm1q + 4 * parm2q], mm0 ; 3
- movq [parm1q + 8 * parm2q], mm0 ; 7
- add parm1q, parm2q ; <-- line 0
- movq [parm1q + 2 * parm2q], mm0 ; 2
- movq [parm1q + 4 * parm2q], mm0 ; 4
- lea parm1q, [parm1q + 4 * parm2q] ; <-- line 4
- movq [parm1q + parm2q], mm0 ; 5
- movq [parm1q + 2 * parm2q], mm0 ; 6
-
+ sub parm1q, FDEC_STRIDE
+ movq mm0, [parm1q]
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 )
+; void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_dc_core_mmxext:
- sub parm1q, parm2q
- lea rax, [parm2q + 2*parm2q]
+ sub parm1q, FDEC_STRIDE
movq mm0, [parm1q]
pxor mm1, mm1
psadbw mm1, mm2 ; s1
psadbw mm0, mm2 ; s0
- movd mm4, parm3d
- movd mm5, parm4d
+ movd mm4, parm2d
+ movd mm5, parm3d
paddw mm0, mm4
pshufw mm2, mm5, 0
psrlw mm0, 3
packuswb mm0, mm1 ; dc0,dc1 (b)
packuswb mm2, mm3 ; dc2,dc3 (b)
- movq [parm1q + parm2q], mm0 ; 0
- movq [parm1q + 2*parm2q], mm0 ; 1
- movq [parm1q + rax ], mm0 ; 2
- movq [parm1q + 4*parm2q], mm0 ; 3
- lea parm1q, [parm1q + 4*parm2q]
- movq [parm1q + parm2q], mm2 ; 4
- movq [parm1q + 2*parm2q], mm2 ; 5
- movq [parm1q + rax ], mm2 ; 6
- movq [parm1q + 4*parm2q], mm2 ; 7
-
+ STORE8x8 mm0, mm2
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_8x8c_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_p_core_mmx:
- movd mm0, parm3d
- movd mm2, parm4d
- movd mm4, parm5d
+ movd mm0, parm2d
+ movd mm2, parm3d
+ movd mm4, parm4d
pshufw mm0, mm0, 0
pshufw mm2, mm2, 0
pshufw mm4, mm4, 0
paddsw mm0, mm4
paddsw mm1, mm4
- add parm1q, parm2q
+ add parm1q, FDEC_STRIDE
dec eax
jg .loop
;-----------------------------------------------------------------------------
;
-; void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_16x16_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_16x16_p_core_mmx:
- movd mm0, parm3d
- movd mm2, parm4d
- movd mm4, parm5d
+ movd mm0, parm2d
+ movd mm2, parm3d
+ movd mm4, parm4d
pshufw mm0, mm0, 0
pshufw mm2, mm2, 0
pshufw mm4, mm4, 0
paddsw mm1, mm4
paddsw mm2, mm4
paddsw mm3, mm4
- add parm1q, parm2q
+ add parm1q, FDEC_STRIDE
dec eax
jg .loop
;-----------------------------------------------------------------------------
;
-; void predict_16x16_v_mmx( uint8_t *src, int i_stride )
+; void predict_16x16_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_16x16_v_mmx :
- sub parm1q, parm2q ; line -1
-
+ sub parm1q, FDEC_STRIDE
movq mm0, [parm1q]
movq mm1, [parm1q + 8]
- lea rax, [parm2q + 2 * parm2q] ; 3 * stride
-
- SAVE_0_1 (parm1q + parm2q) ; 0
- SAVE_0_1 (parm1q + 2 * parm2q) ; 1
- SAVE_0_1 (parm1q + rax) ; 2
- SAVE_0_1 (parm1q + 4 * parm2q) ; 3
- SAVE_0_1 (parm1q + 2 * rax) ; 5
- SAVE_0_1 (parm1q + 8 * parm2q) ; 7
- SAVE_0_1 (parm1q + 4 * rax) ; 11
- add parm1q, parm2q ; <-- line 0
- SAVE_0_1 (parm1q + 4 * parm2q) ; 4
- SAVE_0_1 (parm1q + 2 * rax) ; 6
- SAVE_0_1 (parm1q + 8 * parm2q) ; 8
- SAVE_0_1 (parm1q + 4 * rax) ; 12
- lea parm1q, [parm1q + 8 * parm2q] ; <-- line 8
- SAVE_0_1 (parm1q + parm2q) ; 9
- SAVE_0_1 (parm1q + 2 * parm2q) ; 10
- lea parm1q, [parm1q + 4 * parm2q] ; <-- line 12
- SAVE_0_1 (parm1q + parm2q) ; 13
- SAVE_0_1 (parm1q + 2 * parm2q) ; 14
- SAVE_0_1 (parm1q + rax) ; 15
-
+ STORE16x16 mm0, mm1
ret
;-----------------------------------------------------------------------------
;
-; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left )
+; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left )
;
;-----------------------------------------------------------------------------
%macro PRED16x16_DC 2
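+; %1: term added to the top-row sum (here the precomputed left dc), %2: shift for the average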
- sub parm1q, parm2q ; parm1q <-- line -1
+ sub parm1q, FDEC_STRIDE
pxor mm0, mm0
pxor mm1, mm1
paddusw mm0, %1
psrlw mm0, %2 ; dc
pshufw mm0, mm0, 0
- lea r8, [parm2q + 2*parm2q] ; eax <-- 3* stride
packuswb mm0, mm0 ; dc in bytes
- mov eax, 4
-ALIGN 4
-.loop:
- SAVE_0_0 (parm1q + parm2q) ; 0
- SAVE_0_0 (parm1q + 2 * parm2q) ; 1
- SAVE_0_0 (parm1q + r8 ) ; 2
- SAVE_0_0 (parm1q + 4 * parm2q) ; 3
- dec eax
- lea parm1q, [parm1q + 4 * parm2q]
- jg .loop
- nop
+ STORE16x16 mm0, mm0
%endmacro
ALIGN 16
predict_16x16_dc_core_mmxext:
- movd mm2, parm3d
+ movd mm2, parm2d
PRED16x16_DC mm2, 5
ret
%define picesp esp
%endif
+%assign FDEC_STRIDE 32
%include "i386inc.asm"
+; using registers for the row offsets here is faster than constant
+; [edx + Y*FDEC_STRIDE] displacements
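+; expects edx = src - FDEC_STRIDE, ecx = FDEC_STRIDE, eax = 3*FDEC_STRIDE;
+; stores %1 in rows 0-3 and %2 in rows 4-7, advancing edx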
+%macro STORE8x8 2
+ movq [edx + ecx], %1 ; 0
+ movq [edx + 2*ecx], %1 ; 1
+ movq [edx + 4*ecx], %1 ; 3
+ movq [edx + 8*ecx], %2 ; 7
+ add edx, eax
+ movq [edx], %1 ; 2
+ movq [edx + 2*ecx], %2 ; 4
+ movq [edx + eax], %2 ; 5
+ movq [edx + 4*ecx], %2 ; 6
+%endmacro
+
%macro SAVE_0_1 1
movq [%1] , mm0
movq [%1 + 8] , mm1
%macro PRED8x8_LOAD_TOP 0
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
- mov eax, [picesp +12]
+ mov ecx, FDEC_STRIDE
+ mov eax, [picesp + 8]
sub edx, ecx
and eax, 12
mov al, [edx]
mov ah, [edx]
pinsrw mm1, eax, 0
- mov eax, [picesp + 12]
+ mov eax, [picesp + 8]
.have_topleft:
and eax, byte 4
;-----------------------------------------------------------------------------
;
-; void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors )
+; void predict_8x8_v_mmxext( uint8_t *src, int i_neighbors )
;
;-----------------------------------------------------------------------------
PRED8x8_LOAD_TOP
lea eax, [ecx + 2*ecx]
- movq [edx + ecx], mm0 ; 0
- movq [edx + 2*ecx], mm0 ; 1
- movq [edx + 4*ecx], mm0 ; 3
- movq [edx + 8*ecx], mm0 ; 7
- add edx, eax
- movq [edx], mm0 ; 2
- movq [edx + 2*ecx], mm0 ; 4
- movq [edx + eax], mm0 ; 5
- movq [edx + 4*ecx], mm0 ; 6
+ STORE8x8 mm0, mm0
picpop ebx
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left );
+; void predict_8x8_dc_core_mmxext( uint8_t *src, int i_neighbors, uint8_t *pix_left );
;
;-----------------------------------------------------------------------------
picpush ebx
picgetgot ebx
- mov eax, [picesp + 16]
+ mov eax, [picesp + 12]
movq mm1, [eax-1]
movq mm2, [eax+1]
PRED8x8_LOWPASS mm4, [eax]
packuswb mm0, mm0
lea eax, [ecx + 2*ecx]
- movq [edx + ecx], mm0 ; 0
- movq [edx + 2*ecx], mm0 ; 1
- movq [edx + 4*ecx], mm0 ; 3
- movq [edx + 8*ecx], mm0 ; 7
- add edx, eax
- movq [edx], mm0 ; 2
- movq [edx + 2*ecx], mm0 ; 4
- movq [edx + eax], mm0 ; 5
- movq [edx + 4*ecx], mm0 ; 6
+ STORE8x8 mm0, mm0
picpop ebx
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
+; void predict_8x8c_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8c_v_mmx :
-
- mov edx , [esp + 4]
- mov ecx , [esp + 8]
- sub edx , ecx ; edx <-- line -1
-
- movq mm0 , [edx]
- movq [edx + ecx] , mm0 ; 0
- movq [edx + 2 * ecx] , mm0 ; 1
- movq [edx + 4 * ecx] , mm0 ; 3
- movq [edx + 8 * ecx] , mm0 ; 7
- add edx , ecx ; edx <-- line 0
- movq [edx + 2 * ecx] , mm0 ; 2
- movq [edx + 4 * ecx] , mm0 ; 4
- lea edx , [edx + 4 * ecx] ; edx <-- line 4
- movq [edx + ecx] , mm0 ; 5
- movq [edx + 2 * ecx] , mm0 ; 6
-
+ mov edx, [esp + 4]
+ mov ecx, FDEC_STRIDE
+ sub edx, ecx
+ movq mm0, [edx]
+ lea eax, [ecx + 2*ecx]
+ STORE8x8 mm0, mm0
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 )
+; void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 )
;
;-----------------------------------------------------------------------------
picgetgot ebx
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
+ mov ecx, FDEC_STRIDE
sub edx, ecx
lea eax, [ecx + 2*ecx]
psadbw mm1, mm2 ; s1
psadbw mm0, mm2 ; s0
- paddw mm0, [picesp + 12]
- pshufw mm2, [picesp + 16], 0
+ paddw mm0, [picesp + 8]
+ pshufw mm2, [picesp + 12], 0
psrlw mm0, 3
paddw mm1, [pw_2 GLOBAL]
movq mm3, mm2
packuswb mm0, mm1 ; dc0,dc1 (b)
packuswb mm2, mm3 ; dc2,dc3 (b)
- movq [edx + ecx], mm0 ; 0
- movq [edx + 2*ecx], mm0 ; 1
- movq [edx + eax], mm0 ; 2
- movq [edx + 4*ecx], mm0 ; 3
- lea edx, [edx + 4*ecx]
- movq [edx + ecx], mm2 ; 4
- movq [edx + 2*ecx], mm2 ; 5
- movq [edx + eax], mm2 ; 6
- movq [edx + 4*ecx], mm2 ; 7
+ STORE8x8 mm0, mm2
picpop ebx
ret
;-----------------------------------------------------------------------------
;
-; void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_8x8c_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
picgetgot ebx
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
- pshufw mm0, [picesp +12], 0
- pshufw mm2, [picesp +16], 0
- pshufw mm4, [picesp +20], 0
+ mov ecx, FDEC_STRIDE
+ pshufw mm0, [picesp + 8], 0
+ pshufw mm2, [picesp +12], 0
+ pshufw mm4, [picesp +16], 0
movq mm1, mm2
pmullw mm2, [pw_3210 GLOBAL]
psllw mm1, 2
;-----------------------------------------------------------------------------
;
-; void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c )
+; void predict_16x16_p_core_mmx( uint8_t *src, int i00, int b, int c )
;
;-----------------------------------------------------------------------------
picgetgot ebx
mov edx, [picesp + 4]
- mov ecx, [picesp + 8]
- pshufw mm0, [picesp +12], 0
- pshufw mm2, [picesp +16], 0
- pshufw mm4, [picesp +20], 0
+ mov ecx, FDEC_STRIDE
+ pshufw mm0, [picesp + 8], 0
+ pshufw mm2, [picesp +12], 0
+ pshufw mm4, [picesp +16], 0
movq mm5, mm2
movq mm1, mm2
pmullw mm5, [pw_3210 GLOBAL]
;-----------------------------------------------------------------------------
;
-; void predict_16x16_v_mmx( uint8_t *src, int i_stride )
+; void predict_16x16_v_mmx( uint8_t *src )
;
;-----------------------------------------------------------------------------
predict_16x16_v_mmx :
mov edx, [esp + 4]
- mov ecx, [esp + 8]
+ mov ecx, FDEC_STRIDE
sub edx, ecx ; edx <-- line -1
movq mm0, [edx]
;-----------------------------------------------------------------------------
;
-; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left )
+; void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left )
;
;-----------------------------------------------------------------------------
%macro PRED16x16_DC 3
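+; as in the x86_64 version: %1 is added to the dc sum, %2 is the averaging
+; shift; %3 names the stack pointer (esp or picesp) used to locate the args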
mov edx, [%3 + 4]
- mov ecx, [%3 + 8]
+ mov ecx, FDEC_STRIDE
sub edx, ecx ; edx <-- line -1
pxor mm0, mm0
ALIGN 16
predict_16x16_dc_core_mmxext:
- PRED16x16_DC [esp+12], 5, esp
+ PRED16x16_DC [esp+8], 5, esp
ret
ALIGN 16
#include "common/clip1.h"
#include "predict.h"
-extern void predict_16x16_v_mmx( uint8_t *src, int i_stride );
-extern void predict_16x16_dc_core_mmxext( uint8_t *src, int i_stride, int i_dc_left );
-extern void predict_16x16_dc_top_mmxext( uint8_t *src, int i_stride );
-extern void predict_16x16_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c );
-extern void predict_8x8c_p_core_mmx( uint8_t *src, int i_stride, int i00, int b, int c );
-extern void predict_8x8c_dc_core_mmxext( uint8_t *src, int i_stride, int s2, int s3 );
-extern void predict_8x8c_v_mmx( uint8_t *src, int i_stride );
-extern void predict_8x8_v_mmxext( uint8_t *src, int i_stride, int i_neighbors );
-extern void predict_8x8_dc_core_mmxext( uint8_t *src, int i_stride, int i_neighbors, uint8_t *pix_left );
-
-static void predict_16x16_p( uint8_t *src, int i_stride )
+extern void predict_16x16_v_mmx( uint8_t *src );
+extern void predict_16x16_dc_core_mmxext( uint8_t *src, int i_dc_left );
+extern void predict_16x16_dc_top_mmxext( uint8_t *src );
+extern void predict_16x16_p_core_mmx( uint8_t *src, int i00, int b, int c );
+extern void predict_8x8c_p_core_mmx( uint8_t *src, int i00, int b, int c );
+extern void predict_8x8c_dc_core_mmxext( uint8_t *src, int s2, int s3 );
+extern void predict_8x8c_v_mmx( uint8_t *src );
+extern void predict_8x8_v_mmxext( uint8_t *src, int i_neighbors );
+extern void predict_8x8_dc_core_mmxext( uint8_t *src, int i_neighbors, uint8_t *pix_left );
+
+static void predict_16x16_p( uint8_t *src )
{
int a, b, c, i;
int H = 0;
c = ( 5 * V + 32 ) >> 6;
i00 = a - b * 7 - c * 7 + 16;
- predict_16x16_p_core_mmx( src, FDEC_STRIDE, i00, b, c );
+ predict_16x16_p_core_mmx( src, i00, b, c );
}
-static void predict_8x8c_p( uint8_t *src, int i_stride )
+static void predict_8x8c_p( uint8_t *src )
{
int a, b, c, i;
int H = 0;
c = ( 17 * V + 16 ) >> 5;
i00 = a -3*b -3*c + 16;
- predict_8x8c_p_core_mmx( src, FDEC_STRIDE, i00, b, c );
+ predict_8x8c_p_core_mmx( src, i00, b, c );
}
-static void predict_16x16_dc( uint8_t *src, int i_stride )
+static void predict_16x16_dc( uint8_t *src )
{
uint32_t dc=16;
int i;
dc += src[-1 + (i+1) * FDEC_STRIDE];
}
- predict_16x16_dc_core_mmxext( src, FDEC_STRIDE, dc );
+ predict_16x16_dc_core_mmxext( src, dc );
}
-static void predict_8x8c_dc( uint8_t *src, int i_stride )
+static void predict_8x8c_dc( uint8_t *src )
{
int s2 = 4
+ src[-1 + 0*FDEC_STRIDE]
+ src[-1 + 6*FDEC_STRIDE]
+ src[-1 + 7*FDEC_STRIDE];
- predict_8x8c_dc_core_mmxext( src, FDEC_STRIDE, s2, s3 );
+ predict_8x8c_dc_core_mmxext( src, s2, s3 );
}
#define SRC(x,y) src[(x)+(y)*FDEC_STRIDE]
-static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc( uint8_t *src, int i_neighbor )
{
uint8_t l[10];
l[0] = i_neighbor&MB_TOPLEFT ? SRC(-1,-1) : SRC(-1,0);
l[8] =
l[9] = SRC(-1,7);
- predict_8x8_dc_core_mmxext( src, FDEC_STRIDE, i_neighbor, l+1 );
+ predict_8x8_dc_core_mmxext( src, i_neighbor, l+1 );
}
#ifdef ARCH_X86_64
-static void predict_16x16_h( uint8_t *src, int i_stride )
+static void predict_16x16_h( uint8_t *src )
{
int y;
for( y = 0; y < 16; y++ )
}
}
-static void predict_8x8c_h( uint8_t *src, int i_stride )
+static void predict_8x8c_h( uint8_t *src )
{
int y;
for( y = 0; y < 8; y++ )
}
}
-static void predict_16x16_dc_left( uint8_t *src, int i_stride )
+static void predict_16x16_dc_left( uint8_t *src )
{
uint32_t s = 0;
uint64_t dc;
}
}
-static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_left( uint8_t *src )
{
int y;
uint32_t s0 = 0, s1 = 0;
}
-static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_top( uint8_t *src )
{
int y, x;
uint32_t s0 = 0, s1 = 0;
src += FDEC_STRIDE;\
}
-static void predict_16x16_dc( uint8_t *src, int i_stride )
+static void predict_16x16_dc( uint8_t *src )
{
uint32_t dc = 0;
int i;
PREDICT_16x16_DC(dc);
}
-static void predict_16x16_dc_left( uint8_t *src, int i_stride )
+static void predict_16x16_dc_left( uint8_t *src )
{
uint32_t dc = 0;
int i;
PREDICT_16x16_DC(dc);
}
-static void predict_16x16_dc_top( uint8_t *src, int i_stride )
+static void predict_16x16_dc_top( uint8_t *src )
{
uint32_t dc = 0;
int i;
PREDICT_16x16_DC(dc);
}
-static void predict_16x16_dc_128( uint8_t *src, int i_stride )
+static void predict_16x16_dc_128( uint8_t *src )
{
int i;
PREDICT_16x16_DC(0x80808080);
}
-static void predict_16x16_h( uint8_t *src, int i_stride )
+static void predict_16x16_h( uint8_t *src )
{
int i;
}
}
-static void predict_16x16_v( uint8_t *src, int i_stride )
+static void predict_16x16_v( uint8_t *src )
{
uint32_t v0 = *(uint32_t*)&src[ 0-FDEC_STRIDE];
uint32_t v1 = *(uint32_t*)&src[ 4-FDEC_STRIDE];
src += FDEC_STRIDE;
}
}
-static void predict_16x16_p( uint8_t *src, int i_stride )
+static void predict_16x16_p( uint8_t *src )
{
int x, y, i;
int a, b, c;
* 8x8 prediction for intra chroma block
****************************************************************************/
-static void predict_8x8c_dc_128( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_128( uint8_t *src )
{
int y;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_left( uint8_t *src )
{
int y;
uint32_t dc0 = 0, dc1 = 0;
}
}
-static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
+static void predict_8x8c_dc_top( uint8_t *src )
{
int y, x;
uint32_t dc0 = 0, dc1 = 0;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_dc( uint8_t *src, int i_stride )
+static void predict_8x8c_dc( uint8_t *src )
{
int y;
int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_h( uint8_t *src, int i_stride )
+static void predict_8x8c_h( uint8_t *src )
{
int i;
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_v( uint8_t *src, int i_stride )
+static void predict_8x8c_v( uint8_t *src )
{
uint32_t v0 = *(uint32_t*)&src[0-FDEC_STRIDE];
uint32_t v1 = *(uint32_t*)&src[4-FDEC_STRIDE];
src += FDEC_STRIDE;
}
}
-static void predict_8x8c_p( uint8_t *src, int i_stride )
+static void predict_8x8c_p( uint8_t *src )
{
int i;
int x,y;
}\
}
-static void predict_4x4_dc_128( uint8_t *src, int i_stride )
+static void predict_4x4_dc_128( uint8_t *src )
{
PREDICT_4x4_DC(0x80808080);
}
-static void predict_4x4_dc_left( uint8_t *src, int i_stride )
+static void predict_4x4_dc_left( uint8_t *src )
{
uint32_t dc = (( src[-1+0*FDEC_STRIDE] + src[-1+FDEC_STRIDE]+
src[-1+2*FDEC_STRIDE] + src[-1+3*FDEC_STRIDE] + 2 ) >> 2)*0x01010101;
PREDICT_4x4_DC(dc);
}
-static void predict_4x4_dc_top( uint8_t *src, int i_stride )
+static void predict_4x4_dc_top( uint8_t *src )
{
uint32_t dc = (( src[0 - FDEC_STRIDE] + src[1 - FDEC_STRIDE] +
src[2 - FDEC_STRIDE] + src[3 - FDEC_STRIDE] + 2 ) >> 2)*0x01010101;
PREDICT_4x4_DC(dc);
}
-static void predict_4x4_dc( uint8_t *src, int i_stride )
+static void predict_4x4_dc( uint8_t *src )
{
uint32_t dc = (( src[-1+0*FDEC_STRIDE] + src[-1+FDEC_STRIDE] +
src[-1+2*FDEC_STRIDE] + src[-1+3*FDEC_STRIDE] +
src[2 - FDEC_STRIDE] + src[3 - FDEC_STRIDE] + 4 ) >> 3)*0x01010101;
PREDICT_4x4_DC(dc);
}
-static void predict_4x4_h( uint8_t *src, int i_stride )
+static void predict_4x4_h( uint8_t *src )
{
int i;
src += FDEC_STRIDE;
}
}
-static void predict_4x4_v( uint8_t *src, int i_stride )
+static void predict_4x4_v( uint8_t *src )
{
uint32_t top = *((uint32_t*)&src[-FDEC_STRIDE]);
int i;
const int t6 = src[6-1*FDEC_STRIDE]; \
UNUSED const int t7 = src[7-1*FDEC_STRIDE];
-static void predict_4x4_ddl( uint8_t *src, int i_stride )
+static void predict_4x4_ddl( uint8_t *src )
{
PREDICT_4x4_LOAD_TOP
PREDICT_4x4_LOAD_TOP_RIGHT
src[3*FDEC_STRIDE+3] = ( t6 + 3*t7 + 2 ) >> 2;
}
-static void predict_4x4_ddr( uint8_t *src, int i_stride )
+static void predict_4x4_ddr( uint8_t *src )
{
const int lt = src[-1-FDEC_STRIDE];
PREDICT_4x4_LOAD_LEFT
src[3*FDEC_STRIDE+0] = ( l1 + 2 * l2 + l3 + 2 ) >> 2;
}
-static void predict_4x4_vr( uint8_t *src, int i_stride )
+static void predict_4x4_vr( uint8_t *src )
{
const int lt = src[-1-FDEC_STRIDE];
PREDICT_4x4_LOAD_LEFT
src[3*FDEC_STRIDE+0]= ( l0 + 2 * l1 + l2 + 2 ) >> 2;
}
-static void predict_4x4_hd( uint8_t *src, int i_stride )
+static void predict_4x4_hd( uint8_t *src )
{
const int lt= src[-1-1*FDEC_STRIDE];
PREDICT_4x4_LOAD_LEFT
src[3*FDEC_STRIDE+1]= ( l1 + 2 * l2 + l3 + 2 ) >> 2;
}
-static void predict_4x4_vl( uint8_t *src, int i_stride )
+static void predict_4x4_vl( uint8_t *src )
{
PREDICT_4x4_LOAD_TOP
PREDICT_4x4_LOAD_TOP_RIGHT
src[3*FDEC_STRIDE+3]= ( t4 + 2 * t5 + t6 + 2 ) >> 2;
}
-static void predict_4x4_hu( uint8_t *src, int i_stride )
+static void predict_4x4_hu( uint8_t *src )
{
PREDICT_4x4_LOAD_LEFT
src += FDEC_STRIDE; \
}
-static void predict_8x8_dc_128( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc_128( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_DC(0x80808080);
}
-static void predict_8x8_dc_left( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc_left( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
PREDICT_8x8_DC(dc);
}
-static void predict_8x8_dc_top( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc_top( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
PREDICT_8x8_DC(dc);
}
-static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_dc( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOP
+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
PREDICT_8x8_DC(dc);
}
-static void predict_8x8_h( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_h( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
#define ROW(y) ((uint32_t*)(src+y*FDEC_STRIDE))[0] =\
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
-static void predict_8x8_v( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_v( uint8_t *src, int i_neighbor )
{
int y;
PREDICT_8x8_LOAD_TOP;
for( y = 1; y < 8; y++ )
*(uint64_t*)(src+y*FDEC_STRIDE) = *(uint64_t*)src;
}
-static void predict_8x8_ddl( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_ddl( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
}
-static void predict_8x8_ddr( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_ddr( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}
-static void predict_8x8_vr( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_vr( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
SRC(7,0)= (t6 + t7 + 1) >> 1;
}
-static void predict_8x8_hd( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_hd( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
-static void predict_8x8_vl( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_vl( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(7,6)= (t10 + t11 + 1) >> 1;
SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
-static void predict_8x8_hu( uint8_t *src, int i_stride, int i_neighbor )
+static void predict_8x8_hu( uint8_t *src, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT
SRC(0,0)= (l0 + l1 + 1) >> 1;
#ifndef _PREDICT_H
#define _PREDICT_H 1
-typedef void (*x264_predict_t)( uint8_t *src, int i_stride );
-typedef void (*x264_predict8x8_t)( uint8_t *src, int i_stride, int i_neighbor );
+typedef void (*x264_predict_t)( uint8_t *src );
+typedef void (*x264_predict8x8_t)( uint8_t *src, int i_neighbor );
enum intra_chroma_pred_e
{
i_mode = predict_mode[i];
/* we do the prediction */
- h->predict_8x8c[i_mode]( p_dstc[0], FDEC_STRIDE );
- h->predict_8x8c[i_mode]( p_dstc[1], FDEC_STRIDE );
+ h->predict_8x8c[i_mode]( p_dstc[0] );
+ h->predict_8x8c[i_mode]( p_dstc[1] );
/* we calculate the cost */
i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE,
int i_mode;
i_mode = predict_mode[i];
- h->predict_16x16[i_mode]( p_dst, FDEC_STRIDE );
+ h->predict_16x16[i_mode]( p_dst );
i_sad = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
int i_mode;
i_mode = predict_mode[i];
- h->predict_4x4[i_mode]( p_dst_by, FDEC_STRIDE );
+ h->predict_4x4[i_mode]( p_dst_by );
i_sad = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE,
p_src_by, FENC_STRIDE )
a->i_sad_i4x4 += i_best;
/* we need to encode this block now (for next ones) */
- h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by, FDEC_STRIDE );
+ h->predict_4x4[a->i_predict4x4[x][y]]( p_dst_by );
x264_mb_encode_i4x4( h, idx, a->i_qp );
h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[x][y];
int i_mode;
i_mode = predict_mode[i];
- h->predict_8x8[i_mode]( p_dst_by, FDEC_STRIDE, h->mb.i_neighbour8[idx] );
+ h->predict_8x8[i_mode]( p_dst_by, h->mb.i_neighbour8[idx] );
/* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */
i_sad = h->pixf.mbcmp[PIXEL_8x8]( p_dst_by, FDEC_STRIDE,
a->i_sad_i8x8 += i_best;
/* we need to encode this block now (for next ones) */
- h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, FDEC_STRIDE, h->mb.i_neighbour );
+ h->predict_8x8[a->i_predict8x8[x][y]]( p_dst_by, h->mb.i_neighbour );
x264_mb_encode_i8x8( h, idx, a->i_qp );
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[x][y] );
const int i_mode = h->mb.i_intra16x16_pred_mode;
h->mb.b_transform_8x8 = 0;
/* do the right prediction */
- h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], FDEC_STRIDE );
+ h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
/* encode the 16x16 macroblock */
x264_mb_encode_i16x16( h, i_qp );
h->mb.b_transform_8x8 = 1;
for( i = 0; i < 4; i++ )
{
- const int i_dst = FDEC_STRIDE;
- uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
+ uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
- h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
+ h->predict_8x8[i_mode]( p_dst, h->mb.i_neighbour8[i] );
x264_mb_encode_i8x8( h, i, i_qp );
}
}
h->mb.b_transform_8x8 = 0;
for( i = 0; i < 16; i++ )
{
- const int i_dst = FDEC_STRIDE;
- uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
+ uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
- *(uint32_t*) &p_dst[4 - i_dst] = p_dst[3 - i_dst] * 0x01010101U;
+ *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
- h->predict_4x4[i_mode]( p_dst, i_dst );
+ h->predict_4x4[i_mode]( p_dst );
x264_mb_encode_i4x4( h, i, i_qp );
}
}
if( IS_INTRA( h->mb.i_type ) )
{
const int i_mode = h->mb.i_chroma_pred_mode;
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], FDEC_STRIDE );
- h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], FDEC_STRIDE );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
+ h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
}
/* encode the 8x8 blocks */
for( i = I_PRED_CHROMA_DC; i <= I_PRED_CHROMA_P; i++ )
{
int i_cost;
- h->predict_8x8c[i]( pix, FDEC_STRIDE );
+ h->predict_8x8c[i]( pix );
i_cost = h->pixf.mbcmp[PIXEL_8x8]( pix, FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE ) + intra_penalty;
i_bcost = X264_MIN( i_bcost, i_cost );
}
used_asm = 1; \
memcpy( buf3, buf1, 32*20 );\
memcpy( buf4, buf1, 32*20 );\
- ip_c.name[dir]( buf3+48, FDEC_STRIDE, ##__VA_ARGS__ );\
- ip_a.name[dir]( buf4+48, FDEC_STRIDE, ##__VA_ARGS__ );\
+ ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\
+ ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\
if( memcmp( buf3, buf4, 32*20 ) )\
{\
fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\