]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/ppc/dsputil_altivec.c
Remove gcc_fixes.h. It only contains workarounds for unsupported gcc versions.
[ffmpeg] / libavcodec / ppc / dsputil_altivec.c
index bb0fad4e48771f170952edd251334788dc54ef49..8a1cd443769e3f630ed9730b194b2af83cdc7a6a 100644 (file)
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
 #include "libavcodec/dsputil.h"
-
-#include "gcc_fixes.h"
-
 #include "dsputil_ppc.h"
 #include "util_altivec.h"
+#include "types_altivec.h"
 
 int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 {
@@ -277,7 +279,7 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 
     sad = (vector unsigned int)vec_splat_u32(0);
 
-    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+    permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
 
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2
@@ -358,7 +360,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 
     sum = (vector unsigned int)vec_splat_u32(0);
 
-    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
+    permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
 
 
     for (i = 0; i < h; i++) {
@@ -573,6 +575,20 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
     }
 }
 
+
+static void clear_block_altivec(DCTELEM *block) { /* Fill the 128 bytes starting at block with the contents of zero_s16v. */
+    LOAD_ZERO;                      /* NOTE(review): presumably declares the all-zero vector zero_s16v; the macro is defined outside this diff -- confirm. */
+    vec_st(zero_s16v,   0, block);  /* Each vec_st writes one 16-byte vector; the middle argument is a byte offset from block. */
+    vec_st(zero_s16v,  16, block);  /* NOTE(review): vec_st ignores the low 4 address bits, so block is assumed 16-byte aligned -- confirm at callers. */
+    vec_st(zero_s16v,  32, block);
+    vec_st(zero_s16v,  48, block);
+    vec_st(zero_s16v,  64, block);
+    vec_st(zero_s16v,  80, block);
+    vec_st(zero_s16v,  96, block);
+    vec_st(zero_s16v, 112, block);  /* Final store covers bytes 112..127; 8 x 16 = 128 bytes total (64 coefficients if DCTELEM is 16-bit -- TODO confirm). */
+}
+
+
 void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
     register int i;
     register vector unsigned char vdst, vsrc;
@@ -990,20 +1006,20 @@ POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
 POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
     {
     register const vector signed short vprod1 =(const vector signed short)
-                                        AVV( 1,-1, 1,-1, 1,-1, 1,-1);
+                                               { 1,-1, 1,-1, 1,-1, 1,-1 };
     register const vector signed short vprod2 =(const vector signed short)
-                                        AVV( 1, 1,-1,-1, 1, 1,-1,-1);
+                                               { 1, 1,-1,-1, 1, 1,-1,-1 };
     register const vector signed short vprod3 =(const vector signed short)
-                                        AVV( 1, 1, 1, 1,-1,-1,-1,-1);
+                                               { 1, 1, 1, 1,-1,-1,-1,-1 };
     register const vector unsigned char perm1 = (const vector unsigned char)
-      AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
-          0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
+        {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
+         0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D};
     register const vector unsigned char perm2 = (const vector unsigned char)
-      AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
-          0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
+        {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+         0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B};
     register const vector unsigned char perm3 = (const vector unsigned char)
-      AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
-          0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
 
 #define ONEITERBUTTERFLY(i, res)                                          \
     {                                                                     \
@@ -1109,70 +1125,70 @@ xlc goes to around 660 on the regular C code...
 static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) {
     int sum;
     register vector signed short
-        temp0 REG_v(v0),
-        temp1 REG_v(v1),
-        temp2 REG_v(v2),
-        temp3 REG_v(v3),
-        temp4 REG_v(v4),
-        temp5 REG_v(v5),
-        temp6 REG_v(v6),
-        temp7 REG_v(v7);
+        temp0 __asm__ ("v0"),
+        temp1 __asm__ ("v1"),
+        temp2 __asm__ ("v2"),
+        temp3 __asm__ ("v3"),
+        temp4 __asm__ ("v4"),
+        temp5 __asm__ ("v5"),
+        temp6 __asm__ ("v6"),
+        temp7 __asm__ ("v7");
     register vector signed short
-        temp0S REG_v(v8),
-        temp1S REG_v(v9),
-        temp2S REG_v(v10),
-        temp3S REG_v(v11),
-        temp4S REG_v(v12),
-        temp5S REG_v(v13),
-        temp6S REG_v(v14),
-        temp7S REG_v(v15);
-    register const vector unsigned char vzero REG_v(v31)=
+        temp0S __asm__ ("v8"),
+        temp1S __asm__ ("v9"),
+        temp2S __asm__ ("v10"),
+        temp3S __asm__ ("v11"),
+        temp4S __asm__ ("v12"),
+        temp5S __asm__ ("v13"),
+        temp6S __asm__ ("v14"),
+        temp7S __asm__ ("v15");
+    register const vector unsigned char vzero __asm__ ("v31") =
         (const vector unsigned char)vec_splat_u8(0);
     {
-    register const vector signed short vprod1 REG_v(v16)=
-        (const vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
-    register const vector signed short vprod2 REG_v(v17)=
-        (const vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
-    register const vector signed short vprod3 REG_v(v18)=
-        (const vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
-    register const vector unsigned char perm1 REG_v(v19)=
+    register const vector signed short vprod1 __asm__ ("v16") =
+        (const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 };
+    register const vector signed short vprod2 __asm__ ("v17") =
+        (const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 };
+    register const vector signed short vprod3 __asm__ ("v18") =
+        (const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 };
+    register const vector unsigned char perm1 __asm__ ("v19") =
         (const vector unsigned char)
-        AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
-            0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
-    register const vector unsigned char perm2 REG_v(v20)=
+        {0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
+         0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D};
+    register const vector unsigned char perm2 __asm__ ("v20") =
         (const vector unsigned char)
-        AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
-            0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
-    register const vector unsigned char perm3 REG_v(v21)=
+        {0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
+         0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B};
+    register const vector unsigned char perm3 __asm__ ("v21") =
         (const vector unsigned char)
-        AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
-            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
-
-#define ONEITERBUTTERFLY(i, res1, res2)                                   \
-    {                                                                     \
-    register vector unsigned char src1 REG_v(v22),                    \
-                                  src2 REG_v(v23),                    \
-                                  dst1 REG_v(v24),                    \
-                                  dst2 REG_v(v25),                    \
-                                  srcO REG_v(v22),                    \
-                                  dstO REG_v(v23);                    \
+        {0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
+
+#define ONEITERBUTTERFLY(i, res1, res2)                               \
+    {                                                                 \
+    register vector unsigned char src1 __asm__ ("v22"),               \
+                                  src2 __asm__ ("v23"),               \
+                                  dst1 __asm__ ("v24"),               \
+                                  dst2 __asm__ ("v25"),               \
+                                  srcO __asm__ ("v22"),               \
+                                  dstO __asm__ ("v23");               \
                                                                       \
-    register vector signed short  srcV REG_v(v24),                    \
-                                  dstV REG_v(v25),                    \
-                                  srcW REG_v(v26),                    \
-                                  dstW REG_v(v27),                    \
-                                  but0 REG_v(v28),                    \
-                                  but0S REG_v(v29),                   \
-                                  op1 REG_v(v30),                     \
-                                  but1 REG_v(v22),                    \
-                                  op1S REG_v(v23),                    \
-                                  but1S REG_v(v24),                   \
-                                  op2 REG_v(v25),                     \
-                                  but2 REG_v(v26),                    \
-                                  op2S REG_v(v27),                    \
-                                  but2S REG_v(v28),                   \
-                                  op3 REG_v(v29),                     \
-                                  op3S REG_v(v30);                    \
+    register vector signed short  srcV  __asm__ ("v24"),              \
+                                  dstV  __asm__ ("v25"),              \
+                                  srcW  __asm__ ("v26"),              \
+                                  dstW  __asm__ ("v27"),              \
+                                  but0  __asm__ ("v28"),              \
+                                  but0S __asm__ ("v29"),              \
+                                  op1   __asm__ ("v30"),              \
+                                  but1  __asm__ ("v22"),              \
+                                  op1S  __asm__ ("v23"),              \
+                                  but1S __asm__ ("v24"),              \
+                                  op2   __asm__ ("v25"),              \
+                                  but2  __asm__ ("v26"),              \
+                                  op2S  __asm__ ("v27"),              \
+                                  but2S __asm__ ("v28"),              \
+                                  op3   __asm__ ("v29"),              \
+                                  op3S  __asm__ ("v30");              \
                                                                       \
     src1 = vec_ld(stride * i, src);                                   \
     src2 = vec_ld((stride * i) + 16, src);                            \
@@ -1420,6 +1436,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
     c->pix_sum = pix_sum_altivec;
     c->diff_pixels = diff_pixels_altivec;
     c->get_pixels = get_pixels_altivec;
+    c->clear_block = clear_block_altivec;
     c->add_bytes= add_bytes_altivec;
     c->put_pixels_tab[0][0] = put_pixels16_altivec;
     /* the two functions do the same thing, so use the same code */
@@ -1434,6 +1451,6 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
 
     c->hadamard8_diff[0] = hadamard8_diff16_altivec;
     c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
-    if (ENABLE_VORBIS_DECODER)
+    if (CONFIG_VORBIS_DECODER)
         c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
 }