X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fdsputil.h;h=f48aa96017d2cf7017fd831967e3f3e278445eba;hb=4e13e50432bd1c1a89f626f8196b55a0302c8f19;hp=cadde7d25a5c3d3f40a48fb24da70747ab264c48;hpb=9b027c0d0794b50e3b638cca4bed925638ab68bc;p=ffmpeg

diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index cadde7d25a5..f48aa96017d 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -38,21 +38,18 @@
 /* dct code */
 typedef short DCTELEM;
 
-void fdct_ifast (DCTELEM *data);
-void fdct_ifast248 (DCTELEM *data);
+void ff_fdct_ifast (DCTELEM *data);
+void ff_fdct_ifast248 (DCTELEM *data);
 void ff_jpeg_fdct_islow_8(DCTELEM *data);
 void ff_jpeg_fdct_islow_10(DCTELEM *data);
 void ff_fdct248_islow_8(DCTELEM *data);
 void ff_fdct248_islow_10(DCTELEM *data);
 
-void j_rev_dct (DCTELEM *data);
-void j_rev_dct4 (DCTELEM *data);
-void j_rev_dct2 (DCTELEM *data);
-void j_rev_dct1 (DCTELEM *data);
+void ff_j_rev_dct (DCTELEM *data);
 void ff_wmv2_idct_c(DCTELEM *data);
 
 void ff_fdct_mmx(DCTELEM *block);
-void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_mmxext(DCTELEM *block);
 void ff_fdct_sse2(DCTELEM *block);
 
 #define H264_IDCT(depth) \
@@ -104,18 +101,6 @@ PUTAVG_PIXELS(10)
 #define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
 #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
 
-/* VP3 DSP functions */
-void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
-void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
-
-void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
-void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
-
-/* EA functions */
-void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
-
 /* RV40 functions */
 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
@@ -197,10 +182,6 @@ typedef struct ScanTable{
     const uint8_t *scantable;
     uint8_t permutated[64];
     uint8_t raster_end[64];
-#if ARCH_PPC
-                /** Used by dct_quantize_altivec to find last-non-zero */
-    DECLARE_ALIGNED(16, uint8_t, inverse)[64];
-#endif
 } ScanTable;
 
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
@@ -216,10 +197,6 @@ EMULATED_EDGE(8)
 EMULATED_EDGE(9)
 EMULATED_EDGE(10)
 
-void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
-void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
-void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
-
 /**
  * DSPContext.
  */
@@ -395,18 +372,9 @@ typedef struct DSPContext {
 
     void (*h261_loop_filter)(uint8_t *src, int stride);
 
-    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
-    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
-
-    void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
-    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
-    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
-
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
     void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
-    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
-    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
-    void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len);
+    /* assume len is a multiple of 16, and arrays are 32-byte aligned */
     void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
     /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
     void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
@@ -424,17 +392,6 @@ typedef struct DSPContext {
      */
     void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
                                int len);
-    /**
-     * Multiply a vector of floats by a scalar float and add to
-     * destination vector.  Source and destination vectors must
-     * overlap exactly or not at all.
-     * @param dst result vector, 16-byte aligned
-     * @param src input vector, 16-byte aligned
-     * @param mul scalar value
-     * @param len length of vector, multiple of 4
-     */
-    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
-                               int len);
     /**
      * Calculate the scalar product of two vectors of floats.
      * @param v1  first vector, 16-byte aligned
@@ -522,23 +479,11 @@ typedef struct DSPContext {
 
     void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
 
-    /* mlp/truehd functions */
-    void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
-                               int firorder, int iirorder,
-                               unsigned int filter_shift, int32_t mask, int blocksize,
-                               int32_t *sample_buffer);
-
-    /* intrax8 functions */
-    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
-    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
-           int * range, int * sum,  int edges);
-
     /**
      * Calculate scalar product of two vectors.
      * @param len length of vectors, should be multiple of 16
-     * @param shift number of bits to discard from product
      */
-    int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len, int shift);
+    int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2/*align 16*/, int len);
     /* ape functions */
     /**
      * Calculate scalar product of v1 and v2,
@@ -568,9 +513,9 @@ typedef struct DSPContext {
      * @param src  source array
      *             constraints: 16-byte aligned
      * @param min  minimum value
-     *             constraints: must in the the range [-(1<<24), 1<<24]
+     *             constraints: must be in the range [-(1 << 24), 1 << 24]
      * @param max  maximum value
-     *             constraints: must in the the range [-(1<<24), 1<<24]
+     *             constraints: must be in the range [-(1 << 24), 1 << 24]
      * @param len  number of elements in the array
      *             constraints: multiple of 32 greater than zero
      */
@@ -580,11 +525,22 @@ typedef struct DSPContext {
     op_fill_func fill_block_tab[2];
 } DSPContext;
 
-void dsputil_static_init(void);
-void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+void ff_dsputil_static_init(void);
+void ff_dsputil_init(DSPContext* p, AVCodecContext *avctx);
 
 int ff_check_alignment(void);
 
+/**
+ * Return the scalar product of two vectors.
+ *
+ * @param v1  first input vector
+ * @param v2  first input vector
+ * @param len number of elements
+ *
+ * @return sum of elementwise products
+ */
+float ff_scalarproduct_float_c(const float *v1, const float *v2, int len);
+
 /**
  * permute block according to permuatation.
  * @param last last non zero element in scantable order
@@ -640,43 +596,42 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
     }
 }
 
-void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
 
 void ff_dsputil_init_dwt(DSPContext *c);
-void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
-void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
-void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
 
-#if (ARCH_ARM && HAVE_NEON) || ARCH_PPC || HAVE_MMI || HAVE_MMX
+#if (ARCH_ARM && HAVE_NEON) || ARCH_PPC || HAVE_MMX
 #   define STRIDE_ALIGN 16
 #else
 #   define STRIDE_ALIGN 8
 #endif
 
+// Some broken preprocessors need a second expansion
+// to be forced to tokenize __VA_ARGS__
+#define E(x) x
+
 #define LOCAL_ALIGNED_A(a, t, v, s, o, ...)             \
     uint8_t la_##v[sizeof(t s o) + (a)];                \
     t (*v) o = (void *)FFALIGN((uintptr_t)la_##v, a)
 
 #define LOCAL_ALIGNED_D(a, t, v, s, o, ...) DECLARE_ALIGNED(a, t, v) s o
 
-#define LOCAL_ALIGNED(a, t, v, ...) LOCAL_ALIGNED_A(a, t, v, __VA_ARGS__,,)
+#define LOCAL_ALIGNED(a, t, v, ...) E(LOCAL_ALIGNED_A(a, t, v, __VA_ARGS__,,))
 
 #if HAVE_LOCAL_ALIGNED_8
-#   define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED_D(8, t, v, __VA_ARGS__,,)
+#   define LOCAL_ALIGNED_8(t, v, ...) E(LOCAL_ALIGNED_D(8, t, v, __VA_ARGS__,,))
 #else
 #   define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED(8, t, v, __VA_ARGS__)
 #endif
 
 #if HAVE_LOCAL_ALIGNED_16
-#   define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED_D(16, t, v, __VA_ARGS__,,)
+#   define LOCAL_ALIGNED_16(t, v, ...) E(LOCAL_ALIGNED_D(16, t, v, __VA_ARGS__,,))
 #else
 #   define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__)
 #endif
@@ -707,7 +662,7 @@ static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN16(dst   , AV_RN16(src   ));
+        AV_COPY16U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -718,7 +673,7 @@ static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
+        AV_COPY32U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -729,8 +684,7 @@ static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_COPY64U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -741,8 +695,7 @@ static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_COPY64U(dst, src);
         dst[8]= src[8];
         dst+=dstStride;
         src+=srcStride;
@@ -754,10 +707,7 @@ static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
-        AV_WN32(dst+8 , AV_RN32(src+8 ));
-        AV_WN32(dst+12, AV_RN32(src+12));
+        AV_COPY128U(dst, src);
         dst+=dstStride;
         src+=srcStride;
     }
@@ -768,10 +718,7 @@ static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride,
     int i;
     for(i=0; i<h; i++)
     {
-        AV_WN32(dst   , AV_RN32(src   ));
-        AV_WN32(dst+4 , AV_RN32(src+4 ));
-        AV_WN32(dst+8 , AV_RN32(src+8 ));
-        AV_WN32(dst+12, AV_RN32(src+12));
+        AV_COPY128U(dst, src);
         dst[16]= src[16];
         dst+=dstStride;
         src+=srcStride;