moved the tables into header files (and applied the 'static' patch). Nick: why do...

[ffmpeg] / libavcodec / dsputil.h
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h

index 8643e9ac9adf7f3be08a22bf2f49a21990857c5e..0a2935bbff49f8745f6dd085d3dc541a3e308798 100644 (file)
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -34,7 +34,6 @@
  //#define DEBUG
  /* dct code */
  typedef short DCTELEM;
-//typedef int DCTELEM;
  
  void fdct_ifast (DCTELEM *data);
  void ff_jpeg_fdct_islow (DCTELEM *data);
@@ -77,6 +76,7 @@ void clear_blocks_c(DCTELEM *blocks);
  /* add and put pixel (decoding) */
  // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
  typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
  typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
  typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
  
@@ -146,30 +146,30 @@ typedef struct DSPContext {
      me_cmp_func me_sub_cmp[11];
      me_cmp_func mb_cmp[11];
  
-    /* maybe create an array for 16/8 functions */
+    /* maybe create an array for 16/8/4/2 functions */
      /**
       * Halfpel motion compensation with rounding (a+b+1)>>1.
-     * this is an array[2][4] of motion compensation funcions for 2 
-     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * This is an array[4][4] of motion compensation functions for 4 
+     * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
       * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
       * @param block destination where the result is stored
       * @param pixels source
       * @param line_size number of bytes in a horizontal line of block
       * @param h height
       */
-    op_pixels_func put_pixels_tab[2][4];
+    op_pixels_func put_pixels_tab[4][4];
  
      /**
       * Halfpel motion compensation with rounding (a+b+1)>>1.
-     * this is an array[2][4] of motion compensation funcions for 2 
-     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * This is an array[4][4] of motion compensation functions for 4 
+     * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
       * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
       * @param block destination into which the result is averaged (a+b+1)>>1
       * @param pixels source
       * @param line_size number of bytes in a horizontal line of block
       * @param h height
       */
-    op_pixels_func avg_pixels_tab[2][4];
+    op_pixels_func avg_pixels_tab[4][4];
  
      /**
       * Halfpel motion compensation with no rounding (a+b)>>1.
@@ -194,6 +194,19 @@ typedef struct DSPContext {
       * @param h height
       */
      op_pixels_func avg_no_rnd_pixels_tab[2][4];
+    
+    /**
+     * Thirdpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[11] of motion compensation functions for the 9 thirdpel positions<br>
+     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+
      qpel_mc_func put_qpel_pixels_tab[2][16];
      qpel_mc_func avg_qpel_pixels_tab[2][16];
      qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
@@ -221,10 +234,14 @@ typedef struct DSPContext {
      /* huffyuv specific */
      void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
      void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
      
      /* (I)DCT */
      void (*fdct)(DCTELEM *block/* align 16*/);
      
+    /* IDCT really*/
+    void (*idct)(DCTELEM *block/* align 16*/);
+    
      /**
       * block -> idct -> clip to unsigned 8 bit -> dest.
       * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
@@ -240,6 +257,10 @@ typedef struct DSPContext {
      
      /**
       * idct input permutation.
+     * Several optimized IDCTs need a permuted input (relative to the normal order of the reference
+     * IDCT).
+     * This permutation must be performed before the idct_put/add. Note that normally this can be merged
+     * with the zigzag/alternate scan.<br>
       * an example to avoid confusion:
       * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
       * - (x -> referece dct -> reference idct -> x)
@@ -264,6 +285,18 @@ void dsputil_init(DSPContext* p, AVCodecContext *avctx);
   */
  void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
  
+#define        BYTE_VEC32(c)   ((c)*0x01010101UL) /* for a byte value c: c replicated into each of the low 4 bytes */
+/* Average of a and b taken independently in each of their 4 bytes, rounded up ((x+y+1)>>1 per byte). */
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); /* mask clears each byte's low bit so the shift cannot leak into the neighbouring byte */
+}
+/* Average of a and b taken independently in each of their 4 bytes, rounded down ((x+y)>>1 per byte). */
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); /* mask clears each byte's low bit so the shift cannot leak into the neighbouring byte */
+}
+
  /**
   * Empty mmx state.
   * this must be called between any dsp function and float/double code.
@@ -348,6 +381,12 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
  
  void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
  
+#elif defined(ARCH_SH4)
+
+#define __align8 __attribute__ ((aligned (8)))
+
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+
  #else
  
  #define __align8
@@ -358,7 +397,9 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
  
  struct unaligned_64 { uint64_t l; } __attribute__((packed));
  struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed)); /* packed wrapper: member l carries no alignment requirement */
  
+#define LD16(a) (((const struct unaligned_16 *) (a))->l) /* read a uint16_t at address a through the packed struct */
  #define LD32(a) (((const struct unaligned_32 *) (a))->l)
  #define LD64(a) (((const struct unaligned_64 *) (a))->l)
  
@@ -366,6 +407,7 @@ struct unaligned_32 { uint32_t l; } __attribute__((packed));
  
  #else /* __GNUC__ */
  
+#define LD16(a) (*((uint16_t*)(a))) /* non-GCC fallback: direct uint16_t dereference; NOTE(review): presumably requires a to be suitably aligned on the target -- confirm */
  #define LD32(a) (*((uint32_t*)(a)))
  #define LD64(a) (*((uint64_t*)(a)))