/**
* @file dsputil.h
* DSP utils.
+ * note, many functions in here may use MMX which trashes the FPU state, it is
+ * absolutely necessary to call emms_c() between dsp & float/double code
*/
#ifndef DSPUTIL_H
//#define DEBUG
/* dct code */
typedef short DCTELEM;
-//typedef int DCTELEM;
void fdct_ifast (DCTELEM *data);
void ff_jpeg_fdct_islow (DCTELEM *data);
/* add and put pixel (decoding) */
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
#define DEF_OLD_QPEL(name)\
void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
+
/**
* DSPContext.
*/
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+ /**
+ * translational global motion compensation.
+ */
void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+ /**
+ * global motion compensation.
+ */
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
me_cmp_func me_sub_cmp[11];
me_cmp_func mb_cmp[11];
- /* maybe create an array for 16/8 functions */
- op_pixels_func put_pixels_tab[2][4];
- op_pixels_func avg_pixels_tab[2][4];
+ /* maybe create an array for 16/8/4/2 functions */
+ /**
+ * Halfpel motion compensation with rounding (a+b+1)>>1.
+ * this is an array[4][4] of motion compensation funcions for 4
+ * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
+ * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+ * @param block destination where the result is stored
+ * @param pixels source
+ * @param line_size number of bytes in a horizontal line of block
+ * @param h height
+ */
+ op_pixels_func put_pixels_tab[4][4];
+
+ /**
+ * Halfpel motion compensation with rounding (a+b+1)>>1.
+ * This is an array[4][4] of motion compensation functions for 4
+ * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
+ * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+ * @param block destination into which the result is averaged (a+b+1)>>1
+ * @param pixels source
+ * @param line_size number of bytes in a horizontal line of block
+ * @param h height
+ */
+ op_pixels_func avg_pixels_tab[4][4];
+
+ /**
+ * Halfpel motion compensation with no rounding (a+b)>>1.
+ * this is an array[2][4] of motion compensation funcions for 2
+ * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+ * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+ * @param block destination where the result is stored
+ * @param pixels source
+ * @param line_size number of bytes in a horizontal line of block
+ * @param h height
+ */
op_pixels_func put_no_rnd_pixels_tab[2][4];
+
+ /**
+ * Halfpel motion compensation with no rounding (a+b)>>1.
+ * this is an array[2][4] of motion compensation funcions for 2
+ * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+ * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+ * @param block destination into which the result is averaged (a+b)>>1
+ * @param pixels source
+ * @param line_size number of bytes in a horizontal line of block
+ * @param h height
+ */
op_pixels_func avg_no_rnd_pixels_tab[2][4];
+
+ /**
+ * Thirdpel motion compensation with rounding (a+b+1)>>1.
+ * this is an array[12] of motion compensation funcions for the 9 thirdpel positions<br>
+ * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+ * @param block destination where the result is stored
+ * @param pixels source
+ * @param line_size number of bytes in a horizontal line of block
+ * @param h height
+ */
+ tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+ tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func put_mspel_pixels_tab[8];
+
+ /**
+ * h264 Chram MC
+ */
+ h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
+ h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+ qpel_mc_func put_h264_qpel_pixels_tab[3][16];
+ qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
+
op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2;
op_pixels_abs_func pix_abs16x16_y2;
/* huffyuv specific */
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+ void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
+ /* IDCT really*/
+ void (*idct)(DCTELEM *block/* align 16*/);
+
/**
* block -> idct -> clip to unsigned 8 bit -> dest.
* (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
- * @param line_size size in pixels of a horizotal line of dest
+ * @param line_size size in bytes of a horizotal line of dest
*/
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
/**
* block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
- * @param line_size size in pixels of a horizotal line of dest
+ * @param line_size size in bytes of a horizotal line of dest
*/
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
/**
* idct input permutation.
+ * several optimized IDCTs need a permutated input (relative to the normal order of the reference
+ * IDCT)
+ * this permutation must be performed before the idct_put/add, note, normally this can be merged
+ * with the zigzag/alternate scan<br>
* an example to avoid confusion:
* - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
* - (x -> referece dct -> reference idct -> x)
} DSPContext;
+void dsputil_static_init(void);
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
/**
*/
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
+#define BYTE_VEC32(c) ((c)*0x01010101UL)
+
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+ return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+ return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
/**
* Empty mmx state.
* this must be called between any dsp function and float/double code.
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
+#elif defined(ARCH_SH4)
+
+#define __align8 __attribute__ ((aligned (8)))
+
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+
#else
#define __align8
struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+#define LD16(a) (((const struct unaligned_16 *) (a))->l)
#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)
#else /* __GNUC__ */
+#define LD16(a) (*((uint16_t*)(a)))
#define LD32(a) (*((uint32_t*)(a)))
#define LD64(a) (*((uint64_t*)(a)))