void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
const float *src2, int src3, int blocksize, int step);
-void ff_float_to_int16_c(int16_t *dst, const float *src, int len);
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
+ const float *win, float add_bias, int len);
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
/* encoding scans */
extern const uint8_t ff_alternate_horizontal_scan[64];
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
/* minimum alignment rules ;)
-if u notice errors in the align stuff, need more alignment for some asm code for some cpu
-or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ...
+If you notice errors in the align stuff, need more alignment for some ASM code
+for some CPU or need to use a function with less aligned data then send a mail
+to the ffmpeg-devel mailing list, ...
-!warning these alignments might not match reallity, (missing attribute((align)) stuff somewhere possible)
-i (michael) didnt check them, these are just the alignents which i think could be reached easily ...
+!warning These alignments might not match reality, (missing attribute((align))
+stuff somewhere possible).
+I (Michael) did not check them, these are just the alignments which I think
+could be reached easily ...
!future video codecs might need functions with less strict alignment
*/
// for snow slices
typedef struct slice_buffer_s slice_buffer;
+/**
+ * Scantable.
+ */
+typedef struct ScanTable{
+ const uint8_t *scantable;
+ uint8_t permutated[64];
+ uint8_t raster_end[64];
+#ifdef ARCH_POWERPC
+ /** Used by dct_quantize_altivec to find last-non-zero */
+ DECLARE_ALIGNED(16, uint8_t, inverse[64]);
+#endif
+} ScanTable;
+
+void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
+
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
+ int block_w, int block_h,
+ int src_x, int src_y, int w, int h);
+
/**
* DSPContext.
*/
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
// h264_loop_filter_strength: simd only. the C version is inlined in h264.c
void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
- int bidir, int edges, int step, int mask_mv0, int mask_mv1);
+ int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
+ /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+ void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
- void (*float_to_int16)(int16_t *dst, const float *src, int len);
+ void (*float_to_int16)(int16_t *dst, const float *src, long len);
+ void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5
+#define FF_SSE2_IDCT_PERM 6
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define RECON_SHIFT 6
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
+#define EDGE_WIDTH 16
/* h264 functions */
void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
int * range, int * sum, int edges);
+ /* ape functions */
+ /**
+ * Add contents of the second vector to the first one.
+ * @param len length of vectors, should be multiple of 16
+ */
+ void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
+ /**
+ * Add contents of the second vector to the first one.
+ * @param len length of vectors, should be multiple of 16
+ */
+ void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
+ /**
+ * Calculate scalar product of two vectors.
+ * @param len length of vectors, should be multiple of 16
+ * @param shift number of bits to discard from product
+ */
+ int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
} DSPContext;
void dsputil_static_init(void);
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
#define STRIDE_ALIGN 16
+#else
+
+#define mm_flags 0
+#define mm_support() 0
+
#endif
#ifndef DECLARE_ALIGNED_8
void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
void (*imdct_calc)(struct MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp);
+ void (*imdct_half)(struct MDCTContext *s, FFTSample *output,
+ const FFTSample *input, FFTSample *tmp);
} FFTContext;
int ff_fft_init(FFTContext *s, int nbits, int inverse);
*/
void ff_kbd_window_init(float *window, float alpha, int n);
+/**
+ * Generate a sine window.
+ * @param window pointer to half window
+ * @param n size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
void ff_imdct_calc(MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp);
+void ff_imdct_half(MDCTContext *s, FFTSample *output,
+ const FFTSample *input, FFTSample *tmp);
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp);
+void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output,
+ const FFTSample *input, FFTSample *tmp);
void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp);
+void ff_imdct_half_sse(MDCTContext *s, FFTSample *output,
+ const FFTSample *input, FFTSample *tmp);
void ff_mdct_calc(MDCTContext *s, FFTSample *out,
const FFTSample *input, FFTSample *tmp);
void ff_mdct_end(MDCTContext *s);