X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fdsputil.c;h=9eae404581c25be5da5a4bcbdc094d5d584f04e2;hb=a43bd1d71589d9350c7b2585376c1da0ea229310;hp=624ecd0a5dd514ea751d5b3592b7b7501ba7c519;hpb=6d4985bbdfa5000223523d3de079d68b66f0c90b;p=ffmpeg diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 624ecd0a5dd..9eae404581c 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -25,13 +25,14 @@ void (*ff_idct)(DCTELEM *block); void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); -void (*av_fdct)(DCTELEM *block); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); void (*clear_blocks)(DCTELEM *blocks); +int (*pix_sum)(UINT8 * pix, int line_size); +int (*pix_norm1)(UINT8 * pix, int line_size); op_pixels_abs_func pix_abs16x16; op_pixels_abs_func pix_abs16x16_x2; @@ -46,8 +47,8 @@ op_pixels_abs_func pix_abs8x8_xy2; UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; -extern INT16 default_intra_matrix[64]; -extern INT16 default_non_intra_matrix[64]; +extern INT16 ff_mpeg1_default_intra_matrix[64]; +extern INT16 ff_mpeg1_default_non_intra_matrix[64]; extern INT16 ff_mpeg4_default_intra_matrix[64]; extern INT16 ff_mpeg4_default_non_intra_matrix[64]; @@ -159,99 +160,134 @@ static void build_zigzag_end(void) } } -void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size) +int pix_sum_c(UINT8 * pix, int line_size) +{ + int s, i, j; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += pix[0]; + s += pix[1]; + s += pix[2]; + s += pix[3]; + s += pix[4]; + s += pix[5]; + s += pix[6]; + s += pix[7]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +int pix_norm1_c(UINT8 * pix, int line_size) +{ + int s, i, j; + UINT32 *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += sq[pix[0]]; + s += sq[pix[1]]; + s += sq[pix[2]]; + s += sq[pix[3]]; + s += sq[pix[4]]; + s += sq[pix[5]]; + s += sq[pix[6]]; + s += sq[pix[7]]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + + +void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) { - DCTELEM *p; - const UINT8 *pix; int i; /* read the pixels */ - p = block; - pix = pixels; for(i=0;i<8;i++) { - p[0] = pix[0]; - p[1] = pix[1]; - p[2] = pix[2]; - p[3] = pix[3]; - p[4] = pix[4]; - p[5] = pix[5]; - p[6] = pix[6]; - p[7] = pix[7]; - pix += line_size; - p += 8; + block[0] = pixels[0]; + block[1] = pixels[1]; + block[2] = pixels[2]; + block[3] = pixels[3]; + block[4] = pixels[4]; + block[5] = pixels[5]; + block[6] = pixels[6]; + block[7] = pixels[7]; + pixels += line_size; + block += 8; } } -void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){ - DCTELEM *p; +void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2, + int stride){ int i; /* read the pixels */ - p = block; for(i=0;i<8;i++) { - p[0] = s1[0] - s2[0]; - p[1] = s1[1] - s2[1]; - p[2] = s1[2] - s2[2]; - p[3] = s1[3] - s2[3]; - p[4] = s1[4] - s2[4]; - p[5] = s1[5] - s2[5]; - p[6] = s1[6] - s2[6]; - p[7] = s1[7] - s2[7]; + block[0] = s1[0] - s2[0]; + block[1] = s1[1] - s2[1]; + block[2] = s1[2] - s2[2]; + block[3] = s1[3] - s2[3]; + block[4] = s1[4] - s2[4]; + block[5] = s1[5] - s2[5]; + block[6] = s1[6] - s2[6]; + block[7] = s1[7] - s2[7]; s1 += stride; s2 += stride; - p += 8; + block += 8; } } -void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) +void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, + int line_size) { - const DCTELEM *p; - UINT8 *pix; int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; /* read the pixels */ - p = block; - pix = pixels; for(i=0;i<8;i++) { - pix[0] = cm[p[0]]; - pix[1] = cm[p[1]]; - pix[2] = cm[p[2]]; - pix[3] = cm[p[3]]; - pix[4] = cm[p[4]]; - pix[5] = cm[p[5]]; - pix[6] = cm[p[6]]; - pix[7] = cm[p[7]]; - pix += line_size; - p += 8; + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + pixels[2] = cm[block[2]]; + pixels[3] = cm[block[3]]; + pixels[4] = cm[block[4]]; + pixels[5] = cm[block[5]]; + pixels[6] = cm[block[6]]; + pixels[7] = cm[block[7]]; + + pixels += line_size; + block += 8; } } -void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) +void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels, + int line_size) { - const DCTELEM *p; - UINT8 *pix; int i; UINT8 *cm = cropTbl + MAX_NEG_CROP; /* read the pixels */ - p = block; - pix = pixels; for(i=0;i<8;i++) { - pix[0] = cm[pix[0] + p[0]]; - pix[1] = cm[pix[1] + p[1]]; - pix[2] = cm[pix[2] + p[2]]; - pix[3] = cm[pix[3] + p[3]]; - pix[4] = cm[pix[4] + p[4]]; - pix[5] = cm[pix[5] + p[5]]; - pix[6] = cm[pix[6] + p[6]]; - pix[7] = cm[pix[7] + p[7]]; - pix += line_size; - p += 8; + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels[2] = cm[pixels[2] + block[2]]; + pixels[3] = cm[pixels[3] + block[3]]; + pixels[4] = cm[pixels[4] + block[4]]; + pixels[5] = cm[pixels[5] + block[5]]; + pixels[6] = cm[pixels[6] + block[6]]; + pixels[7] = cm[pixels[7] + block[7]]; + pixels += line_size; + block += 8; } } - #if 0 #define PIXOP2(OPNAME, OP) \ @@ -579,7 +615,6 @@ void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, i }; #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) ) #endif - #define op_put(a, b) a = b PIXOP2(avg, op_avg) @@ -694,8 +729,11 @@ void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_siz #define op_avg(a, b) a = avg2(a, b) #define op_sub(a, b) a -= b +#define op_put(a, b) a = b PIXOP(DCTELEM, sub, op_sub, 8) +PIXOP(uint8_t, avg, op_avg, line_size) +PIXOP(uint8_t, put, op_put, line_size) /* not rounding primitives */ #undef avg2 @@ -703,6 +741,8 @@ PIXOP(DCTELEM, sub, op_sub, 8) #define avg2(a,b) ((a+b)>>1) #define avg4(a,b,c,d) ((a+b+c+d+1)>>2) +PIXOP(uint8_t, avg_no_rnd, op_avg, line_size) +PIXOP(uint8_t, put_no_rnd, op_put, line_size) /* motion estimation */ #undef avg2 @@ -1271,6 +1311,8 @@ void dsputil_init(void) add_pixels_clamped = add_pixels_clamped_c; gmc1= gmc1_c; clear_blocks= clear_blocks_c; + pix_sum= pix_sum_c; + pix_norm1= pix_norm1_c; pix_abs16x16 = pix_abs16x16_c; pix_abs16x16_x2 = pix_abs16x16_x2_c; @@ -1280,7 +1322,6 @@ void dsputil_init(void) pix_abs8x8_x2 = pix_abs8x8_x2_c; pix_abs8x8_y2 = pix_abs8x8_y2_c; pix_abs8x8_xy2 = pix_abs8x8_xy2_c; - av_fdct = fdct_ifast; use_permuted_idct = 1; @@ -1298,17 +1339,23 @@ void dsputil_init(void) dsputil_init_alpha(); use_permuted_idct = 0; #endif +#ifdef ARCH_POWERPC +#ifdef CONFIG_DARWIN + dsputil_init_altivec(); +#endif +#endif #ifdef SIMPLE_IDCT if (ff_idct == NULL) { ff_idct_put = simple_idct_put; ff_idct_add = simple_idct_add; use_permuted_idct=0; - } else { + } +#endif + if(ff_idct != NULL) { ff_idct_put = gen_idct_put; ff_idct_add = gen_idct_add; } -#endif if(use_permuted_idct) #ifdef SIMPLE_IDCT @@ -1332,8 +1379,8 @@ void dsputil_init(void) j = ff_alternate_vertical_scan[i]; ff_alternate_vertical_scan[i] = block_permute_op(j); } - block_permute(default_intra_matrix); - block_permute(default_non_intra_matrix); + block_permute(ff_mpeg1_default_intra_matrix); + block_permute(ff_mpeg1_default_non_intra_matrix); block_permute(ff_mpeg4_default_intra_matrix); block_permute(ff_mpeg4_default_non_intra_matrix); }