*/
#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
#include "avcodec.h"
+#include "copy_block.h"
+#include "dct.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
+#include "imgconvert.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
return s;
}
-static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
+static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
const uint8_t *s2, int stride){
int i;
}
-static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
int line_size)
{
int i;
}
}
-static void put_signed_pixels_clamped_c(const DCTELEM *block,
+static void put_signed_pixels_clamped_c(const int16_t *block,
uint8_t *restrict pixels,
int line_size)
{
}
}
-static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+static void add_pixels8_c(uint8_t *restrict pixels,
+ int16_t *block,
+ int line_size)
+{
+ int i;
+
+ for(i=0;i<8;i++) {
+ pixels[0] += block[0];
+ pixels[1] += block[1];
+ pixels[2] += block[2];
+ pixels[3] += block[3];
+ pixels[4] += block[4];
+ pixels[5] += block[5];
+ pixels[6] += block[6];
+ pixels[7] += block[7];
+ pixels += line_size;
+ block += 8;
+ }
+}
+
+static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
int line_size)
{
int i;
}
}
-static int sum_abs_dctelem_c(DCTELEM *block)
+static int sum_abs_dctelem_c(int16_t *block)
{
int sum=0, i;
for(i=0; i<64; i++)
}
}
-/**
- * Permute an 8x8 block.
- * @param block the block which will be permuted according to the given permutation vector
- * @param permutation the permutation vector
- * @param last the last non zero coefficient in scantable order, used to speed the permutation up
- * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
- * (inverse) permutated to scantable order!
- */
-void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
-{
- int i;
- DCTELEM temp[64];
-
- if(last<=0) return;
- //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
-
- for(i=0; i<=last; i++){
- const int j= scantable[i];
- temp[j]= block[j];
- block[j]=0;
- }
-
- for(i=0; i<=last; i++){
- const int j= scantable[i];
- const int perm_j= permutation[j];
- block[perm_j]= temp[j];
- }
-}
-
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
return 0;
}
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
- LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+ LOCAL_ALIGNED_16(int16_t, temp, [64]);
assert(h==8);
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
- DCTELEM dct[8][8];
+ int16_t dct[8][8];
int i;
int sum=0;
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
- LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+ LOCAL_ALIGNED_16(int16_t, temp, [64]);
int sum=0, i;
assert(h==8);
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
- LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
- DCTELEM * const bak = temp+64;
+ LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
+ int16_t * const bak = temp+64;
int sum=0, i;
assert(h==8);
s->dsp.diff_pixels(temp, src1, src2, stride);
- memcpy(bak, temp, 64*sizeof(DCTELEM));
+ memcpy(bak, temp, 64*sizeof(int16_t));
s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
s->dct_unquantize_inter(s, temp, 0, s->qscale);
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
const uint8_t *scantable= s->intra_scantable.permutated;
- LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+ LOCAL_ALIGNED_16(int16_t, temp, [64]);
LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
int i, last, run, bits, level, distortion, start_i;
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
MpegEncContext * const s= (MpegEncContext *)c;
const uint8_t *scantable= s->intra_scantable.permutated;
- LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+ LOCAL_ALIGNED_16(int16_t, temp, [64]);
int i, last, run, bits, level, start_i;
const int esc_length= s->ac_esc_length;
uint8_t * length;
return score;
}
+#define WRAPPER8_16_SQ(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+ int score=0;\
+ score +=name8(s, dst , src , stride, 8);\
+ score +=name8(s, dst+8 , src+8 , stride, 8);\
+ if(h==16){\
+ dst += 8*stride;\
+ src += 8*stride;\
+ score +=name8(s, dst , src , stride, 8);\
+ score +=name8(s, dst+8 , src+8 , stride, 8);\
+ }\
+ return score;\
+}
+
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
-static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
- int i;
- src1 += len-1;
- for(i=0; i<len; i++)
- dst[i] = src0[i] * src1[-i];
-}
-
-static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
- int i;
- for(i=0; i<len; i++)
- dst[i] = src0[i] * src1[i] + src2[i];
-}
-
-static void butterflies_float_c(float *restrict v1, float *restrict v2,
- int len)
-{
- int i;
- for (i = 0; i < len; i++) {
- float t = v1[i] - v2[i];
- v1[i] += v2[i];
- v2[i] = t;
- }
-}
-
-static void butterflies_float_interleave_c(float *dst, const float *src0,
- const float *src1, int len)
-{
- int i;
- for (i = 0; i < len; i++) {
- float f1 = src0[i];
- float f2 = src1[i];
- dst[2*i ] = f1 + f2;
- dst[2*i + 1] = f1 - f2;
- }
-}
-
-float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
-{
- float p = 0.0;
- int i;
-
- for (i = 0; i < len; i++)
- p += v1[i] * v2[i];
-
- return p;
-}
-
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
uint32_t maxi, uint32_t maxisign)
{
} while (len > 0);
}
-#define W0 2048
-#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
-#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
-#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
-#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
-#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
-#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
-#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
-
-static void wmv2_idct_row(short * b)
-{
- int s1,s2;
- int a0,a1,a2,a3,a4,a5,a6,a7;
- /*step 1*/
- a1 = W1*b[1]+W7*b[7];
- a7 = W7*b[1]-W1*b[7];
- a5 = W5*b[5]+W3*b[3];
- a3 = W3*b[5]-W5*b[3];
- a2 = W2*b[2]+W6*b[6];
- a6 = W6*b[2]-W2*b[6];
- a0 = W0*b[0]+W0*b[4];
- a4 = W0*b[0]-W0*b[4];
- /*step 2*/
- s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
- s2 = (181*(a1-a5-a7+a3)+128)>>8;
- /*step 3*/
- b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
- b[1] = (a4+a6 +s1 + (1<<7))>>8;
- b[2] = (a4-a6 +s2 + (1<<7))>>8;
- b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
- b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
- b[5] = (a4-a6 -s2 + (1<<7))>>8;
- b[6] = (a4+a6 -s1 + (1<<7))>>8;
- b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
-}
-static void wmv2_idct_col(short * b)
-{
- int s1,s2;
- int a0,a1,a2,a3,a4,a5,a6,a7;
- /*step 1, with extended precision*/
- a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
- a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
- a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
- a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
- a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
- a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
- a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
- a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
- /*step 2*/
- s1 = (181*(a1-a5+a7-a3)+128)>>8;
- s2 = (181*(a1-a5-a7+a3)+128)>>8;
- /*step 3*/
- b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
- b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
- b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
- b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
-
- b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
- b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
- b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
- b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
-}
-void ff_wmv2_idct_c(short * block){
- int i;
-
- for(i=0;i<64;i+=8){
- wmv2_idct_row(block+i);
- }
- for(i=0;i<8;i++){
- wmv2_idct_col(block+i);
- }
-}
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
- converted */
-static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_wmv2_idct_c(block);
- put_pixels_clamped_c(block, dest, line_size);
-}
-static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_wmv2_idct_c(block);
- add_pixels_clamped_c(block, dest, line_size);
-}
-static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
ff_j_rev_dct (block);
put_pixels_clamped_c(block, dest, line_size);
}
-static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
ff_j_rev_dct (block);
add_pixels_clamped_c(block, dest, line_size);
c->idct_add= ff_jref_idct_add;
c->idct = ff_j_rev_dct;
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
- }else if(avctx->idct_algo==FF_IDCT_WMV2){
- c->idct_put= ff_wmv2_idct_put_c;
- c->idct_add= ff_wmv2_idct_add_c;
- c->idct = ff_wmv2_idct_c;
- c->idct_permutation_type= FF_NO_IDCT_PERM;
}else if(avctx->idct_algo==FF_IDCT_FAAN){
c->idct_put= ff_faanidct_put;
c->idct_add= ff_faanidct_add;
c->try_8x8basis= try_8x8basis_c;
c->add_8x8basis= add_8x8basis_c;
- c->vector_fmul_reverse = vector_fmul_reverse_c;
- c->vector_fmul_add = vector_fmul_add_c;
c->vector_clipf = vector_clipf_c;
c->scalarproduct_int16 = scalarproduct_int16_c;
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
c->apply_window_int16 = apply_window_int16_c;
c->vector_clip_int32 = vector_clip_int32_c;
- c->scalarproduct_float = ff_scalarproduct_float_c;
- c->butterflies_float = butterflies_float_c;
- c->butterflies_float_interleave = butterflies_float_interleave_c;
c->shrink[0]= av_image_copy_plane;
c->shrink[1]= ff_shrink22;
c->shrink[2]= ff_shrink44;
c->shrink[3]= ff_shrink88;
+ c->add_pixels8 = add_pixels8_c;
+
+#define hpel_funcs(prefix, idx, num) \
+ c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
+ c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
+ c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
+ c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c
+
+ hpel_funcs(put, [0], 16);
+ hpel_funcs(put, [1], 8);
+ hpel_funcs(put, [2], 4);
+ hpel_funcs(put, [3], 2);
+ hpel_funcs(put_no_rnd, [0], 16);
+ hpel_funcs(put_no_rnd, [1], 8);
+ hpel_funcs(avg, [0], 16);
+ hpel_funcs(avg, [1], 8);
+ hpel_funcs(avg, [2], 4);
+ hpel_funcs(avg, [3], 2);
+ hpel_funcs(avg_no_rnd,, 16);
+
#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c
-#define dspfunc1(PFX, IDX, NUM, depth)\
- c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
- c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
- c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
- c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
-
-#define dspfunc2(PFX, IDX, NUM, depth)\
- c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
- c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
- c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
- c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
- c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
- c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
- c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
- c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
- c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
- c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
- c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
- c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
- c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
- c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
- c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
- c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
-
-
#define BIT_DEPTH_FUNCS(depth, dct)\
c->get_pixels = FUNCC(get_pixels ## dct , depth);\
c->draw_edges = FUNCC(draw_edges , depth);\
c->clear_block = FUNCC(clear_block ## dct , depth);\
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
- c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
- c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
- c->put_no_rnd_pixels_l2 = FUNCC(put_no_rnd_pixels8_l2 , depth);\
-\
- c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
- c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
- c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
- c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
- c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
- c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
-\
- dspfunc1(put , 0, 16, depth);\
- dspfunc1(put , 1, 8, depth);\
- dspfunc1(put , 2, 4, depth);\
- dspfunc1(put , 3, 2, depth);\
- dspfunc1(put_no_rnd, 0, 16, depth);\
- dspfunc1(put_no_rnd, 1, 8, depth);\
- dspfunc1(avg , 0, 16, depth);\
- dspfunc1(avg , 1, 8, depth);\
- dspfunc1(avg , 2, 4, depth);\
- dspfunc1(avg , 3, 2, depth);\
- dspfunc1(avg_no_rnd, 0, 16, depth);\
- dspfunc1(avg_no_rnd, 1, 8, depth);\
-\
- dspfunc2(put_h264_qpel, 0, 16, depth);\
- dspfunc2(put_h264_qpel, 1, 8, depth);\
- dspfunc2(put_h264_qpel, 2, 4, depth);\
- dspfunc2(put_h264_qpel, 3, 2, depth);\
- dspfunc2(avg_h264_qpel, 0, 16, depth);\
- dspfunc2(avg_h264_qpel, 1, 8, depth);\
- dspfunc2(avg_h264_qpel, 2, 4, depth);
switch (avctx->bits_per_raw_sample) {
case 9: