block[0] += 1<<(shift-1);
- for(i=0; i<4; i++){
- const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
- const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
- const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
- const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
-
- block[0 + block_stride*i]= z0 + z3;
- block[1 + block_stride*i]= z1 + z2;
- block[2 + block_stride*i]= z1 - z2;
- block[3 + block_stride*i]= z0 - z3;
- }
-
for(i=0; i<4; i++){
const int z0= block[i + block_stride*0] + block[i + block_stride*2];
const int z1= block[i + block_stride*0] - block[i + block_stride*2];
const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);
+ block[i + block_stride*0]= z0 + z3;
+ block[i + block_stride*1]= z1 + z2;
+ block[i + block_stride*2]= z1 - z2;
+ block[i + block_stride*3]= z0 - z3;
+ }
+
+ for(i=0; i<4; i++){
+ const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
+ const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
+ const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
+ const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
+
dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
for( i = 0; i < 8; i++ )
{
- const int a0 = block[0+i*8] + block[4+i*8];
- const int a2 = block[0+i*8] - block[4+i*8];
- const int a4 = (block[2+i*8]>>1) - block[6+i*8];
- const int a6 = (block[6+i*8]>>1) + block[2+i*8];
+ const int a0 = block[i+0*8] + block[i+4*8];
+ const int a2 = block[i+0*8] - block[i+4*8];
+ const int a4 = (block[i+2*8]>>1) - block[i+6*8];
+ const int a6 = (block[i+6*8]>>1) + block[i+2*8];
const int b0 = a0 + a6;
const int b2 = a2 + a4;
const int b4 = a2 - a4;
const int b6 = a0 - a6;
- const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
- const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
- const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
- const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
+ const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
+ const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
+ const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
+ const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
const int b1 = (a7>>2) + a1;
const int b3 = a3 + (a5>>2);
const int b5 = (a3>>2) - a5;
const int b7 = a7 - (a1>>2);
- block[0+i*8] = b0 + b7;
- block[7+i*8] = b0 - b7;
- block[1+i*8] = b2 + b5;
- block[6+i*8] = b2 - b5;
- block[2+i*8] = b4 + b3;
- block[5+i*8] = b4 - b3;
- block[3+i*8] = b6 + b1;
- block[4+i*8] = b6 - b1;
+ block[i+0*8] = b0 + b7;
+ block[i+7*8] = b0 - b7;
+ block[i+1*8] = b2 + b5;
+ block[i+6*8] = b2 - b5;
+ block[i+2*8] = b4 + b3;
+ block[i+5*8] = b4 - b3;
+ block[i+3*8] = b6 + b1;
+ block[i+4*8] = b6 - b1;
}
for( i = 0; i < 8; i++ )
{
- const int a0 = block[i+0*8] + block[i+4*8];
- const int a2 = block[i+0*8] - block[i+4*8];
- const int a4 = (block[i+2*8]>>1) - block[i+6*8];
- const int a6 = (block[i+6*8]>>1) + block[i+2*8];
+ const int a0 = block[0+i*8] + block[4+i*8];
+ const int a2 = block[0+i*8] - block[4+i*8];
+ const int a4 = (block[2+i*8]>>1) - block[6+i*8];
+ const int a6 = (block[6+i*8]>>1) + block[2+i*8];
const int b0 = a0 + a6;
const int b2 = a2 + a4;
const int b4 = a2 - a4;
const int b6 = a0 - a6;
- const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
- const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
- const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
- const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
+ const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
+ const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
+ const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
+ const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
const int b1 = (a7>>2) + a1;
const int b3 = a3 + (a5>>2);
// assumes all AC coefs are 0
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
int i, j;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int dc = (block[0] + 32) >> 6;
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; /* dc is added to the table base once, outside the 4x4 loops */
for( j = 0; j < 4; j++ )
{
for( i = 0; i < 4; i++ )
- dst[i] = cm[ dst[i] + dc ];
+ dst[i] = cm[ dst[i] ]; /* same table entry as indexing the un-offset table with dst[i] + dc */
dst += stride;
}
}
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
int i, j;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int dc = (block[0] + 32) >> 6;
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; /* dc is added to the table base once, outside the 8x8 loops */
for( j = 0; j < 8; j++ )
{
for( i = 0; i < 8; i++ )
- dst[i] = cm[ dst[i] + dc ];
+ dst[i] = cm[ dst[i] ]; /* same table entry as indexing the un-offset table with dst[i] + dc */
dst += stride;
}
}
ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
}
}
+/**
+ * IDCT transforms the 16 dc values and dequantizes them.
+ *
+ * Two passes of 4-point add/subtract butterflies are run over the 16 inputs
+ * (the first into a temporary array, the second across it). Each result is
+ * multiplied by qmul, rounded with +128 and >>8, and stored in output.
+ *
+ * @param output destination; the 16 results are written at the indices
+ *               16*0 .. 16*15, one per multiple of 16 (presumably the DC
+ *               slot of each of 16 blocks of 16 coefficients; confirm
+ *               against the caller)
+ * @param input  the 16 dc values, read as input[0..15] and not modified
+ * @param qmul   dequantization multiplier applied before the rounding shift
+ *
+ * NOTE(review): stride is #defined inside this function and no matching
+ * #undef is visible here; confirm the macro does not leak into later code.
+ */
+void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
+#define stride 16 /* spacing, in elements, between consecutive output slots */
+ int i;
+ int temp[16];
+ static const uint8_t x_offset[4]={0, 2*stride, 8*stride, 10*stride};
+
+ for(i=0; i<4; i++){ /* pass 1: butterfly each group of four consecutive inputs into temp */
+ const int z0= input[4*i+0] + input[4*i+1];
+ const int z1= input[4*i+0] - input[4*i+1];
+ const int z2= input[4*i+2] - input[4*i+3];
+ const int z3= input[4*i+2] + input[4*i+3];
+
+ temp[4*i+0]= z0+z3;
+ temp[4*i+1]= z0-z3;
+ temp[4*i+2]= z1-z2;
+ temp[4*i+3]= z1+z2;
+ }
+
+ for(i=0; i<4; i++){ /* pass 2: butterfly across the groups (temp entries 4 apart), then scale and store */
+ const int offset= x_offset[i];
+ const int z0= temp[4*0+i] + temp[4*2+i];
+ const int z1= temp[4*0+i] - temp[4*2+i];
+ const int z2= temp[4*1+i] - temp[4*3+i];
+ const int z3= temp[4*1+i] + temp[4*3+i];
+
+ output[stride* 0+offset]= ((((z0 + z3)*qmul + 128 ) >> 8));
+ output[stride* 1+offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
+ output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
+ output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
+ }
+}