int qscale, int *overflow)
{
int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ...
- const UINT16 *qmat, *bias;
- static __align8 INT16 temp_block[64];
+ const uint16_t *qmat, *bias;
+ __align8 int16_t temp_block[64];
+
+ assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
+
+ //s->fdct (block);
+ RENAMEl(ff_fdct) (block); //cant be anything else ...
- av_fdct (block);
+ if(s->dct_error_sum)
+ s->denoise_dct(s, block);
if (s->mb_intra) {
int dummy;
if (!s->h263_aic) {
#if 1
asm volatile (
- "xorl %%edx, %%edx \n\t"
- "mul %%ecx \n\t"
+ "imul %%ecx \n\t"
: "=d" (level), "=a"(dummy)
- : "a" (block[0] + (q >> 1)), "c" (inverse[q])
+ : "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
);
#else
asm volatile (
"divw %%cx \n\t"
"movzwl %%ax, %%eax \n\t"
: "=a" (level)
- : "a" (block[0] + (q >> 1)), "c" (q)
+ : "a" ((block[0]>>2) + q), "c" (q<<1)
: "%edx"
);
#endif
} else
/* For AIC we skip quant/dequant of INTRADC */
- level = block[0];
+ level = (block[0] + 4)>>3;
block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q;
last_non_zero_p1 = 1;
- bias = s->q_intra_matrix16_bias[qscale];
- qmat = s->q_intra_matrix16[qscale];
+ bias = s->q_intra_matrix16[qscale][1];
+ qmat = s->q_intra_matrix16[qscale][0];
} else {
last_non_zero_p1 = 0;
- bias = s->q_inter_matrix16_bias[qscale];
- qmat = s->q_inter_matrix16[qscale];
+ bias = s->q_inter_matrix16[qscale][1];
+ qmat = s->q_inter_matrix16[qscale][0];
}
- if(s->out_format == FMT_H263){
+ if(s->out_format == FMT_H263 && s->mpeg_quant==0){
asm volatile(
"movd %%eax, %%mm3 \n\t" // last_non_zero_p1
);
}
- if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute
-// last_non_zero_p1=64;
- /* permute for IDCT */
- asm volatile(
- "movl %0, %%eax \n\t"
- "pushl %%ebp \n\t"
- "movl %%esp, " MANGLE(esp_temp) "\n\t"
- "1: \n\t"
- "movzbl (%1, %%eax), %%ebx \n\t"
- "movzbl 1(%1, %%eax), %%ebp \n\t"
- "movw (%2, %%ebx, 2), %%cx \n\t"
- "movw (%2, %%ebp, 2), %%sp \n\t"
- "movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t"
- "movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
- "movw %%cx, (%3, %%ebx, 2) \n\t"
- "movw %%sp, (%3, %%ebp, 2) \n\t"
- "addl $2, %%eax \n\t"
- " js 1b \n\t"
- "movl " MANGLE(esp_temp) ", %%esp\n\t"
- "popl %%ebp \n\t"
- :
- : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block)
- : "%eax", "%ebx", "%ecx"
- );
+ if(s->mb_intra) block[0]= level;
+ else block[0]= temp_block[0];
+
+ if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
+ if(last_non_zero_p1 <= 1) goto end;
+ block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
+ block[0x20] = temp_block[0x10];
+ if(last_non_zero_p1 <= 4) goto end;
+ block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
+ block[0x09] = temp_block[0x03];
+ if(last_non_zero_p1 <= 7) goto end;
+ block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
+ block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
+ if(last_non_zero_p1 <= 11) goto end;
+ block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
+ block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
+ block[0x0C] = temp_block[0x05];
+ if(last_non_zero_p1 <= 16) goto end;
+ block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
+ block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
+ block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
+ block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
+ if(last_non_zero_p1 <= 24) goto end;
+ block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
+ block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
+ block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
+ block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
+ if(last_non_zero_p1 <= 32) goto end;
+ block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
+ block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
+ block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
+ block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
+ if(last_non_zero_p1 <= 40) goto end;
+ block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
+ block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
+ block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
+ block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
+ if(last_non_zero_p1 <= 48) goto end;
+ block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+ block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
+ block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
+ if(last_non_zero_p1 <= 56) goto end;
+ block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
+ block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
+ block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
+ block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+ }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
+ if(last_non_zero_p1 <= 1) goto end;
+ block[0x04] = temp_block[0x01];
+ block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+ if(last_non_zero_p1 <= 4) goto end;
+ block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
+ block[0x05] = temp_block[0x03];
+ if(last_non_zero_p1 <= 7) goto end;
+ block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
+ block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+ if(last_non_zero_p1 <= 11) goto end;
+ block[0x1C] = temp_block[0x19];
+ block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
+ block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
+ if(last_non_zero_p1 <= 16) goto end;
+ block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
+ block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
+ block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+ block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
+ if(last_non_zero_p1 <= 24) goto end;
+ block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
+ block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
+ block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
+ block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
+ if(last_non_zero_p1 <= 32) goto end;
+ block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
+ block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+ block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
+ block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
+ if(last_non_zero_p1 <= 40) goto end;
+ block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
+ block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+ block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
+ block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
+ if(last_non_zero_p1 <= 48) goto end;
+ block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
+ block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
+ block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
+ if(last_non_zero_p1 <= 56) goto end;
+ block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
+ block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
+ block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+ block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+ }else{
+ if(last_non_zero_p1 <= 1) goto end;
+ block[0x01] = temp_block[0x01];
+ block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+ if(last_non_zero_p1 <= 4) goto end;
+ block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
+ block[0x03] = temp_block[0x03];
+ if(last_non_zero_p1 <= 7) goto end;
+ block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
+ block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+ if(last_non_zero_p1 <= 11) goto end;
+ block[0x19] = temp_block[0x19];
+ block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
+ block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
+ if(last_non_zero_p1 <= 16) goto end;
+ block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
+ block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
+ block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+ block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
+ if(last_non_zero_p1 <= 24) goto end;
+ block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
+ block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
+ block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
+ block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
+ if(last_non_zero_p1 <= 32) goto end;
+ block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
+ block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+ block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
+ block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
+ if(last_non_zero_p1 <= 40) goto end;
+ block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
+ block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+ block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
+ block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
+ if(last_non_zero_p1 <= 48) goto end;
+ block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+ block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
+ block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+ block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
+ if(last_non_zero_p1 <= 56) goto end;
+ block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
+ block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
+ block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+ block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+ }
+ end:
/*
for(i=0; i<last_non_zero_p1; i++)
{
block[block_permute_op(j)]= temp_block[j];
}
*/
-//block_permute(block);
return last_non_zero_p1 - 1;
}