*/
#undef V_DEBUG
+//#define V_DEBUG
+//#define AV_DEBUG(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
#include <math.h>
av_freep(&vc->residues);
av_freep(&vc->modes);
- ff_mdct_end(&vc->mdct0);
- ff_mdct_end(&vc->mdct1);
+ ff_mdct_end(&vc->mdct[0]);
+ ff_mdct_end(&vc->mdct[1]);
for(i=0;i<vc->codebook_count;++i) {
av_free(vc->codebooks[i].codevectors);
av_free(vc->mappings[i].mux);
}
av_freep(&vc->mappings);
+
+ if(vc->exp_bias){
+ av_freep(&vc->win[0]);
+ av_freep(&vc->win[1]);
+ }
}
// Parse setup header -------------------------------------------------
}
for(k=0;k<(1<<floor_setup->data.t1.class_subclasses[j]);++k) {
- floor_setup->data.t1.subclass_books[j][k]=get_bits(gb, 8)-1;
+ floor_setup->data.t1.subclass_books[j][k]=(int16_t)get_bits(gb, 8)-1;
AV_DEBUG(" book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]);
}
for (blockflag=0;blockflag<2;++blockflag)
{
- n=(blockflag ? vc->blocksize_1 : vc->blocksize_0) / 2;
+ n=vc->blocksize[blockflag]/2;
floors[floor_number].data.t0.map[blockflag]=
av_malloc((n+1) * sizeof(int_fast32_t)); // n+sentinel
vc->bitrate_minimum=get_bits_long_le(gb, 32);
bl0=get_bits(gb, 4);
bl1=get_bits(gb, 4);
- vc->blocksize_0=(1<<bl0);
- vc->blocksize_1=(1<<bl1);
+ vc->blocksize[0]=(1<<bl0);
+ vc->blocksize[1]=(1<<bl1);
if (bl0>13 || bl0<6 || bl1>13 || bl1<6 || bl1<bl0) {
av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (illegal blocksize). \n");
return 3;
}
// output format int16
- if (vc->blocksize_1/2 * vc->audio_channels * 2 >
+ if (vc->blocksize[1]/2 * vc->audio_channels * 2 >
AVCODEC_MAX_AUDIO_FRAME_SIZE) {
av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis channel count makes "
"output packets too large.\n");
return 4;
}
- vc->swin=vwin[bl0-6];
- vc->lwin=vwin[bl1-6];
+ vc->win[0]=vwin[bl0-6];
+ vc->win[1]=vwin[bl1-6];
+
+ if(vc->exp_bias){
+ int i, j;
+ for(j=0; j<2; j++){
+ float *win = av_malloc(vc->blocksize[j]/2 * sizeof(float));
+ for(i=0; i<vc->blocksize[j]/2; i++)
+ win[i] = vc->win[j][i] * (1<<15);
+ vc->win[j] = win;
+ }
+ }
if ((get_bits1(gb)) == 0) {
av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (framing flag not set). \n");
return 2;
}
- vc->channel_residues=(float *)av_malloc((vc->blocksize_1/2)*vc->audio_channels * sizeof(float));
- vc->channel_floors=(float *)av_malloc((vc->blocksize_1/2)*vc->audio_channels * sizeof(float));
- vc->saved=(float *)av_malloc((vc->blocksize_1/2)*vc->audio_channels * sizeof(float));
- vc->ret=(float *)av_malloc((vc->blocksize_1/2)*vc->audio_channels * sizeof(float));
- vc->buf=(float *)av_malloc(vc->blocksize_1 * sizeof(float));
- vc->buf_tmp=(float *)av_malloc(vc->blocksize_1 * sizeof(float));
+ vc->channel_residues=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+ vc->channel_floors=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+ vc->saved=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+ vc->ret=(float *)av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
+ vc->buf=(float *)av_malloc(vc->blocksize[1] * sizeof(float));
+ vc->buf_tmp=(float *)av_malloc(vc->blocksize[1] * sizeof(float));
vc->saved_start=0;
- ff_mdct_init(&vc->mdct0, bl0, 1);
- ff_mdct_init(&vc->mdct1, bl1, 1);
+ ff_mdct_init(&vc->mdct[0], bl0, 1);
+ ff_mdct_init(&vc->mdct[1], bl1, 1);
AV_DEBUG(" vorbis version %d \n audio_channels %d \n audio_samplerate %d \n bitrate_max %d \n bitrate_nom %d \n bitrate_min %d \n blk_0 %d blk_1 %d \n ",
- vc->version, vc->audio_channels, vc->audio_samplerate, vc->bitrate_maximum, vc->bitrate_nominal, vc->bitrate_minimum, vc->blocksize_0, vc->blocksize_1);
+ vc->version, vc->audio_channels, vc->audio_samplerate, vc->bitrate_maximum, vc->bitrate_nominal, vc->bitrate_minimum, vc->blocksize[0], vc->blocksize[1]);
/*
- BLK=vc->blocksize_0;
+ BLK=vc->blocksize[0];
for(i=0;i<BLK/2;++i) {
- vc->swin[i]=sin(0.5*3.14159265358*(sin(((float)i+0.5)/(float)BLK*3.14159265358))*(sin(((float)i+0.5)/(float)BLK*3.14159265358)));
+ vc->win[0][i]=sin(0.5*3.14159265358*(sin(((float)i+0.5)/(float)BLK*3.14159265358))*(sin(((float)i+0.5)/(float)BLK*3.14159265358)));
}
*/
int i, j, hdr_type;
vc->avccontext = avccontext;
+ dsputil_init(&vc->dsp, avccontext);
+
+ if(vc->dsp.float_to_int16 == ff_float_to_int16_c) {
+ vc->add_bias = 385;
+ vc->exp_bias = 0;
+ } else {
+ vc->add_bias = 0;
+ vc->exp_bias = 15<<23;
+ }
if (!headers_len) {
av_log(avccontext, AV_LOG_ERROR, "Extradata corrupt.\n");
adx=vf->x_list[high_neigh_offs]-vf->x_list[low_neigh_offs];
ady= ABS(dy);
err=ady*(vf->x_list[i]-vf->x_list[low_neigh_offs]);
- off=err/adx;
+ off=(int16_t)err/(int16_t)adx;
if (dy<0) {
predicted=floor1_Y_final[low_neigh_offs]-off;
} else {
dy=hy-ly;
adx=hx-lx;
ady= (dy<0) ? -dy:dy;//ABS(dy);
- base=dy/adx;
+ base=(int16_t)dy/(int16_t)adx;
AV_DEBUG(" dy %d adx %d base %d = %d \n", dy, adx, base, dy/adx);
voffset=vr->begin;
for(partition_count=0;partition_count<ptns_to_read;) { // SPEC error
if (!pass) {
+ uint_fast32_t inverse_class = inverse[vr->classifications];
for(j_times_ptns_to_read=0, j=0;j<ch_used;++j) {
if (!do_not_decode[j]) {
uint_fast32_t temp=get_vlc2(gb, vc->codebooks[vr->classbook].vlc.table,
for(i=0;i<c_p_c;++i) {
uint_fast32_t temp2;
- temp2=(((uint_fast64_t)temp) * inverse[vr->classifications])>>32;
+ temp2=(((uint_fast64_t)temp) * inverse_class)>>32;
if (partition_count+c_p_c-1-i < ptns_to_read) {
classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications;
}
if (vqbook>=0) {
uint_fast16_t coffs;
- uint_fast16_t step=vr->partition_size/vc->codebooks[vqbook].dimensions;
+ unsigned dim= vc->codebooks[vqbook].dimensions; // not uint_fast8_t: 64bit is slower here on amd64
+ uint_fast16_t step= dim==1 ? vr->partition_size
+ : FASTDIV(vr->partition_size, dim);
vorbis_codebook codebook= vc->codebooks[vqbook];
if (vr->type==0) {
voffs=voffset+j*vlen;
for(k=0;k<step;++k) {
- coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
- for(l=0;l<codebook.dimensions;++l) {
+ coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+ for(l=0;l<dim;++l) {
vec[voffs+k+l*step]+=codebook.codevectors[coffs+l]; // FPMATH
}
}
else if (vr->type==1) {
voffs=voffset+j*vlen;
for(k=0;k<step;++k) {
- coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
- for(l=0;l<codebook.dimensions;++l, ++voffs) {
+ coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+ for(l=0;l<dim;++l, ++voffs) {
vec[voffs]+=codebook.codevectors[coffs+l]; // FPMATH
AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d \n", pass, voffs, vec[voffs], codebook.codevectors[coffs+l], coffs);
}
}
}
- else if (vr->type==2 && ch==2 && (voffset&1)==0 && (codebook.dimensions&1)==0) { // most frequent case optimized
+ else if (vr->type==2 && ch==2 && (voffset&1)==0 && (dim&1)==0) { // most frequent case optimized
voffs=voffset>>1;
+ if(dim==2) {
+ for(k=0;k<step;++k) {
+ coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * 2;
+ vec[voffs+k ]+=codebook.codevectors[coffs ]; // FPMATH
+ vec[voffs+k+vlen]+=codebook.codevectors[coffs+1]; // FPMATH
+ }
+ } else
for(k=0;k<step;++k) {
- coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
- for(l=0;l<codebook.dimensions;l+=2, voffs++) {
+ coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+ for(l=0;l<dim;l+=2, voffs++) {
vec[voffs ]+=codebook.codevectors[coffs+l ]; // FPMATH
vec[voffs+vlen]+=codebook.codevectors[coffs+l+1]; // FPMATH
voffs=voffset;
for(k=0;k<step;++k) {
- coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * codebook.dimensions;
- for(l=0;l<codebook.dimensions;++l, ++voffs) {
+ coffs=get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
+ for(l=0;l<dim;++l, ++voffs) {
vec[voffs/ch+(voffs%ch)*vlen]+=codebook.codevectors[coffs+l]; // FPMATH FIXME use if and counter instead of / and %
AV_DEBUG(" pass %d offs: %d curr: %f change: %f cv offs.: %d+%d \n", pass, voffset/ch+(voffs%ch)*vlen, vec[voffset/ch+(voffs%ch)*vlen], codebook.codevectors[coffs+l], coffs, l);
return 0;
}
+void vorbis_inverse_coupling(float *mag, float *ang, int blocksize) // rewrites mag[] and ang[] in place, pair by pair; returns nothing
+{ // NOTE(review): presumably the plain-C routine behind the vc->dsp.vorbis_inverse_coupling hook - confirm where that pointer is set
+ int i;
+ for(i=0; i<blocksize; i++) // blocksize = number of (mag[i], ang[i]) pairs processed, indices 0..blocksize-1
+ {
+ if (mag[i]>0.0) { // four cases chosen by the signs of mag[i] and ang[i]; zero takes the "not positive" branch
+ if (ang[i]>0.0) {
+ ang[i]=mag[i]-ang[i]; // mag positive, ang positive: ang = mag - ang, mag unchanged
+ } else {
+ float temp=ang[i]; // mag positive, ang not positive: remember old ang before it is overwritten
+ ang[i]=mag[i]; // ang = old mag
+ mag[i]+=temp; // mag = old mag + old ang
+ }
+ } else {
+ if (ang[i]>0.0) {
+ ang[i]+=mag[i]; // mag not positive, ang positive: ang = ang + mag, mag unchanged
+ } else {
+ float temp=ang[i]; // mag not positive, ang not positive: remember old ang before it is overwritten
+ ang[i]=mag[i]; // ang = old mag
+ mag[i]-=temp; // mag = old mag - old ang
+ }
+ }
+ }
+}
+
// Decode the audio packet using the functions above
-#define BIAS 385
static int vorbis_parse_audio_packet(vorbis_context *vc) {
GetBitContext *gb=&vc->gb;
uint_fast8_t res_num=0;
int_fast16_t retlen=0;
uint_fast16_t saved_start=0;
+ float fadd_bias = vc->add_bias;
if (get_bits1(gb)) {
av_log(vc->avccontext, AV_LOG_ERROR, "Not a Vorbis I audio packet.\n");
next_window=get_bits1(gb);
}
- blocksize=vc->modes[mode_number].blockflag ? vc->blocksize_1 : vc->blocksize_0;
+ blocksize=vc->blocksize[vc->modes[mode_number].blockflag];
memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
mag=vc->channel_residues+res_chan[mapping->magnitude[i]]*blocksize/2;
ang=vc->channel_residues+res_chan[mapping->angle[i]]*blocksize/2;
- for(j=0;j<blocksize/2;++j) {
- float temp;
- if (mag[j]>0.0) {
- if (ang[j]>0.0) {
- ang[j]=mag[j]-ang[j];
- } else {
- temp=ang[j];
- ang[j]=mag[j];
- mag[j]+=temp;
- }
- } else {
- if (ang[j]>0.0) {
- ang[j]+=mag[j];
- } else {
- temp=ang[j];
- ang[j]=mag[j];
- mag[j]-=temp;
- }
- }
- }
+ vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
}
// Dotproduct
for(j=0, ch_floor_ptr=vc->channel_floors;j<vc->audio_channels;++j,ch_floor_ptr+=blocksize/2) {
ch_res_ptr=vc->channel_residues+res_chan[j]*blocksize/2;
-
- for(i=0;i<blocksize/2;++i) {
- ch_floor_ptr[i]*=ch_res_ptr[i]; //FPMATH
- }
+ vc->dsp.vector_fmul(ch_floor_ptr, ch_res_ptr, blocksize/2);
}
// MDCT, overlap/add, save data for next overlapping FPMATH
for(j=0;j<vc->audio_channels;++j) {
uint_fast8_t step=vc->audio_channels;
uint_fast16_t k;
- float *saved=vc->saved+j*vc->blocksize_1/2;
+ float *saved=vc->saved+j*vc->blocksize[1]/2;
float *ret=vc->ret;
- const float *lwin=vc->lwin;
- const float *swin=vc->swin;
+ const float *lwin=vc->win[1];
+ const float *swin=vc->win[0];
float *buf=vc->buf;
float *buf_tmp=vc->buf_tmp;
saved_start=vc->saved_start;
- ff_imdct_calc(vc->modes[mode_number].blockflag ? &vc->mdct1 : &vc->mdct0, buf, ch_floor_ptr, buf_tmp);
+ vc->mdct[0].fft.imdct_calc(&vc->mdct[vc->modes[mode_number].blockflag], buf, ch_floor_ptr, buf_tmp);
+ //FIXME process channels together, to allow faster simd vector_fmul_add_add?
if (vc->modes[mode_number].blockflag) {
// -- overlap/add
if (previous_window) {
- for(k=j, i=0;i<vc->blocksize_1/2;++i, k+=step) {
- ret[k]=saved[i]+buf[i]*lwin[i]+BIAS;
- }
- retlen=vc->blocksize_1/2;
+ vc->dsp.vector_fmul_add_add(ret+j, buf, lwin, saved, vc->add_bias, vc->blocksize[1]/2, step);
+ retlen=vc->blocksize[1]/2;
} else {
- buf += (vc->blocksize_1-vc->blocksize_0)/4;
- for(k=j, i=0;i<vc->blocksize_0/2;++i, k+=step) {
- ret[k]=saved[i]+buf[i]*swin[i]+BIAS;
- }
- buf += vc->blocksize_0/2;
- for(i=0;i<(vc->blocksize_1-vc->blocksize_0)/4;++i, k+=step) {
- ret[k]=buf[i]+BIAS;
+ int len = (vc->blocksize[1]-vc->blocksize[0])/4;
+ buf += len;
+ vc->dsp.vector_fmul_add_add(ret+j, buf, swin, saved, vc->add_bias, vc->blocksize[0]/2, step);
+ k = vc->blocksize[0]/2*step + j;
+ buf += vc->blocksize[0]/2;
+ if(vc->exp_bias){
+ for(i=0; i<len; i++, k+=step)
+ ((uint32_t*)ret)[k] = ((uint32_t*)buf)[i] + vc->exp_bias; // ret[k]=buf[i]*(1<<bias)
+ } else {
+ for(i=0; i<len; i++, k+=step)
+ ret[k] = buf[i] + fadd_bias;
}
buf=vc->buf;
- retlen=vc->blocksize_0/2+(vc->blocksize_1-vc->blocksize_0)/4;
+ retlen=vc->blocksize[0]/2+len;
}
// -- save
if (next_window) {
- buf += vc->blocksize_1/2;
- lwin += vc->blocksize_1/2-1;
- for(i=0;i<vc->blocksize_1/2;++i) {
- saved[i]=buf[i]*lwin[-i];
- }
+ buf += vc->blocksize[1]/2;
+ vc->dsp.vector_fmul_reverse(saved, buf, lwin, vc->blocksize[1]/2);
saved_start=0;
} else {
- saved_start=(vc->blocksize_1-vc->blocksize_0)/4;
- buf += vc->blocksize_1/2;
- for(i=0;i<saved_start;++i) {
- saved[i]=buf[i];
- }
- swin += vc->blocksize_0/2-1;
- for(i=0;i<vc->blocksize_0/2;++i) {
- saved[saved_start+i]=buf[saved_start+i]*swin[-i];
- }
+ saved_start=(vc->blocksize[1]-vc->blocksize[0])/4;
+ buf += vc->blocksize[1]/2;
+ for(i=0; i<saved_start; i++)
+ ((uint32_t*)saved)[i] = ((uint32_t*)buf)[i] + vc->exp_bias;
+ vc->dsp.vector_fmul_reverse(saved+saved_start, buf+saved_start, swin, vc->blocksize[0]/2);
}
} else {
// --overlap/add
- for(k=j, i=0;i<saved_start;++i, k+=step) {
- ret[k]=saved[i]+BIAS;
- }
- for(i=0;i<vc->blocksize_0/2;++i, k+=step) {
- ret[k]=saved[saved_start+i]+buf[i]*swin[i]+BIAS;
+ if(vc->add_bias) {
+ for(k=j, i=0;i<saved_start;++i, k+=step)
+ ret[k] = saved[i] + fadd_bias;
+ } else {
+ for(k=j, i=0;i<saved_start;++i, k+=step)
+ ret[k] = saved[i];
}
- retlen=saved_start+vc->blocksize_0/2;
+ vc->dsp.vector_fmul_add_add(ret+k, buf, swin, saved+saved_start, vc->add_bias, vc->blocksize[0]/2, step);
+ retlen=saved_start+vc->blocksize[0]/2;
// -- save
- buf += vc->blocksize_0/2;
- swin += vc->blocksize_0/2-1;
- for(i=0;i<vc->blocksize_0/2;++i) {
- saved[i]=buf[i]*swin[-i];
- }
+ buf += vc->blocksize[0]/2;
+ vc->dsp.vector_fmul_reverse(saved, buf, swin, vc->blocksize[0]/2);
saved_start=0;
}
}
AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
- for(i=0;i<len;++i) {
- int_fast32_t tmp= ((int32_t*)vc->ret)[i];
- if(tmp & 0xf0000){
-// tmp= (0x43c0ffff - tmp)>>31; //ask gcc devs why this is slower
- if(tmp > 0x43c0ffff) tmp= 0xFFFF;
- else tmp= 0;
- }
- ((int16_t*)data)[i]=tmp - 0x8000;
- }
-
+ vc->dsp.float_to_int16(data, vc->ret, len);
*data_size=len*2;
return buf_size ;