]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/aacsbr.c
Set cur_channel in the AAC encoder context where needed.
[ffmpeg] / libavcodec / aacsbr.c
index 7afb47dae891101ad412a42704732a317d32e2de..0de81a5025da392900fbb81b4a6a1fa568888b74 100644 (file)
@@ -21,7 +21,7 @@
  */
 
 /**
- * @file libavcodec/aacsbr.c
+ * @file
  * AAC Spectral Band Replication decoding functions
  * @author Robert Swain ( rob opendot cl )
  */
@@ -174,7 +174,7 @@ static void sbr_make_f_tablelim(SpectralBandReplication *sbr)
                                                1.18509277094158210129f,   //2^(0.49/2)
                                                1.11987160404675912501f }; //2^(0.49/3)
         const float lim_bands_per_octave_warped = bands_warped[sbr->bs_limiter_bands - 1];
-        int16_t patch_borders[5];
+        int16_t patch_borders[7];
         uint16_t *in = sbr->f_tablelim + 1, *out = sbr->f_tablelim;
 
         patch_borders[0] = sbr->kx[1];
@@ -519,6 +519,15 @@ static int sbr_hf_calc_npatches(AACContext *ac, SpectralBandReplication *sbr)
             odd = (sb + sbr->k[0]) & 1;
         }
 
+        // Requirements (14496-3 sp04 p205) sets the maximum number of patches to 5.
+        // After this check the final number of patches can still be six which is
+        // illegal however the Coding Technologies decoder check stream has a final
+        // count of 6 patches
+        if (sbr->num_patches > 5) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "Too many patches: %d\n", sbr->num_patches);
+            return -1;
+        }
+
         sbr->patch_num_subbands[sbr->num_patches]  = FFMAX(sb - usb, 0);
         sbr->patch_start_subband[sbr->num_patches] = sbr->k[0] - odd - sbr->patch_num_subbands[sbr->num_patches];
 
@@ -536,13 +545,6 @@ static int sbr_hf_calc_npatches(AACContext *ac, SpectralBandReplication *sbr)
     if (sbr->patch_num_subbands[sbr->num_patches-1] < 3 && sbr->num_patches > 1)
         sbr->num_patches--;
 
-    // Requirements (14496-3 sp04 p205) sets the maximum number of patches to 5
-    // However the Coding Technologies decoder check uses 6 patches
-    if (sbr->num_patches > 6) {
-        av_log(ac->avccontext, AV_LOG_ERROR, "Too many patches: %d\n", sbr->num_patches);
-        return -1;
-    }
-
     return 0;
 }
 
@@ -618,7 +620,7 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
                          GetBitContext *gb, SBRData *ch_data)
 {
     int i;
-    unsigned bs_pointer;
+    unsigned bs_pointer = 0;
     // frameLengthFlag ? 15 : 16; 960 sample length frames unsupported; this value is numTimeSlots
     int abs_bord_trail = 16;
     int num_rel_lead, num_rel_trail;
@@ -630,8 +632,8 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
 
     switch (ch_data->bs_frame_class = get_bits(gb, 2)) {
     case FIXFIX:
-        ch_data->bs_num_env    = 1 << get_bits(gb, 2);
-        num_rel_lead           = ch_data->bs_num_env - 1;
+        ch_data->bs_num_env                 = 1 << get_bits(gb, 2);
+        num_rel_lead                        = ch_data->bs_num_env - 1;
         if (ch_data->bs_num_env == 1)
             ch_data->bs_amp_res = 0;
 
@@ -650,22 +652,20 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
         for (i = 0; i < num_rel_lead; i++)
             ch_data->t_env[i + 1] = ch_data->t_env[i] + abs_bord_trail;
 
-        bs_pointer = 0;
-
         ch_data->bs_freq_res[1] = get_bits1(gb);
         for (i = 1; i < ch_data->bs_num_env; i++)
             ch_data->bs_freq_res[i + 1] = ch_data->bs_freq_res[1];
         break;
     case FIXVAR:
-        abs_bord_trail         += get_bits(gb, 2);
-        num_rel_trail           = get_bits(gb, 2);
-        num_rel_lead            = 0;
-        ch_data->bs_num_env     = num_rel_trail + 1;
-        ch_data->t_env[0]       = 0;
+        abs_bord_trail                     += get_bits(gb, 2);
+        num_rel_trail                       = get_bits(gb, 2);
+        ch_data->bs_num_env                 = num_rel_trail + 1;
+        ch_data->t_env[0]                   = 0;
         ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
 
         for (i = 0; i < num_rel_trail; i++)
-            ch_data->t_env[ch_data->bs_num_env - 1 - i] = ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
+            ch_data->t_env[ch_data->bs_num_env - 1 - i] =
+                ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
 
         bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
 
@@ -673,9 +673,9 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
             ch_data->bs_freq_res[ch_data->bs_num_env - i] = get_bits1(gb);
         break;
     case VARFIX:
-        ch_data->t_env[0]       = get_bits(gb, 2);
-        num_rel_lead            = get_bits(gb, 2);
-        ch_data->bs_num_env     = num_rel_lead + 1;
+        ch_data->t_env[0]                   = get_bits(gb, 2);
+        num_rel_lead                        = get_bits(gb, 2);
+        ch_data->bs_num_env                 = num_rel_lead + 1;
         ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
 
         for (i = 0; i < num_rel_lead; i++)
@@ -686,12 +686,11 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
         get_bits1_vector(gb, ch_data->bs_freq_res + 1, ch_data->bs_num_env);
         break;
     case VARVAR:
-        ch_data->t_env[0]       = get_bits(gb, 2);
-        abs_bord_trail         += get_bits(gb, 2);
-        num_rel_lead            = get_bits(gb, 2);
-        num_rel_trail           = get_bits(gb, 2);
-        ch_data->bs_num_env     = num_rel_lead + num_rel_trail + 1;
-        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
+        ch_data->t_env[0]                   = get_bits(gb, 2);
+        abs_bord_trail                     += get_bits(gb, 2);
+        num_rel_lead                        = get_bits(gb, 2);
+        num_rel_trail                       = get_bits(gb, 2);
+        ch_data->bs_num_env                 = num_rel_lead + num_rel_trail + 1;
 
         if (ch_data->bs_num_env > 5) {
             av_log(ac->avccontext, AV_LOG_ERROR,
@@ -700,10 +699,13 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
             return -1;
         }
 
+        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
+
         for (i = 0; i < num_rel_lead; i++)
             ch_data->t_env[i + 1] = ch_data->t_env[i] + 2 * get_bits(gb, 2) + 2;
         for (i = 0; i < num_rel_trail; i++)
-            ch_data->t_env[ch_data->bs_num_env - 1 - i] = ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
+            ch_data->t_env[ch_data->bs_num_env - 1 - i] =
+                ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
 
         bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
 
@@ -718,9 +720,16 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
         return -1;
     }
 
+    for (i = 1; i <= ch_data->bs_num_env; i++) {
+        if (ch_data->t_env[i-1] > ch_data->t_env[i]) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "Non monotone time borders\n");
+            return -1;
+        }
+    }
+
     ch_data->bs_num_noise = (ch_data->bs_num_env > 1) + 1;
 
-    ch_data->t_q[0] = ch_data->t_env[0];
+    ch_data->t_q[0]                     = ch_data->t_env[0];
     ch_data->t_q[ch_data->bs_num_noise] = ch_data->t_env[ch_data->bs_num_env];
     if (ch_data->bs_num_noise > 1) {
         unsigned int idx;
@@ -751,19 +760,19 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
 
 static void copy_sbr_grid(SBRData *dst, const SBRData *src) {
     //These variables are saved from the previous frame rather than copied
-    dst->bs_freq_res[0] = dst->bs_freq_res[dst->bs_num_env];
+    dst->bs_freq_res[0]    = dst->bs_freq_res[dst->bs_num_env];
     dst->t_env_num_env_old = dst->t_env[dst->bs_num_env];
-    dst->e_a[0]         = -(dst->e_a[1] != dst->bs_num_env);
+    dst->e_a[0]            = -(dst->e_a[1] != dst->bs_num_env);
 
     //These variables are read from the bitstream and therefore copied
     memcpy(dst->bs_freq_res+1, src->bs_freq_res+1, sizeof(dst->bs_freq_res)-sizeof(*dst->bs_freq_res));
     memcpy(dst->t_env,         src->t_env,         sizeof(dst->t_env));
     memcpy(dst->t_q,           src->t_q,           sizeof(dst->t_q));
-    dst->bs_num_env     = src->bs_num_env;
-    dst->bs_amp_res     = src->bs_amp_res;
-    dst->bs_num_noise   = src->bs_num_noise;
-    dst->bs_frame_class = src->bs_frame_class;
-    dst->e_a[1]         = src->e_a[1];
+    dst->bs_num_env        = src->bs_num_env;
+    dst->bs_amp_res        = src->bs_amp_res;
+    dst->bs_num_noise      = src->bs_num_noise;
+    dst->bs_frame_class    = src->bs_frame_class;
+    dst->e_a[1]            = src->e_a[1];
 }
 
 /// Read how the envelope and noise floor data is delta coded
@@ -892,18 +901,24 @@ static void read_sbr_noise(SpectralBandReplication *sbr, GetBitContext *gb,
 
 static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
                                GetBitContext *gb,
-                          int bs_extension_id, int *num_bits_left)
+                               int bs_extension_id, int *num_bits_left)
 {
 //TODO - implement ps_data for parametric stereo parsing
     switch (bs_extension_id) {
     case EXTENSION_ID_PS:
+        if (!ac->m4ac.ps) {
+            av_log(ac->avccontext, AV_LOG_ERROR, "Parametric Stereo signaled to be not-present but was found in the bitstream.\n");
+            skip_bits_long(gb, *num_bits_left); // bs_fill_bits
+            *num_bits_left = 0;
+        } else {
 #if 0
-        *num_bits_left -= ff_ps_data(gb, ps);
+            *num_bits_left -= ff_ps_data(gb, ps);
 #else
-        av_log_missing_feature(ac->avccontext, "Parametric Stereo is", 0);
-        skip_bits_long(gb, *num_bits_left); // bs_fill_bits
-        *num_bits_left = 0;
+            av_log_missing_feature(ac->avccontext, "Parametric Stereo is", 0);
+            skip_bits_long(gb, *num_bits_left); // bs_fill_bits
+            *num_bits_left = 0;
 #endif
+        }
         break;
     default:
         av_log_missing_feature(ac->avccontext, "Reserved SBR extensions are", 1);
@@ -1126,14 +1141,13 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
  */
 static void sbr_qmf_analysis(DSPContext *dsp, RDFTContext *rdft, const float *in, float *x,
                              float z[320], float W[2][32][32][2],
-                             float bias, float scale)
+                             float scale)
 {
     int i, k;
     memcpy(W[0], W[1], sizeof(W[0]));
     memcpy(x    , x+1024, (320-32)*sizeof(x[0]));
-    if (scale != 1.0f || bias != 0.0f)
-        for (i = 0; i < 1024; i++)
-            x[288 + i] = (in[i] - bias) * scale;
+    if (scale != 1.0f)
+        dsp->vector_fmul_scalar(x+288, in, scale, 1024);
     else
         memcpy(x+288, in, 1024*sizeof(*x));
     for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
@@ -1704,43 +1718,49 @@ static void sbr_hf_assemble(float Y[2][38][64][2], const float X_high[64][40][2]
     ch_data->f_indexsine  = indexsine;
 }
 
-void ff_sbr_dequant(AACContext *ac, SpectralBandReplication *sbr, int id_aac)
-{
-    if (sbr->start) {
-        sbr_dequant(sbr, id_aac);
-    }
-}
-
-void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int ch,
-                  const float* in, float* out)
+void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
+                  float* L, float* R)
 {
     int downsampled = ac->m4ac.ext_sample_rate < sbr->sample_rate;
+    int ch;
+    int nch = (id_aac == TYPE_CPE) ? 2 : 1;
 
-    /* decode channel */
-    sbr_qmf_analysis(&ac->dsp, &sbr->rdft, in, sbr->data[ch].analysis_filterbank_samples,
-                     (float*)sbr->qmf_filter_scratch,
-                     sbr->data[ch].W, ac->add_bias, 1/(-1024 * ac->sf_scale));
-    sbr_lf_gen(ac, sbr, sbr->X_low, sbr->data[ch].W);
     if (sbr->start) {
-        sbr_hf_inverse_filter(sbr->alpha0, sbr->alpha1, sbr->X_low, sbr->k[0]);
-        sbr_chirp(sbr, &sbr->data[ch]);
-        sbr_hf_gen(ac, sbr, sbr->X_high, sbr->X_low, sbr->alpha0, sbr->alpha1,
-                   sbr->data[ch].bw_array, sbr->data[ch].t_env,
-                   sbr->data[ch].bs_num_env);
-
-        // hf_adj
-        sbr_mapping(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
-        sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]);
-        sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
-        sbr_hf_assemble(sbr->data[ch].Y, sbr->X_high, sbr, &sbr->data[ch],
-                        sbr->data[ch].e_a);
+        sbr_dequant(sbr, id_aac);
     }
+    for (ch = 0; ch < nch; ch++) {
+        /* decode channel */
+        sbr_qmf_analysis(&ac->dsp, &sbr->rdft, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
+                         (float*)sbr->qmf_filter_scratch,
+                         sbr->data[ch].W, 1/(-1024 * ac->sf_scale));
+        sbr_lf_gen(ac, sbr, sbr->X_low, sbr->data[ch].W);
+        if (sbr->start) {
+            sbr_hf_inverse_filter(sbr->alpha0, sbr->alpha1, sbr->X_low, sbr->k[0]);
+            sbr_chirp(sbr, &sbr->data[ch]);
+            sbr_hf_gen(ac, sbr, sbr->X_high, sbr->X_low, sbr->alpha0, sbr->alpha1,
+                       sbr->data[ch].bw_array, sbr->data[ch].t_env,
+                       sbr->data[ch].bs_num_env);
+
+            // hf_adj
+            sbr_mapping(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
+            sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]);
+            sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
+            sbr_hf_assemble(sbr->data[ch].Y, sbr->X_high, sbr, &sbr->data[ch],
+                            sbr->data[ch].e_a);
+        }
 
-    /* synthesis */
-    sbr_x_gen(sbr, sbr->X, sbr->X_low, sbr->data[ch].Y, ch);
-    sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, out, sbr->X, sbr->qmf_filter_scratch,
-                      sbr->data[ch].synthesis_filterbank_samples,
-                      &sbr->data[ch].synthesis_filterbank_samples_offset,
+        /* synthesis */
+        sbr_x_gen(sbr, sbr->X[ch], sbr->X_low, sbr->data[ch].Y, ch);
+    }
+    sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, L, sbr->X[0], sbr->qmf_filter_scratch,
+                      sbr->data[0].synthesis_filterbank_samples,
+                      &sbr->data[0].synthesis_filterbank_samples_offset,
                       downsampled,
                       ac->add_bias, -1024 * ac->sf_scale);
+    if (nch == 2)
+        sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, R, sbr->X[1], sbr->qmf_filter_scratch,
+                          sbr->data[1].synthesis_filterbank_samples,
+                          &sbr->data[1].synthesis_filterbank_samples_offset,
+                          downsampled,
+                          ac->add_bias, -1024 * ac->sf_scale);
 }