git.sesse.net Git - ffmpeg/blob - libavcodec/aacenc_pred.c

   1 /*
   2  * AAC encoder main-type prediction
   3  * Copyright (C) 2015 Rostislav Pehlivanov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder main prediction
  25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
  26  */
  27
  28 #include "aactab.h"
  29 #include "aacenc_pred.h"
  30 #include "aacenc_utils.h"
  31 #include "aacenc_quantization.h"
  32
  33 static inline float flt16_round(float pf)
  34 {
  35     union av_intfloat32 tmp;
  36     tmp.f = pf;
  37     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
  38     return tmp.f;
  39 }
  40
  41 static inline float flt16_even(float pf)
  42 {
  43     union av_intfloat32 tmp;
  44     tmp.f = pf;
  45     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
  46     return tmp.f;
  47 }
  48
  49 static inline float flt16_trunc(float pf)
  50 {
  51     union av_intfloat32 pun;
  52     pun.f = pf;
  53     pun.i &= 0xFFFF0000U;
  54     return pun.f;
  55 }
  56
  57 static inline void predict(PredictorState *ps, float *coef, float *rcoef,
  58                            int output_enable)
  59 {
  60     const float a     = 0.953125; // 61.0 / 64
  61     float k2;
  62     float   r0 = ps->r0,     r1 = ps->r1;
  63     float cor0 = ps->cor0, cor1 = ps->cor1;
  64     float var0 = ps->var0, var1 = ps->var1;
  65
  66     ps->k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
  67         k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
  68
  69     ps->x_est = flt16_round(ps->k1*r0 + k2*r1);
  70
  71     if (output_enable)
  72         *coef -= ps->x_est;
  73     else
  74         *rcoef = *coef - ps->x_est;
  75 }
  76
  77 static inline void update_predictor(PredictorState *ps, float qcoef)
  78 {
  79     const float alpha = 0.90625;  // 29.0 / 32
  80     const float a     = 0.953125; // 61.0 / 64
  81     float k1 = ps->k1;
  82     float r0 = ps->r0;
  83     float r1 = ps->r1;
  84     float e0 = qcoef + ps->x_est;
  85     float e1 = e0 - k1 * r0;
  86     float cor0 = ps->cor0, cor1 = ps->cor1;
  87     float var0 = ps->var0, var1 = ps->var1;
  88
  89     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
  90     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
  91     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
  92     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
  93
  94     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
  95     ps->r0 = flt16_trunc(a * e0);
  96 }
  97
  98 static inline void reset_predict_state(PredictorState *ps)
  99 {
 100     ps->r0   = 0.0f;
 101     ps->r1   = 0.0f;
 102     ps->cor0 = 0.0f;
 103     ps->cor1 = 0.0f;
 104     ps->var0 = 1.0f;
 105     ps->var1 = 1.0f;
 106     ps->k1   = 0.0f;
 107     ps->x_est= 0.0f;
 108 }
 109
 110 static inline void reset_all_predictors(SingleChannelElement *sce)
 111 {
 112     int i;
 113     for (i = 0; i < MAX_PREDICTORS; i++)
 114         reset_predict_state(&sce->predictor_state[i]);
 115     for (i = 1; i < 31; i++)
 116         sce->ics.predictor_reset_count[i] = 0;
 117 }
 118
 119 static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
 120 {
 121     int i;
 122     PredictorState *ps = sce->predictor_state;
 123     sce->ics.predictor_reset_count[group_num] = 0;
 124     for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
 125         reset_predict_state(&ps[i]);
 126 }
 127
 128 void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
 129 {
 130     int sfb, k;
 131
 132     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 133         for (sfb = 0; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) {
 134             for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++)
 135                 predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
 136                         (sce->ics.predictor_present && sce->ics.prediction_used[sfb]));
 137         }
 138     }
 139 }
 140
 141 static void decode_joint_stereo(ChannelElement *cpe)
 142 {
 143     int i, w, w2, g;
 144     SingleChannelElement *sce0 = &cpe->ch[0];
 145     SingleChannelElement *sce1 = &cpe->ch[1];
 146     IndividualChannelStream *ics;
 147
 148     for (i = 0; i < MAX_PREDICTORS; i++)
 149         sce0->prcoeffs[i] = sce0->predictor_state[i].x_est;
 150
 151     ics = &sce0->ics;
 152     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 153         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 154             int start = (w+w2) * 128;
 155             for (g = 0; g < ics->num_swb; g++) {
 156                 int sfb = w*16 + g;
 157                 //apply Intensity stereo coeffs transformation
 158                 if (cpe->is_mask[sfb]) {
 159                     int p = -1 + 2 * (sce1->band_type[sfb] - 14);
 160                     float rscale = ff_aac_pow2sf_tab[-sce1->sf_idx[sfb] + POW_SF2_ZERO];
 161                     p *= 1 - 2 * cpe->ms_mask[sfb];
 162                     for (i = 0; i < ics->swb_sizes[g]; i++) {
 163                         sce0->pqcoeffs[start+i] = (sce0->prcoeffs[start+i] + p*sce0->pqcoeffs[start+i]) * rscale;
 164                     }
 165                 } else if (cpe->ms_mask[sfb] &&
 166                            sce0->band_type[sfb] < NOISE_BT &&
 167                            sce1->band_type[sfb] < NOISE_BT) {
 168                     for (i = 0; i < ics->swb_sizes[g]; i++) {
 169                         float L = sce0->pqcoeffs[start+i] + sce1->pqcoeffs[start+i];
 170                         float R = sce0->pqcoeffs[start+i] - sce1->pqcoeffs[start+i];
 171                         sce0->pqcoeffs[start+i] = L;
 172                         sce1->pqcoeffs[start+i] = R;
 173                     }
 174                 }
 175                 start += ics->swb_sizes[g];
 176             }
 177         }
 178     }
 179 }
 180
 181 static inline void prepare_predictors(SingleChannelElement *sce)
 182 {
 183     int k;
 184     for (k = 0; k < MAX_PREDICTORS; k++)
 185         predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], 0);
 186 }
 187
 188 void ff_aac_update_main_pred(AACEncContext *s, SingleChannelElement *sce, ChannelElement *cpe)
 189 {
 190     int k;
 191
 192     if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
 193         return;
 194
 195     if (cpe && cpe->common_window)
 196         decode_joint_stereo(cpe);
 197
 198     for (k = 0; k < MAX_PREDICTORS; k++)
 199         update_predictor(&sce->predictor_state[k], sce->pqcoeffs[k]);
 200
 201     if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 202         reset_all_predictors(sce);
 203     }
 204
 205     if (sce->ics.predictor_reset_group)
 206         reset_predictor_group(sce, sce->ics.predictor_reset_group);
 207 }
 208
 209 /* If inc == 0 check if it returns 0 to see if you can reset freely */
 210 static inline int update_counters(IndividualChannelStream *ics, int inc)
 211 {
 212     int i, rg = 0;
 213     for (i = 1; i < 31; i++) {
 214         ics->predictor_reset_count[i] += inc;
 215         if (!rg && ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
 216             rg = i; /* Reset this immediately */
 217     }
 218     return rg;
 219 }
 220
 221 void ff_aac_adjust_common_prediction(AACEncContext *s, ChannelElement *cpe)
 222 {
 223     int start, w, g, count = 0;
 224     SingleChannelElement *sce0 = &cpe->ch[0];
 225     SingleChannelElement *sce1 = &cpe->ch[1];
 226
 227     if (!cpe->common_window || sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
 228         return;
 229
 230     /* Predict if IS or MS is on and at least one channel is marked or when both are */
 231     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
 232         start = 0;
 233         for (g = 0; g < sce0->ics.num_swb; g++) {
 234             int sfb = w*16+g;
 235             if (sfb < PRED_SFB_START || sfb > ff_aac_pred_sfb_max[s->samplerate_index]) {
 236                 ;
 237             } else if ((cpe->is_mask[sfb] || cpe->ms_mask[sfb]) &&
 238                 (sce0->ics.prediction_used[sfb] || sce1->ics.prediction_used[sfb])) {
 239                 sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 1;
 240                 count++;
 241             } else if (sce0->ics.prediction_used[sfb] && sce1->ics.prediction_used[sfb]) {
 242                 count++;
 243             } else {
 244                 /* Restore band types, if changed - prediction never sets > RESERVED_BT */
 245                 if (sce0->ics.prediction_used[sfb] && sce0->band_type[sfb] < RESERVED_BT)
 246                     sce0->band_type[sfb] = sce0->orig_band_type[sfb];
 247                 if (sce1->ics.prediction_used[sfb] && sce1->band_type[sfb] < RESERVED_BT)
 248                     sce1->band_type[sfb] = sce1->orig_band_type[sfb];
 249                 sce0->ics.prediction_used[sfb] = sce1->ics.prediction_used[sfb] = 0;
 250             }
 251             start += sce0->ics.swb_sizes[g];
 252         }
 253     }
 254
 255     sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
 256
 257     if (!count)
 258         return;
 259
 260     sce1->ics.predictor_reset_group = sce0->ics.predictor_reset_group;
 261 }
 262
 263 static void update_pred_resets(SingleChannelElement *sce)
 264 {
 265     int i, max_group_id_c, max_frame = 0;
 266     float avg_frame = 0.0f;
 267     IndividualChannelStream *ics = &sce->ics;
 268
 269     /* Some other code probably chose the reset group */
 270     if (ics->predictor_reset_group)
 271         return;
 272
 273     if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
 274         return;
 275
 276     for (i = 1; i < 31; i++) {
 277         if (ics->predictor_reset_count[i] > max_frame) {
 278             max_group_id_c = i;
 279             max_frame = ics->predictor_reset_count[i];
 280         }
 281         avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2;
 282     }
 283
 284     if (avg_frame*2 > max_frame && max_frame > PRED_RESET_MIN ||
 285         max_frame > (2*PRED_RESET_MIN)/3) {
 286         ics->predictor_reset_group = max_group_id_c;
 287     } else {
 288         ics->predictor_reset_group = 0;
 289     }
 290 }
 291
 292 void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
 293 {
 294     int sfb, i, count = 0;
 295     float *O34  = &s->scoefs[256*0], *P34  = &s->scoefs[256*1];
 296     int cost_coeffs = PRICE_OFFSET;
 297     int cost_pred = 1+(sce->ics.predictor_reset_group ? 5 : 0) +
 298                   FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
 299
 300     memcpy(sce->orig_band_type, sce->band_type, 128*sizeof(enum BandType));
 301
 302     if (!sce->ics.predictor_initialized ||
 303         sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
 304         reset_all_predictors(sce);
 305         for (i = 1; i < 31; i++)
 306             sce->ics.predictor_reset_count[i] = i;
 307         sce->ics.predictor_initialized = 1;
 308     }
 309
 310     update_pred_resets(sce);
 311     prepare_predictors(sce);
 312     sce->ics.predictor_reset_group = 0;
 313
 314     for (sfb = PRED_SFB_START; sfb < ff_aac_pred_sfb_max[s->samplerate_index]; sfb++) {
 315         float dist1 = 0.0f, dist2 = 0.0f;
 316         int swb_start = sce->ics.swb_offset[sfb];
 317         int swb_len = sce->ics.swb_offset[sfb + 1] - swb_start;
 318         int cb1 = sce->band_type[sfb], cb2, bits1 = 0, bits2 = 0;
 319         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
 320         abs_pow34_v(O34, &sce->coeffs[swb_start], swb_len);
 321         abs_pow34_v(P34, &sce->prcoeffs[swb_start], swb_len);
 322         cb2 = find_min_book(find_max_val(1, swb_len, P34), sce->sf_idx[sfb]);
 323         if (cb2 <= cb1) {
 324             dist1 += quantize_band_cost(s, &sce->coeffs[swb_start],   O34, swb_len,
 325                                         sce->sf_idx[sfb], cb1, s->lambda / band->threshold,
 326                                         INFINITY, &bits1, 0);
 327             dist2 += quantize_band_cost(s, &sce->prcoeffs[swb_start], P34, swb_len,
 328                                         sce->sf_idx[sfb], cb2, s->lambda / band->threshold,
 329                                         INFINITY, &bits2, 0);
 330             if (dist2 <= dist1) {
 331                 sce->ics.prediction_used[sfb] = 1;
 332                 sce->band_type[sfb] = cb2;
 333                 count++;
 334             }
 335             cost_coeffs += bits1;
 336             cost_pred   += bits2;
 337         }
 338     }
 339
 340     if (count && cost_pred > cost_coeffs) {
 341         memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
 342         memcpy(sce->band_type, sce->orig_band_type, sizeof(sce->band_type));
 343         count = 0;
 344     }
 345
 346     sce->ics.predictor_present = !!count;
 347 }
 348
 349 /**
 350  * Encoder predictors data.
 351  */
 352 void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
 353 {
 354     int sfb;
 355
 356     if (!sce->ics.predictor_present ||
 357         sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
 358         return;
 359
 360     put_bits(&s->pb, 1, !!sce->ics.predictor_reset_group);
 361     if (sce->ics.predictor_reset_group)
 362         put_bits(&s->pb, 5, sce->ics.predictor_reset_group);
 363     for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); sfb++)
 364         put_bits(&s->pb, 1, sce->ics.prediction_used[sfb]);
 365 }