git.sesse.net Git - ffmpeg/blob - libavcodec/ra144enc.c

   1 /*
   2  * Real Audio 1.0 (14.4K) encoder
   3  * Copyright (c) 2010 Francesco Lavra <francescolavra@interfree.it>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * Real Audio 1.0 (14.4K) encoder
  25  * @author Francesco Lavra <francescolavra@interfree.it>
  26  */
  27
  28 #include <float.h>
  29
  30 #include "avcodec.h"
  31 #include "put_bits.h"
  32 #include "celp_filters.h"
  33 #include "ra144.h"
  34
  35
  36 static av_cold int ra144_encode_init(AVCodecContext * avctx)
  37 {
  38     RA144Context *ractx;
  39     int ret;
  40
  41     if (avctx->sample_fmt != AV_SAMPLE_FMT_S16) {
  42         av_log(avctx, AV_LOG_ERROR, "invalid sample format\n");
  43         return -1;
  44     }
  45     if (avctx->channels != 1) {
  46         av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n",
  47                avctx->channels);
  48         return -1;
  49     }
  50     avctx->frame_size = NBLOCKS * BLOCKSIZE;
  51     avctx->bit_rate = 8000;
  52     ractx = avctx->priv_data;
  53     ractx->lpc_coef[0] = ractx->lpc_tables[0];
  54     ractx->lpc_coef[1] = ractx->lpc_tables[1];
  55     ractx->avctx = avctx;
  56     ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
  57                       FF_LPC_TYPE_LEVINSON);
  58     return ret;
  59 }
  60
  61
  62 static av_cold int ra144_encode_close(AVCodecContext *avctx)
  63 {
  64     RA144Context *ractx = avctx->priv_data;
  65     ff_lpc_end(&ractx->lpc_ctx);
  66     return 0;
  67 }
  68
  69
  70 /**
  71  * Quantize a value by searching a sorted table for the element with the
  72  * nearest value
  73  *
  74  * @param value value to quantize
  75  * @param table array containing the quantization table
  76  * @param size size of the quantization table
  77  * @return index of the quantization table corresponding to the element with the
  78  *         nearest value
  79  */
  80 static int quantize(int value, const int16_t *table, unsigned int size)
  81 {
  82     unsigned int low = 0, high = size - 1;
  83
  84     while (1) {
  85         int index = (low + high) >> 1;
  86         int error = table[index] - value;
  87
  88         if (index == low)
  89             return table[high] + error > value ? low : high;
  90         if (error > 0) {
  91             high = index;
  92         } else {
  93             low = index;
  94         }
  95     }
  96 }
  97
  98
  99 /**
 100  * Orthogonalize a vector to another vector
 101  *
 102  * @param v vector to orthogonalize
 103  * @param u vector against which orthogonalization is performed
 104  */
 105 static void orthogonalize(float *v, const float *u)
 106 {
 107     int i;
 108     float num = 0, den = 0;
 109
 110     for (i = 0; i < BLOCKSIZE; i++) {
 111         num += v[i] * u[i];
 112         den += u[i] * u[i];
 113     }
 114     num /= den;
 115     for (i = 0; i < BLOCKSIZE; i++)
 116         v[i] -= num * u[i];
 117 }
 118
 119
 120 /**
 121  * Calculate match score and gain of an LPC-filtered vector with respect to
 122  * input data, possibly othogonalizing it to up to 2 other vectors
 123  *
 124  * @param work array used to calculate the filtered vector
 125  * @param coefs coefficients of the LPC filter
 126  * @param vect original vector
 127  * @param ortho1 first vector against which orthogonalization is performed
 128  * @param ortho2 second vector against which orthogonalization is performed
 129  * @param data input data
 130  * @param score pointer to variable where match score is returned
 131  * @param gain pointer to variable where gain is returned
 132  */
 133 static void get_match_score(float *work, const float *coefs, float *vect,
 134                             const float *ortho1, const float *ortho2,
 135                             const float *data, float *score, float *gain)
 136 {
 137     float c, g;
 138     int i;
 139
 140     ff_celp_lp_synthesis_filterf(work, coefs, vect, BLOCKSIZE, LPC_ORDER);
 141     if (ortho1)
 142         orthogonalize(work, ortho1);
 143     if (ortho2)
 144         orthogonalize(work, ortho2);
 145     c = g = 0;
 146     for (i = 0; i < BLOCKSIZE; i++) {
 147         g += work[i] * work[i];
 148         c += data[i] * work[i];
 149     }
 150     if (c <= 0) {
 151         *score = 0;
 152         return;
 153     }
 154     *gain = c / g;
 155     *score = *gain * c;
 156 }
 157
 158
 159 /**
 160  * Create a vector from the adaptive codebook at a given lag value
 161  *
 162  * @param vect array where vector is stored
 163  * @param cb adaptive codebook
 164  * @param lag lag value
 165  */
 166 static void create_adapt_vect(float *vect, const int16_t *cb, int lag)
 167 {
 168     int i;
 169
 170     cb += BUFFERSIZE - lag;
 171     for (i = 0; i < FFMIN(BLOCKSIZE, lag); i++)
 172         vect[i] = cb[i];
 173     if (lag < BLOCKSIZE)
 174         for (i = 0; i < BLOCKSIZE - lag; i++)
 175             vect[lag + i] = cb[i];
 176 }
 177
 178
 179 /**
 180  * Search the adaptive codebook for the best entry and gain and remove its
 181  * contribution from input data
 182  *
 183  * @param adapt_cb array from which the adaptive codebook is extracted
 184  * @param work array used to calculate LPC-filtered vectors
 185  * @param coefs coefficients of the LPC filter
 186  * @param data input data
 187  * @return index of the best entry of the adaptive codebook
 188  */
 189 static int adaptive_cb_search(const int16_t *adapt_cb, float *work,
 190                               const float *coefs, float *data)
 191 {
 192     int i, best_vect;
 193     float score, gain, best_score, best_gain;
 194     float exc[BLOCKSIZE];
 195
 196     gain = best_score = 0;
 197     for (i = BLOCKSIZE / 2; i <= BUFFERSIZE; i++) {
 198         create_adapt_vect(exc, adapt_cb, i);
 199         get_match_score(work, coefs, exc, NULL, NULL, data, &score, &gain);
 200         if (score > best_score) {
 201             best_score = score;
 202             best_vect = i;
 203             best_gain = gain;
 204         }
 205     }
 206     if (!best_score)
 207         return 0;
 208
 209     /**
 210      * Re-calculate the filtered vector from the vector with maximum match score
 211      * and remove its contribution from input data.
 212      */
 213     create_adapt_vect(exc, adapt_cb, best_vect);
 214     ff_celp_lp_synthesis_filterf(work, coefs, exc, BLOCKSIZE, LPC_ORDER);
 215     for (i = 0; i < BLOCKSIZE; i++)
 216         data[i] -= best_gain * work[i];
 217     return best_vect - BLOCKSIZE / 2 + 1;
 218 }
 219
 220
 221 /**
 222  * Find the best vector of a fixed codebook by applying an LPC filter to
 223  * codebook entries, possibly othogonalizing them to up to 2 other vectors and
 224  * matching the results with input data
 225  *
 226  * @param work array used to calculate the filtered vectors
 227  * @param coefs coefficients of the LPC filter
 228  * @param cb fixed codebook
 229  * @param ortho1 first vector against which orthogonalization is performed
 230  * @param ortho2 second vector against which orthogonalization is performed
 231  * @param data input data
 232  * @param idx pointer to variable where the index of the best codebook entry is
 233  *        returned
 234  * @param gain pointer to variable where the gain of the best codebook entry is
 235  *        returned
 236  */
 237 static void find_best_vect(float *work, const float *coefs,
 238                            const int8_t cb[][BLOCKSIZE], const float *ortho1,
 239                            const float *ortho2, float *data, int *idx,
 240                            float *gain)
 241 {
 242     int i, j;
 243     float g, score, best_score;
 244     float vect[BLOCKSIZE];
 245
 246     *idx = *gain = best_score = 0;
 247     for (i = 0; i < FIXED_CB_SIZE; i++) {
 248         for (j = 0; j < BLOCKSIZE; j++)
 249             vect[j] = cb[i][j];
 250         get_match_score(work, coefs, vect, ortho1, ortho2, data, &score, &g);
 251         if (score > best_score) {
 252             best_score = score;
 253             *idx = i;
 254             *gain = g;
 255         }
 256     }
 257 }
 258
 259
 260 /**
 261  * Search the two fixed codebooks for the best entry and gain
 262  *
 263  * @param work array used to calculate LPC-filtered vectors
 264  * @param coefs coefficients of the LPC filter
 265  * @param data input data
 266  * @param cba_idx index of the best entry of the adaptive codebook
 267  * @param cb1_idx pointer to variable where the index of the best entry of the
 268  *        first fixed codebook is returned
 269  * @param cb2_idx pointer to variable where the index of the best entry of the
 270  *        second fixed codebook is returned
 271  */
 272 static void fixed_cb_search(float *work, const float *coefs, float *data,
 273                             int cba_idx, int *cb1_idx, int *cb2_idx)
 274 {
 275     int i, ortho_cb1;
 276     float gain;
 277     float cba_vect[BLOCKSIZE], cb1_vect[BLOCKSIZE];
 278     float vect[BLOCKSIZE];
 279
 280     /**
 281      * The filtered vector from the adaptive codebook can be retrieved from
 282      * work, because this function is called just after adaptive_cb_search().
 283      */
 284     if (cba_idx)
 285         memcpy(cba_vect, work, sizeof(cba_vect));
 286
 287     find_best_vect(work, coefs, ff_cb1_vects, cba_idx ? cba_vect : NULL, NULL,
 288                    data, cb1_idx, &gain);
 289
 290     /**
 291      * Re-calculate the filtered vector from the vector with maximum match score
 292      * and remove its contribution from input data.
 293      */
 294     if (gain) {
 295         for (i = 0; i < BLOCKSIZE; i++)
 296             vect[i] = ff_cb1_vects[*cb1_idx][i];
 297         ff_celp_lp_synthesis_filterf(work, coefs, vect, BLOCKSIZE, LPC_ORDER);
 298         if (cba_idx)
 299             orthogonalize(work, cba_vect);
 300         for (i = 0; i < BLOCKSIZE; i++)
 301             data[i] -= gain * work[i];
 302         memcpy(cb1_vect, work, sizeof(cb1_vect));
 303         ortho_cb1 = 1;
 304     } else
 305         ortho_cb1 = 0;
 306
 307     find_best_vect(work, coefs, ff_cb2_vects, cba_idx ? cba_vect : NULL,
 308                    ortho_cb1 ? cb1_vect : NULL, data, cb2_idx, &gain);
 309 }
 310
 311
 312 /**
 313  * Encode a subblock of the current frame
 314  *
 315  * @param ractx encoder context
 316  * @param sblock_data input data of the subblock
 317  * @param lpc_coefs coefficients of the LPC filter
 318  * @param rms RMS of the reflection coefficients
 319  * @param pb pointer to PutBitContext of the current frame
 320  */
 321 static void ra144_encode_subblock(RA144Context *ractx,
 322                                   const int16_t *sblock_data,
 323                                   const int16_t *lpc_coefs, unsigned int rms,
 324                                   PutBitContext *pb)
 325 {
 326     float data[BLOCKSIZE], work[LPC_ORDER + BLOCKSIZE];
 327     float coefs[LPC_ORDER];
 328     float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE];
 329     int16_t cba_vect[BLOCKSIZE];
 330     int cba_idx, cb1_idx, cb2_idx, gain;
 331     int i, n, m[3];
 332     float g[3];
 333     float error, best_error;
 334
 335     for (i = 0; i < LPC_ORDER; i++) {
 336         work[i] = ractx->curr_sblock[BLOCKSIZE + i];
 337         coefs[i] = lpc_coefs[i] * (1/4096.0);
 338     }
 339
 340     /**
 341      * Calculate the zero-input response of the LPC filter and subtract it from
 342      * input data.
 343      */
 344     memset(data, 0, sizeof(data));
 345     ff_celp_lp_synthesis_filterf(work + LPC_ORDER, coefs, data, BLOCKSIZE,
 346                                  LPC_ORDER);
 347     for (i = 0; i < BLOCKSIZE; i++) {
 348         zero[i] = work[LPC_ORDER + i];
 349         data[i] = sblock_data[i] - zero[i];
 350     }
 351
 352     /**
 353      * Codebook search is performed without taking into account the contribution
 354      * of the previous subblock, since it has been just subtracted from input
 355      * data.
 356      */
 357     memset(work, 0, LPC_ORDER * sizeof(*work));
 358
 359     cba_idx = adaptive_cb_search(ractx->adapt_cb, work + LPC_ORDER, coefs,
 360                                  data);
 361     if (cba_idx) {
 362         /**
 363          * The filtered vector from the adaptive codebook can be retrieved from
 364          * work, see implementation of adaptive_cb_search().
 365          */
 366         memcpy(cba, work + LPC_ORDER, sizeof(cba));
 367
 368         ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
 369         m[0] = (ff_irms(cba_vect) * rms) >> 12;
 370     }
 371     fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
 372     for (i = 0; i < BLOCKSIZE; i++) {
 373         cb1[i] = ff_cb1_vects[cb1_idx][i];
 374         cb2[i] = ff_cb2_vects[cb2_idx][i];
 375     }
 376     ff_celp_lp_synthesis_filterf(work + LPC_ORDER, coefs, cb1, BLOCKSIZE,
 377                                  LPC_ORDER);
 378     memcpy(cb1, work + LPC_ORDER, sizeof(cb1));
 379     m[1] = (ff_cb1_base[cb1_idx] * rms) >> 8;
 380     ff_celp_lp_synthesis_filterf(work + LPC_ORDER, coefs, cb2, BLOCKSIZE,
 381                                  LPC_ORDER);
 382     memcpy(cb2, work + LPC_ORDER, sizeof(cb2));
 383     m[2] = (ff_cb2_base[cb2_idx] * rms) >> 8;
 384     best_error = FLT_MAX;
 385     gain = 0;
 386     for (n = 0; n < 256; n++) {
 387         g[1] = ((ff_gain_val_tab[n][1] * m[1]) >> ff_gain_exp_tab[n]) *
 388                (1/4096.0);
 389         g[2] = ((ff_gain_val_tab[n][2] * m[2]) >> ff_gain_exp_tab[n]) *
 390                (1/4096.0);
 391         error = 0;
 392         if (cba_idx) {
 393             g[0] = ((ff_gain_val_tab[n][0] * m[0]) >> ff_gain_exp_tab[n]) *
 394                    (1/4096.0);
 395             for (i = 0; i < BLOCKSIZE; i++) {
 396                 data[i] = zero[i] + g[0] * cba[i] + g[1] * cb1[i] +
 397                           g[2] * cb2[i];
 398                 error += (data[i] - sblock_data[i]) *
 399                          (data[i] - sblock_data[i]);
 400             }
 401         } else {
 402             for (i = 0; i < BLOCKSIZE; i++) {
 403                 data[i] = zero[i] + g[1] * cb1[i] + g[2] * cb2[i];
 404                 error += (data[i] - sblock_data[i]) *
 405                          (data[i] - sblock_data[i]);
 406             }
 407         }
 408         if (error < best_error) {
 409             best_error = error;
 410             gain = n;
 411         }
 412     }
 413     put_bits(pb, 7, cba_idx);
 414     put_bits(pb, 8, gain);
 415     put_bits(pb, 7, cb1_idx);
 416     put_bits(pb, 7, cb2_idx);
 417     ff_subblock_synthesis(ractx, lpc_coefs, cba_idx, cb1_idx, cb2_idx, rms,
 418                           gain);
 419 }
 420
 421
 422 static int ra144_encode_frame(AVCodecContext *avctx, uint8_t *frame,
 423                               int buf_size, void *data)
 424 {
 425     static const uint8_t sizes[LPC_ORDER] = {64, 32, 32, 16, 16, 8, 8, 8, 8, 4};
 426     static const uint8_t bit_sizes[LPC_ORDER] = {6, 5, 5, 4, 4, 3, 3, 3, 3, 2};
 427     RA144Context *ractx;
 428     PutBitContext pb;
 429     int32_t lpc_data[NBLOCKS * BLOCKSIZE];
 430     int32_t lpc_coefs[LPC_ORDER][MAX_LPC_ORDER];
 431     int shift[LPC_ORDER];
 432     int16_t block_coefs[NBLOCKS][LPC_ORDER];
 433     int lpc_refl[LPC_ORDER];    /**< reflection coefficients of the frame */
 434     unsigned int refl_rms[NBLOCKS]; /**< RMS of the reflection coefficients */
 435     int energy = 0;
 436     int i, idx;
 437
 438     if (buf_size < FRAMESIZE) {
 439         av_log(avctx, AV_LOG_ERROR, "output buffer too small\n");
 440         return 0;
 441     }
 442     ractx = avctx->priv_data;
 443
 444     /**
 445      * Since the LPC coefficients are calculated on a frame centered over the
 446      * fourth subframe, to encode a given frame, data from the next frame is
 447      * needed. In each call to this function, the previous frame (whose data are
 448      * saved in the encoder context) is encoded, and data from the current frame
 449      * are saved in the encoder context to be used in the next function call.
 450      */
 451     for (i = 0; i < (2 * BLOCKSIZE + BLOCKSIZE / 2); i++) {
 452         lpc_data[i] = ractx->curr_block[BLOCKSIZE + BLOCKSIZE / 2 + i];
 453         energy += (lpc_data[i] * lpc_data[i]) >> 4;
 454     }
 455     for (i = 2 * BLOCKSIZE + BLOCKSIZE / 2; i < NBLOCKS * BLOCKSIZE; i++) {
 456         lpc_data[i] = *((int16_t *)data + i - 2 * BLOCKSIZE - BLOCKSIZE / 2) >>
 457                       2;
 458         energy += (lpc_data[i] * lpc_data[i]) >> 4;
 459     }
 460     energy = ff_energy_tab[quantize(ff_t_sqrt(energy >> 5) >> 10, ff_energy_tab,
 461                                     32)];
 462
 463     ff_lpc_calc_coefs(&ractx->lpc_ctx, lpc_data, NBLOCKS * BLOCKSIZE, LPC_ORDER,
 464                       LPC_ORDER, 16, lpc_coefs, shift, FF_LPC_TYPE_LEVINSON,
 465                       0, ORDER_METHOD_EST, 12, 0);
 466     for (i = 0; i < LPC_ORDER; i++)
 467         block_coefs[NBLOCKS - 1][i] = -(lpc_coefs[LPC_ORDER - 1][i] <<
 468                                         (12 - shift[LPC_ORDER - 1]));
 469
 470     /**
 471      * TODO: apply perceptual weighting of the input speech through bandwidth
 472      * expansion of the LPC filter.
 473      */
 474
 475     if (ff_eval_refl(lpc_refl, block_coefs[NBLOCKS - 1], avctx)) {
 476         /**
 477          * The filter is unstable: use the coefficients of the previous frame.
 478          */
 479         ff_int_to_int16(block_coefs[NBLOCKS - 1], ractx->lpc_coef[1]);
 480         if (ff_eval_refl(lpc_refl, block_coefs[NBLOCKS - 1], avctx)) {
 481             /* the filter is still unstable. set reflection coeffs to zero. */
 482             memset(lpc_refl, 0, sizeof(lpc_refl));
 483         }
 484     }
 485     init_put_bits(&pb, frame, buf_size);
 486     for (i = 0; i < LPC_ORDER; i++) {
 487         idx = quantize(lpc_refl[i], ff_lpc_refl_cb[i], sizes[i]);
 488         put_bits(&pb, bit_sizes[i], idx);
 489         lpc_refl[i] = ff_lpc_refl_cb[i][idx];
 490     }
 491     ractx->lpc_refl_rms[0] = ff_rms(lpc_refl);
 492     ff_eval_coefs(ractx->lpc_coef[0], lpc_refl);
 493     refl_rms[0] = ff_interp(ractx, block_coefs[0], 1, 1, ractx->old_energy);
 494     refl_rms[1] = ff_interp(ractx, block_coefs[1], 2,
 495                             energy <= ractx->old_energy,
 496                             ff_t_sqrt(energy * ractx->old_energy) >> 12);
 497     refl_rms[2] = ff_interp(ractx, block_coefs[2], 3, 0, energy);
 498     refl_rms[3] = ff_rescale_rms(ractx->lpc_refl_rms[0], energy);
 499     ff_int_to_int16(block_coefs[NBLOCKS - 1], ractx->lpc_coef[0]);
 500     put_bits(&pb, 5, quantize(energy, ff_energy_tab, 32));
 501     for (i = 0; i < NBLOCKS; i++)
 502         ra144_encode_subblock(ractx, ractx->curr_block + i * BLOCKSIZE,
 503                               block_coefs[i], refl_rms[i], &pb);
 504     flush_put_bits(&pb);
 505     ractx->old_energy = energy;
 506     ractx->lpc_refl_rms[1] = ractx->lpc_refl_rms[0];
 507     FFSWAP(unsigned int *, ractx->lpc_coef[0], ractx->lpc_coef[1]);
 508     for (i = 0; i < NBLOCKS * BLOCKSIZE; i++)
 509         ractx->curr_block[i] = *((int16_t *)data + i) >> 2;
 510     return FRAMESIZE;
 511 }
 512
 513
 514 AVCodec ff_ra_144_encoder = {
 515     .name           = "real_144",
 516     .type           = AVMEDIA_TYPE_AUDIO,
 517     .id             = CODEC_ID_RA_144,
 518     .priv_data_size = sizeof(RA144Context),
 519     .init           = ra144_encode_init,
 520     .encode         = ra144_encode_frame,
 521     .close          = ra144_encode_close,
 522     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
 523                                                      AV_SAMPLE_FMT_NONE },
 524     .long_name      = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"),
 525 };