git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
#include <errno.h>

#include "nellymoser.h"
#include "avcodec.h"
#include "dsputil.h"
#include "fft.h"
#include "sinewin.h"
  43
  44 #define BITSTREAM_WRITER_LE
  45 #include "put_bits.h"
  46
  47 #define POW_TABLE_SIZE (1<<11)
  48 #define POW_TABLE_OFFSET 3
  49 #define OPT_SIZE ((1<<15) + 3000)
  50
  51 typedef struct NellyMoserEncodeContext {
  52     AVCodecContext  *avctx;
  53     int             last_frame;
  54     int             bufsel;
  55     int             have_saved;
  56     DSPContext      dsp;
  57     FFTContext      mdct_ctx;
  58     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
  59     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
  60     DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
  61     float           (*opt )[NELLY_BANDS];
  62     uint8_t         (*path)[NELLY_BANDS];
  63 } NellyMoserEncodeContext;
  64
  65 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
  66
  67 static const uint8_t sf_lut[96] = {
  68      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  69      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  70     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  71     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  72     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  73     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  74 };
  75
  76 static const uint8_t sf_delta_lut[78] = {
  77      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  78      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  79     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  80     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  81     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  82 };
  83
  84 static const uint8_t quant_lut[230] = {
  85      0,
  86
  87      0,  1,  2,
  88
  89      0,  1,  2,  3,  4,  5,  6,
  90
  91      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  92     12, 13, 13, 13, 14,
  93
  94      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  95      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
  96     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
  97     30,
  98
  99      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 100      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 101     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 102     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 103     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 104     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 105     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 106     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 107     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 108     61, 61, 61, 61, 62,
 109 };
 110
 111 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 112 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 113 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 114
 115 static void apply_mdct(NellyMoserEncodeContext *s)
 116 {
 117     s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
 118     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
 119                                NELLY_BUF_LEN);
 120     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 121
 122     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
 123                        ff_sine_128, NELLY_BUF_LEN);
 124     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
 125                                NELLY_BUF_LEN);
 126     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
 127 }
 128
 129 static av_cold int encode_init(AVCodecContext *avctx)
 130 {
 131     NellyMoserEncodeContext *s = avctx->priv_data;
 132     int i;
 133
 134     if (avctx->channels != 1) {
 135         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 136         return -1;
 137     }
 138
 139     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 140         avctx->sample_rate != 11025 &&
 141         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 142         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 143         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 144         return -1;
 145     }
 146
 147     avctx->frame_size = NELLY_SAMPLES;
 148     s->avctx = avctx;
 149     ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
 150     dsputil_init(&s->dsp, avctx);
 151
 152     /* Generate overlap window */
 153     ff_sine_window_init(ff_sine_128, 128);
 154     for (i = 0; i < POW_TABLE_SIZE; i++)
 155         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 156
 157     if (s->avctx->trellis) {
 158         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 159         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 160     }
 161
 162     return 0;
 163 }
 164
 165 static av_cold int encode_end(AVCodecContext *avctx)
 166 {
 167     NellyMoserEncodeContext *s = avctx->priv_data;
 168
 169     ff_mdct_end(&s->mdct_ctx);
 170
 171     if (s->avctx->trellis) {
 172         av_free(s->opt);
 173         av_free(s->path);
 174     }
 175
 176     return 0;
 177 }
 178
 179 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 180     best_idx = \
 181         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 182     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 183         best_idx++;
 184
 185 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 186 {
 187     int band, best_idx, power_idx = 0;
 188     float power_candidate;
 189
 190     //base exponent
 191     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 192     idx_table[0] = best_idx;
 193     power_idx = ff_nelly_init_table[best_idx];
 194
 195     for (band = 1; band < NELLY_BANDS; band++) {
 196         power_candidate = cand[band] - power_idx;
 197         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 198         idx_table[band] = best_idx;
 199         power_idx += ff_nelly_delta_table[best_idx];
 200     }
 201 }
 202
 203 static inline float distance(float x, float y, int band)
 204 {
 205     //return pow(fabs(x-y), 2.0);
 206     float tmp = x - y;
 207     return tmp * tmp;
 208 }
 209
 210 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 211 {
 212     int i, j, band, best_idx;
 213     float power_candidate, best_val;
 214
 215     float  (*opt )[NELLY_BANDS] = s->opt ;
 216     uint8_t(*path)[NELLY_BANDS] = s->path;
 217
 218     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 219         opt[0][i] = INFINITY;
 220     }
 221
 222     for (i = 0; i < 64; i++) {
 223         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 224         path[0][ff_nelly_init_table[i]] = i;
 225     }
 226
 227     for (band = 1; band < NELLY_BANDS; band++) {
 228         int q, c = 0;
 229         float tmp;
 230         int idx_min, idx_max, idx;
 231         power_candidate = cand[band];
 232         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 233             idx_min = FFMAX(0, cand[band] - q);
 234             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 235             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 236                 if ( isinf(opt[band - 1][i]) )
 237                     continue;
 238                 for (j = 0; j < 32; j++) {
 239                     idx = i + ff_nelly_delta_table[j];
 240                     if (idx > idx_max)
 241                         break;
 242                     if (idx >= idx_min) {
 243                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 244                         if (opt[band][idx] > tmp) {
 245                             opt[band][idx] = tmp;
 246                             path[band][idx] = j;
 247                             c = 1;
 248                         }
 249                     }
 250                 }
 251             }
 252         }
 253         assert(c); //FIXME
 254     }
 255
 256     best_val = INFINITY;
 257     best_idx = -1;
 258     band = NELLY_BANDS - 1;
 259     for (i = 0; i < OPT_SIZE; i++) {
 260         if (best_val > opt[band][i]) {
 261             best_val = opt[band][i];
 262             best_idx = i;
 263         }
 264     }
 265     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 266         idx_table[band] = path[band][best_idx];
 267         if (band) {
 268             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 269         }
 270     }
 271 }
 272
 273 /**
 274  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 275  *  @param s               encoder context
 276  *  @param output          output buffer
 277  *  @param output_size     size of output buffer
 278  */
 279 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 280 {
 281     PutBitContext pb;
 282     int i, j, band, block, best_idx, power_idx = 0;
 283     float power_val, coeff, coeff_sum;
 284     float pows[NELLY_FILL_LEN];
 285     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 286     float cand[NELLY_BANDS];
 287
 288     apply_mdct(s);
 289
 290     init_put_bits(&pb, output, output_size * 8);
 291
 292     i = 0;
 293     for (band = 0; band < NELLY_BANDS; band++) {
 294         coeff_sum = 0;
 295         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 296             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 297                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 298         }
 299         cand[band] =
 300             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 301     }
 302
 303     if (s->avctx->trellis) {
 304         get_exponent_dynamic(s, cand, idx_table);
 305     } else {
 306         get_exponent_greedy(s, cand, idx_table);
 307     }
 308
 309     i = 0;
 310     for (band = 0; band < NELLY_BANDS; band++) {
 311         if (band) {
 312             power_idx += ff_nelly_delta_table[idx_table[band]];
 313             put_bits(&pb, 5, idx_table[band]);
 314         } else {
 315             power_idx = ff_nelly_init_table[idx_table[0]];
 316             put_bits(&pb, 6, idx_table[0]);
 317         }
 318         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 319         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 320             s->mdct_out[i] *= power_val;
 321             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 322             pows[i] = power_idx;
 323         }
 324     }
 325
 326     ff_nelly_get_sample_bits(pows, bits);
 327
 328     for (block = 0; block < 2; block++) {
 329         for (i = 0; i < NELLY_FILL_LEN; i++) {
 330             if (bits[i] > 0) {
 331                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 332                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 333                 best_idx =
 334                     quant_lut[av_clip (
 335                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 336                             quant_lut_offset[bits[i]],
 337                             quant_lut_offset[bits[i]+1] - 1
 338                             )];
 339                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 340                     best_idx++;
 341
 342                 put_bits(&pb, bits[i], best_idx);
 343             }
 344         }
 345         if (!block)
 346             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 347     }
 348
 349     flush_put_bits(&pb);
 350 }
 351
 352 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
 353 {
 354     NellyMoserEncodeContext *s = avctx->priv_data;
 355     const int16_t *samples = data;
 356     int i;
 357
 358     if (s->last_frame)
 359         return 0;
 360
 361     if (data) {
 362         for (i = 0; i < avctx->frame_size; i++) {
 363             s->buf[s->bufsel][i] = samples[i];
 364         }
 365         for (; i < NELLY_SAMPLES; i++) {
 366             s->buf[s->bufsel][i] = 0;
 367         }
 368         s->bufsel = 1 - s->bufsel;
 369         if (!s->have_saved) {
 370             s->have_saved = 1;
 371             return 0;
 372         }
 373     } else {
 374         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
 375         s->bufsel = 1 - s->bufsel;
 376         s->last_frame = 1;
 377     }
 378
 379     if (s->have_saved) {
 380         encode_block(s, frame, buf_size);
 381         return NELLY_BLOCK_LEN;
 382     }
 383     return 0;
 384 }
 385
 386 AVCodec ff_nellymoser_encoder = {
 387     .name = "nellymoser",
 388     .type = AVMEDIA_TYPE_AUDIO,
 389     .id = CODEC_ID_NELLYMOSER,
 390     .priv_data_size = sizeof(NellyMoserEncodeContext),
 391     .init = encode_init,
 392     .encode = encode_frame,
 393     .close = encode_end,
 394     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
 395     .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 396     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
 397 };