git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of Libav.
   8  *
   9  * Libav is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * Libav is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with Libav; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.libav.org/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "libavutil/mathematics.h"
  39 #include "nellymoser.h"
  40 #include "avcodec.h"
  41 #include "dsputil.h"
  42 #include "fft.h"
  43 #include "sinewin.h"
  44
  45 #define BITSTREAM_WRITER_LE
  46 #include "put_bits.h"
  47
  48 #define POW_TABLE_SIZE (1<<11)
  49 #define POW_TABLE_OFFSET 3
  50 #define OPT_SIZE ((1<<15) + 3000)
  51
  52 typedef struct NellyMoserEncodeContext {
  53     AVCodecContext  *avctx;
  54     int             last_frame;
  55     int             bufsel;
  56     int             have_saved;
  57     DSPContext      dsp;
  58     FFTContext      mdct_ctx;
  59     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
  60     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
  61     DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
  62     float           (*opt )[NELLY_BANDS];
  63     uint8_t         (*path)[NELLY_BANDS];
  64 } NellyMoserEncodeContext;
  65
  66 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
  67
  68 static const uint8_t sf_lut[96] = {
  69      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  70      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  71     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  72     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  73     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  74     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  75 };
  76
  77 static const uint8_t sf_delta_lut[78] = {
  78      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  79      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  80     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  81     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  82     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  83 };
  84
  85 static const uint8_t quant_lut[230] = {
  86      0,
  87
  88      0,  1,  2,
  89
  90      0,  1,  2,  3,  4,  5,  6,
  91
  92      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  93     12, 13, 13, 13, 14,
  94
  95      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  96      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
  97     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
  98     30,
  99
 100      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 101      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 102     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 103     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 104     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 105     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 106     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 107     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 108     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 109     61, 61, 61, 61, 62,
 110 };
 111
 112 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 113 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 114 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 115
 116 static void apply_mdct(NellyMoserEncodeContext *s)
 117 {
 118     s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
 119     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
 120                                NELLY_BUF_LEN);
 121     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 122
 123     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
 124                        ff_sine_128, NELLY_BUF_LEN);
 125     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
 126                                NELLY_BUF_LEN);
 127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
 128 }
 129
 130 static av_cold int encode_init(AVCodecContext *avctx)
 131 {
 132     NellyMoserEncodeContext *s = avctx->priv_data;
 133     int i;
 134
 135     if (avctx->channels != 1) {
 136         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 137         return -1;
 138     }
 139
 140     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 141         avctx->sample_rate != 11025 &&
 142         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 143         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 144         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 145         return -1;
 146     }
 147
 148     avctx->frame_size = NELLY_SAMPLES;
 149     s->avctx = avctx;
 150     ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
 151     dsputil_init(&s->dsp, avctx);
 152
 153     /* Generate overlap window */
 154     ff_sine_window_init(ff_sine_128, 128);
 155     for (i = 0; i < POW_TABLE_SIZE; i++)
 156         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 157
 158     if (s->avctx->trellis) {
 159         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 160         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 161     }
 162
 163     return 0;
 164 }
 165
 166 static av_cold int encode_end(AVCodecContext *avctx)
 167 {
 168     NellyMoserEncodeContext *s = avctx->priv_data;
 169
 170     ff_mdct_end(&s->mdct_ctx);
 171
 172     if (s->avctx->trellis) {
 173         av_free(s->opt);
 174         av_free(s->path);
 175     }
 176
 177     return 0;
 178 }
 179
 180 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 181     best_idx = \
 182         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 183     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 184         best_idx++;
 185
 186 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 187 {
 188     int band, best_idx, power_idx = 0;
 189     float power_candidate;
 190
 191     //base exponent
 192     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 193     idx_table[0] = best_idx;
 194     power_idx = ff_nelly_init_table[best_idx];
 195
 196     for (band = 1; band < NELLY_BANDS; band++) {
 197         power_candidate = cand[band] - power_idx;
 198         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 199         idx_table[band] = best_idx;
 200         power_idx += ff_nelly_delta_table[best_idx];
 201     }
 202 }
 203
 204 static inline float distance(float x, float y, int band)
 205 {
 206     //return pow(fabs(x-y), 2.0);
 207     float tmp = x - y;
 208     return tmp * tmp;
 209 }
 210
 211 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 212 {
 213     int i, j, band, best_idx;
 214     float power_candidate, best_val;
 215
 216     float  (*opt )[NELLY_BANDS] = s->opt ;
 217     uint8_t(*path)[NELLY_BANDS] = s->path;
 218
 219     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 220         opt[0][i] = INFINITY;
 221     }
 222
 223     for (i = 0; i < 64; i++) {
 224         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 225         path[0][ff_nelly_init_table[i]] = i;
 226     }
 227
 228     for (band = 1; band < NELLY_BANDS; band++) {
 229         int q, c = 0;
 230         float tmp;
 231         int idx_min, idx_max, idx;
 232         power_candidate = cand[band];
 233         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 234             idx_min = FFMAX(0, cand[band] - q);
 235             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 236             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 237                 if ( isinf(opt[band - 1][i]) )
 238                     continue;
 239                 for (j = 0; j < 32; j++) {
 240                     idx = i + ff_nelly_delta_table[j];
 241                     if (idx > idx_max)
 242                         break;
 243                     if (idx >= idx_min) {
 244                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 245                         if (opt[band][idx] > tmp) {
 246                             opt[band][idx] = tmp;
 247                             path[band][idx] = j;
 248                             c = 1;
 249                         }
 250                     }
 251                 }
 252             }
 253         }
 254         assert(c); //FIXME
 255     }
 256
 257     best_val = INFINITY;
 258     best_idx = -1;
 259     band = NELLY_BANDS - 1;
 260     for (i = 0; i < OPT_SIZE; i++) {
 261         if (best_val > opt[band][i]) {
 262             best_val = opt[band][i];
 263             best_idx = i;
 264         }
 265     }
 266     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 267         idx_table[band] = path[band][best_idx];
 268         if (band) {
 269             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 270         }
 271     }
 272 }
 273
 274 /**
 275  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 276  *  @param s               encoder context
 277  *  @param output          output buffer
 278  *  @param output_size     size of output buffer
 279  */
 280 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 281 {
 282     PutBitContext pb;
 283     int i, j, band, block, best_idx, power_idx = 0;
 284     float power_val, coeff, coeff_sum;
 285     float pows[NELLY_FILL_LEN];
 286     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 287     float cand[NELLY_BANDS];
 288
 289     apply_mdct(s);
 290
 291     init_put_bits(&pb, output, output_size * 8);
 292
 293     i = 0;
 294     for (band = 0; band < NELLY_BANDS; band++) {
 295         coeff_sum = 0;
 296         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 297             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 298                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 299         }
 300         cand[band] =
 301             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 302     }
 303
 304     if (s->avctx->trellis) {
 305         get_exponent_dynamic(s, cand, idx_table);
 306     } else {
 307         get_exponent_greedy(s, cand, idx_table);
 308     }
 309
 310     i = 0;
 311     for (band = 0; band < NELLY_BANDS; band++) {
 312         if (band) {
 313             power_idx += ff_nelly_delta_table[idx_table[band]];
 314             put_bits(&pb, 5, idx_table[band]);
 315         } else {
 316             power_idx = ff_nelly_init_table[idx_table[0]];
 317             put_bits(&pb, 6, idx_table[0]);
 318         }
 319         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 320         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 321             s->mdct_out[i] *= power_val;
 322             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 323             pows[i] = power_idx;
 324         }
 325     }
 326
 327     ff_nelly_get_sample_bits(pows, bits);
 328
 329     for (block = 0; block < 2; block++) {
 330         for (i = 0; i < NELLY_FILL_LEN; i++) {
 331             if (bits[i] > 0) {
 332                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 333                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 334                 best_idx =
 335                     quant_lut[av_clip (
 336                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 337                             quant_lut_offset[bits[i]],
 338                             quant_lut_offset[bits[i]+1] - 1
 339                             )];
 340                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 341                     best_idx++;
 342
 343                 put_bits(&pb, bits[i], best_idx);
 344             }
 345         }
 346         if (!block)
 347             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 348     }
 349
 350     flush_put_bits(&pb);
 351 }
 352
 353 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
 354 {
 355     NellyMoserEncodeContext *s = avctx->priv_data;
 356     const float *samples = data;
 357     int i;
 358
 359     if (s->last_frame)
 360         return 0;
 361
 362     if (data) {
 363         memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples));
 364         for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) {
 365             s->buf[s->bufsel][i] = 0;
 366         }
 367         s->bufsel = 1 - s->bufsel;
 368         if (!s->have_saved) {
 369             s->have_saved = 1;
 370             return 0;
 371         }
 372     } else {
 373         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
 374         s->bufsel = 1 - s->bufsel;
 375         s->last_frame = 1;
 376     }
 377
 378     if (s->have_saved) {
 379         encode_block(s, frame, buf_size);
 380         return NELLY_BLOCK_LEN;
 381     }
 382     return 0;
 383 }
 384
 385 AVCodec ff_nellymoser_encoder = {
 386     .name = "nellymoser",
 387     .type = AVMEDIA_TYPE_AUDIO,
 388     .id = CODEC_ID_NELLYMOSER,
 389     .priv_data_size = sizeof(NellyMoserEncodeContext),
 390     .init = encode_init,
 391     .encode = encode_frame,
 392     .close = encode_end,
 393     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
 394     .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 395     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
 396 };