git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "libavutil/common.h"
  39 #include "libavutil/float_dsp.h"
  40 #include "libavutil/mathematics.h"
  41
  42 #include "audio_frame_queue.h"
  43 #include "avcodec.h"
  44 #include "fft.h"
  45 #include "internal.h"
  46 #include "nellymoser.h"
  47 #include "sinewin.h"
  48
  49 #define BITSTREAM_WRITER_LE
  50 #include "put_bits.h"
  51
  52 #define POW_TABLE_SIZE (1<<11)
  53 #define POW_TABLE_OFFSET 3
  54 #define OPT_SIZE ((1<<15) + 3000)
  55
  56 typedef struct NellyMoserEncodeContext {
  57     AVCodecContext  *avctx;
  58     int             last_frame;
  59     AVFloatDSPContext *fdsp;
  60     FFTContext      mdct_ctx;
  61     AudioFrameQueue afq;
  62     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
  63     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
  64     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
  65     float           (*opt )[OPT_SIZE];
  66     uint8_t         (*path)[OPT_SIZE];
  67 } NellyMoserEncodeContext;
  68
  69 static float pow_table[POW_TABLE_SIZE];     ///< pow(2, -i / 2048.0 - 3.0);
  70
  71 static const uint8_t sf_lut[96] = {
  72      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  73      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  74     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  75     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  76     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  77     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  78 };
  79
  80 static const uint8_t sf_delta_lut[78] = {
  81      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  82      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  83     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  84     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  85     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  86 };
  87
  88 static const uint8_t quant_lut[230] = {
  89      0,
  90
  91      0,  1,  2,
  92
  93      0,  1,  2,  3,  4,  5,  6,
  94
  95      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  96     12, 13, 13, 13, 14,
  97
  98      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  99      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
 100     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
 101     30,
 102
 103      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 104      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 105     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 106     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 107     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 108     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 109     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 110     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 111     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 112     61, 61, 61, 61, 62,
 113 };
 114
 115 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 116 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 118
 119 static void apply_mdct(NellyMoserEncodeContext *s)
 120 {
 121     float *in0 = s->buf;
 122     float *in1 = s->buf + NELLY_BUF_LEN;
 123     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
 124
 125     s->fdsp->vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
 126     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
 127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 128
 129     s->fdsp->vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
 130     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
 131     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
 132 }
 133
 134 static av_cold int encode_end(AVCodecContext *avctx)
 135 {
 136     NellyMoserEncodeContext *s = avctx->priv_data;
 137
 138     ff_mdct_end(&s->mdct_ctx);
 139
 140     if (s->avctx->trellis) {
 141         av_freep(&s->opt);
 142         av_freep(&s->path);
 143     }
 144     ff_af_queue_close(&s->afq);
 145     av_freep(&s->fdsp);
 146
 147     return 0;
 148 }
 149
 150 static av_cold int encode_init(AVCodecContext *avctx)
 151 {
 152     NellyMoserEncodeContext *s = avctx->priv_data;
 153     int i, ret;
 154
 155     if (avctx->channels != 1) {
 156         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 157         return AVERROR(EINVAL);
 158     }
 159
 160     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 161         avctx->sample_rate != 11025 &&
 162         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 163         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 164         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 165         return AVERROR(EINVAL);
 166     }
 167
 168     avctx->frame_size = NELLY_SAMPLES;
 169     avctx->initial_padding = NELLY_BUF_LEN;
 170     ff_af_queue_init(avctx, &s->afq);
 171     s->avctx = avctx;
 172     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
 173         goto error;
 174     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 175     if (!s->fdsp) {
 176         ret = AVERROR(ENOMEM);
 177         goto error;
 178     }
 179
 180     /* Generate overlap window */
 181     ff_init_ff_sine_windows(7);
 182     for (i = 0; i < POW_TABLE_SIZE; i++)
 183         pow_table[i] = pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 184
 185     if (s->avctx->trellis) {
 186         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 187         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 188         if (!s->opt || !s->path) {
 189             ret = AVERROR(ENOMEM);
 190             goto error;
 191         }
 192     }
 193
 194     return 0;
 195 error:
 196     encode_end(avctx);
 197     return ret;
 198 }
 199
 200 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 201     best_idx = \
 202         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 203     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 204         best_idx++;
 205
 206 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 207 {
 208     int band, best_idx, power_idx = 0;
 209     float power_candidate;
 210
 211     //base exponent
 212     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 213     idx_table[0] = best_idx;
 214     power_idx = ff_nelly_init_table[best_idx];
 215
 216     for (band = 1; band < NELLY_BANDS; band++) {
 217         power_candidate = cand[band] - power_idx;
 218         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 219         idx_table[band] = best_idx;
 220         power_idx += ff_nelly_delta_table[best_idx];
 221     }
 222 }
 223
 224 static inline float distance(float x, float y, int band)
 225 {
 226     //return pow(fabs(x-y), 2.0);
 227     float tmp = x - y;
 228     return tmp * tmp;
 229 }
 230
 231 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 232 {
 233     int i, j, band, best_idx;
 234     float power_candidate, best_val;
 235
 236     float  (*opt )[OPT_SIZE] = s->opt ;
 237     uint8_t(*path)[OPT_SIZE] = s->path;
 238
 239     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 240         opt[0][i] = INFINITY;
 241     }
 242
 243     for (i = 0; i < 64; i++) {
 244         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 245         path[0][ff_nelly_init_table[i]] = i;
 246     }
 247
 248     for (band = 1; band < NELLY_BANDS; band++) {
 249         int q, c = 0;
 250         float tmp;
 251         int idx_min, idx_max, idx;
 252         power_candidate = cand[band];
 253         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 254             idx_min = FFMAX(0, cand[band] - q);
 255             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 256             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 257                 if ( isinf(opt[band - 1][i]) )
 258                     continue;
 259                 for (j = 0; j < 32; j++) {
 260                     idx = i + ff_nelly_delta_table[j];
 261                     if (idx > idx_max)
 262                         break;
 263                     if (idx >= idx_min) {
 264                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 265                         if (opt[band][idx] > tmp) {
 266                             opt[band][idx] = tmp;
 267                             path[band][idx] = j;
 268                             c = 1;
 269                         }
 270                     }
 271                 }
 272             }
 273         }
 274         av_assert1(c); //FIXME
 275     }
 276
 277     best_val = INFINITY;
 278     best_idx = -1;
 279     band = NELLY_BANDS - 1;
 280     for (i = 0; i < OPT_SIZE; i++) {
 281         if (best_val > opt[band][i]) {
 282             best_val = opt[band][i];
 283             best_idx = i;
 284         }
 285     }
 286     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 287         idx_table[band] = path[band][best_idx];
 288         if (band) {
 289             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 290         }
 291     }
 292 }
 293
 294 /**
 295  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 296  *  @param s               encoder context
 297  *  @param output          output buffer
 298  *  @param output_size     size of output buffer
 299  */
 300 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 301 {
 302     PutBitContext pb;
 303     int i, j, band, block, best_idx, power_idx = 0;
 304     float power_val, coeff, coeff_sum;
 305     float pows[NELLY_FILL_LEN];
 306     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 307     float cand[NELLY_BANDS];
 308
 309     apply_mdct(s);
 310
 311     init_put_bits(&pb, output, output_size);
 312
 313     i = 0;
 314     for (band = 0; band < NELLY_BANDS; band++) {
 315         coeff_sum = 0;
 316         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 317             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 318                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 319         }
 320         cand[band] =
 321             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 322     }
 323
 324     if (s->avctx->trellis) {
 325         get_exponent_dynamic(s, cand, idx_table);
 326     } else {
 327         get_exponent_greedy(s, cand, idx_table);
 328     }
 329
 330     i = 0;
 331     for (band = 0; band < NELLY_BANDS; band++) {
 332         if (band) {
 333             power_idx += ff_nelly_delta_table[idx_table[band]];
 334             put_bits(&pb, 5, idx_table[band]);
 335         } else {
 336             power_idx = ff_nelly_init_table[idx_table[0]];
 337             put_bits(&pb, 6, idx_table[0]);
 338         }
 339         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 340         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 341             s->mdct_out[i] *= power_val;
 342             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 343             pows[i] = power_idx;
 344         }
 345     }
 346
 347     ff_nelly_get_sample_bits(pows, bits);
 348
 349     for (block = 0; block < 2; block++) {
 350         for (i = 0; i < NELLY_FILL_LEN; i++) {
 351             if (bits[i] > 0) {
 352                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 353                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 354                 best_idx =
 355                     quant_lut[av_clip (
 356                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 357                             quant_lut_offset[bits[i]],
 358                             quant_lut_offset[bits[i]+1] - 1
 359                             )];
 360                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 361                     best_idx++;
 362
 363                 put_bits(&pb, bits[i], best_idx);
 364             }
 365         }
 366         if (!block)
 367             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 368     }
 369
 370     flush_put_bits(&pb);
 371     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
 372 }
 373
 374 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 375                         const AVFrame *frame, int *got_packet_ptr)
 376 {
 377     NellyMoserEncodeContext *s = avctx->priv_data;
 378     int ret;
 379
 380     if (s->last_frame)
 381         return 0;
 382
 383     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
 384     if (frame) {
 385         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
 386                frame->nb_samples * sizeof(*s->buf));
 387         if (frame->nb_samples < NELLY_SAMPLES) {
 388             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
 389                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
 390             if (frame->nb_samples >= NELLY_BUF_LEN)
 391                 s->last_frame = 1;
 392         }
 393         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 394             return ret;
 395     } else {
 396         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
 397         s->last_frame = 1;
 398     }
 399
 400     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
 401         return ret;
 402     encode_block(s, avpkt->data, avpkt->size);
 403
 404     /* Get the next frame pts/duration */
 405     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 406                        &avpkt->duration);
 407
 408     *got_packet_ptr = 1;
 409     return 0;
 410 }
 411
 412 AVCodec ff_nellymoser_encoder = {
 413     .name           = "nellymoser",
 414     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 415     .type           = AVMEDIA_TYPE_AUDIO,
 416     .id             = AV_CODEC_ID_NELLYMOSER,
 417     .priv_data_size = sizeof(NellyMoserEncodeContext),
 418     .init           = encode_init,
 419     .encode2        = encode_frame,
 420     .close          = encode_end,
 421     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
 422     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
 423                                                      AV_SAMPLE_FMT_NONE },
 424 };