git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "libavutil/common.h"
  39 #include "libavutil/float_dsp.h"
  40 #include "libavutil/mathematics.h"
  41
  42 #include "audio_frame_queue.h"
  43 #include "avcodec.h"
  44 #include "fft.h"
  45 #include "internal.h"
  46 #include "nellymoser.h"
  47 #include "sinewin.h"
  48
  49 #define BITSTREAM_WRITER_LE
  50 #include "put_bits.h"
  51
  52 #define POW_TABLE_SIZE (1<<11)
  53 #define POW_TABLE_OFFSET 3
  54 #define OPT_SIZE ((1<<15) + 3000)
  55
  56 typedef struct NellyMoserEncodeContext {
  57     AVCodecContext  *avctx;
  58     int             last_frame;
  59     AVFloatDSPContext fdsp;
  60     FFTContext      mdct_ctx;
  61     AudioFrameQueue afq;
  62     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
  63     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
  64     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
  65     float           (*opt )[OPT_SIZE];
  66     uint8_t         (*path)[OPT_SIZE];
  67 } NellyMoserEncodeContext;
  68
  69 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
  70
  71 static const uint8_t sf_lut[96] = {
  72      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  73      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  74     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  75     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  76     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  77     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  78 };
  79
  80 static const uint8_t sf_delta_lut[78] = {
  81      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  82      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  83     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  84     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  85     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  86 };
  87
  88 static const uint8_t quant_lut[230] = {
  89      0,
  90
  91      0,  1,  2,
  92
  93      0,  1,  2,  3,  4,  5,  6,
  94
  95      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  96     12, 13, 13, 13, 14,
  97
  98      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  99      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
 100     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
 101     30,
 102
 103      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 104      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 105     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 106     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 107     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 108     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 109     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 110     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 111     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 112     61, 61, 61, 61, 62,
 113 };
 114
 115 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 116 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 118
 119 static void apply_mdct(NellyMoserEncodeContext *s)
 120 {
 121     float *in0 = s->buf;
 122     float *in1 = s->buf + NELLY_BUF_LEN;
 123     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
 124
 125     s->fdsp.vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
 126     s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
 127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 128
 129     s->fdsp.vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
 130     s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
 131     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
 132 }
 133
 134 static av_cold int encode_end(AVCodecContext *avctx)
 135 {
 136     NellyMoserEncodeContext *s = avctx->priv_data;
 137
 138     ff_mdct_end(&s->mdct_ctx);
 139
 140     if (s->avctx->trellis) {
 141         av_freep(&s->opt);
 142         av_freep(&s->path);
 143     }
 144     ff_af_queue_close(&s->afq);
 145
 146     return 0;
 147 }
 148
 149 static av_cold int encode_init(AVCodecContext *avctx)
 150 {
 151     NellyMoserEncodeContext *s = avctx->priv_data;
 152     int i, ret;
 153
 154     if (avctx->channels != 1) {
 155         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 156         return AVERROR(EINVAL);
 157     }
 158
 159     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 160         avctx->sample_rate != 11025 &&
 161         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 162         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 163         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 164         return AVERROR(EINVAL);
 165     }
 166
 167     avctx->frame_size = NELLY_SAMPLES;
 168     avctx->initial_padding = NELLY_BUF_LEN;
 169     ff_af_queue_init(avctx, &s->afq);
 170     s->avctx = avctx;
 171     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
 172         goto error;
 173     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 174
 175     /* Generate overlap window */
 176     ff_init_ff_sine_windows(7);
 177     for (i = 0; i < POW_TABLE_SIZE; i++)
 178         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 179
 180     if (s->avctx->trellis) {
 181         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 182         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 183         if (!s->opt || !s->path) {
 184             ret = AVERROR(ENOMEM);
 185             goto error;
 186         }
 187     }
 188
 189     return 0;
 190 error:
 191     encode_end(avctx);
 192     return ret;
 193 }
 194
 195 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 196     best_idx = \
 197         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 198     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 199         best_idx++;
 200
 201 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 202 {
 203     int band, best_idx, power_idx = 0;
 204     float power_candidate;
 205
 206     //base exponent
 207     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 208     idx_table[0] = best_idx;
 209     power_idx = ff_nelly_init_table[best_idx];
 210
 211     for (band = 1; band < NELLY_BANDS; band++) {
 212         power_candidate = cand[band] - power_idx;
 213         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 214         idx_table[band] = best_idx;
 215         power_idx += ff_nelly_delta_table[best_idx];
 216     }
 217 }
 218
 219 static inline float distance(float x, float y, int band)
 220 {
 221     //return pow(fabs(x-y), 2.0);
 222     float tmp = x - y;
 223     return tmp * tmp;
 224 }
 225
 226 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 227 {
 228     int i, j, band, best_idx;
 229     float power_candidate, best_val;
 230
 231     float  (*opt )[OPT_SIZE] = s->opt ;
 232     uint8_t(*path)[OPT_SIZE] = s->path;
 233
 234     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 235         opt[0][i] = INFINITY;
 236     }
 237
 238     for (i = 0; i < 64; i++) {
 239         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 240         path[0][ff_nelly_init_table[i]] = i;
 241     }
 242
 243     for (band = 1; band < NELLY_BANDS; band++) {
 244         int q, c = 0;
 245         float tmp;
 246         int idx_min, idx_max, idx;
 247         power_candidate = cand[band];
 248         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 249             idx_min = FFMAX(0, cand[band] - q);
 250             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 251             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 252                 if ( isinf(opt[band - 1][i]) )
 253                     continue;
 254                 for (j = 0; j < 32; j++) {
 255                     idx = i + ff_nelly_delta_table[j];
 256                     if (idx > idx_max)
 257                         break;
 258                     if (idx >= idx_min) {
 259                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 260                         if (opt[band][idx] > tmp) {
 261                             opt[band][idx] = tmp;
 262                             path[band][idx] = j;
 263                             c = 1;
 264                         }
 265                     }
 266                 }
 267             }
 268         }
 269         assert(c); //FIXME
 270     }
 271
 272     best_val = INFINITY;
 273     best_idx = -1;
 274     band = NELLY_BANDS - 1;
 275     for (i = 0; i < OPT_SIZE; i++) {
 276         if (best_val > opt[band][i]) {
 277             best_val = opt[band][i];
 278             best_idx = i;
 279         }
 280     }
 281     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 282         idx_table[band] = path[band][best_idx];
 283         if (band) {
 284             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 285         }
 286     }
 287 }
 288
 289 /**
 290  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 291  *  @param s               encoder context
 292  *  @param output          output buffer
 293  *  @param output_size     size of output buffer
 294  */
 295 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 296 {
 297     PutBitContext pb;
 298     int i, j, band, block, best_idx, power_idx = 0;
 299     float power_val, coeff, coeff_sum;
 300     float pows[NELLY_FILL_LEN];
 301     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 302     float cand[NELLY_BANDS];
 303
 304     apply_mdct(s);
 305
 306     init_put_bits(&pb, output, output_size * 8);
 307
 308     i = 0;
 309     for (band = 0; band < NELLY_BANDS; band++) {
 310         coeff_sum = 0;
 311         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 312             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 313                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 314         }
 315         cand[band] =
 316             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 317     }
 318
 319     if (s->avctx->trellis) {
 320         get_exponent_dynamic(s, cand, idx_table);
 321     } else {
 322         get_exponent_greedy(s, cand, idx_table);
 323     }
 324
 325     i = 0;
 326     for (band = 0; band < NELLY_BANDS; band++) {
 327         if (band) {
 328             power_idx += ff_nelly_delta_table[idx_table[band]];
 329             put_bits(&pb, 5, idx_table[band]);
 330         } else {
 331             power_idx = ff_nelly_init_table[idx_table[0]];
 332             put_bits(&pb, 6, idx_table[0]);
 333         }
 334         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 335         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 336             s->mdct_out[i] *= power_val;
 337             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 338             pows[i] = power_idx;
 339         }
 340     }
 341
 342     ff_nelly_get_sample_bits(pows, bits);
 343
 344     for (block = 0; block < 2; block++) {
 345         for (i = 0; i < NELLY_FILL_LEN; i++) {
 346             if (bits[i] > 0) {
 347                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 348                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 349                 best_idx =
 350                     quant_lut[av_clip (
 351                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 352                             quant_lut_offset[bits[i]],
 353                             quant_lut_offset[bits[i]+1] - 1
 354                             )];
 355                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 356                     best_idx++;
 357
 358                 put_bits(&pb, bits[i], best_idx);
 359             }
 360         }
 361         if (!block)
 362             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 363     }
 364
 365     flush_put_bits(&pb);
 366     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
 367 }
 368
 369 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 370                         const AVFrame *frame, int *got_packet_ptr)
 371 {
 372     NellyMoserEncodeContext *s = avctx->priv_data;
 373     int ret;
 374
 375     if (s->last_frame)
 376         return 0;
 377
 378     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
 379     if (frame) {
 380         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
 381                frame->nb_samples * sizeof(*s->buf));
 382         if (frame->nb_samples < NELLY_SAMPLES) {
 383             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
 384                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
 385             if (frame->nb_samples >= NELLY_BUF_LEN)
 386                 s->last_frame = 1;
 387         }
 388         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 389             return ret;
 390     } else {
 391         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
 392         s->last_frame = 1;
 393     }
 394
 395     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
 396         return ret;
 397     encode_block(s, avpkt->data, avpkt->size);
 398
 399     /* Get the next frame pts/duration */
 400     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 401                        &avpkt->duration);
 402
 403     *got_packet_ptr = 1;
 404     return 0;
 405 }
 406
 407 AVCodec ff_nellymoser_encoder = {
 408     .name           = "nellymoser",
 409     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 410     .type           = AVMEDIA_TYPE_AUDIO,
 411     .id             = AV_CODEC_ID_NELLYMOSER,
 412     .priv_data_size = sizeof(NellyMoserEncodeContext),
 413     .init           = encode_init,
 414     .encode2        = encode_frame,
 415     .close          = encode_end,
 416     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
 417     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
 418                                                      AV_SAMPLE_FMT_NONE },
 419 };