git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "libavutil/common.h"
  39 #include "libavutil/float_dsp.h"
  40 #include "libavutil/mathematics.h"
  41
  42 #include "audio_frame_queue.h"
  43 #include "avcodec.h"
  44 #include "fft.h"
  45 #include "internal.h"
  46 #include "nellymoser.h"
  47 #include "sinewin.h"
  48
  49 #define BITSTREAM_WRITER_LE
  50 #include "put_bits.h"
  51
  52 #define POW_TABLE_SIZE (1<<11)          ///< number of pow_table entries; encode_block() masks the power index with 0x7FF before the lookup
  53 #define POW_TABLE_OFFSET 3              ///< added to the shift count when encode_block() turns a power index into a scale factor
  54 #define OPT_SIZE ((1<<15) + 3000)       ///< row length of the opt/path trellis tables (one slot per candidate power index)
  55
  /**
   * Private state of the Nellymoser encoder (used as AVCodecContext.priv_data).
   */
  56 typedef struct NellyMoserEncodeContext {
  57     AVCodecContext  *avctx;                 ///< owning codec context, assigned in encode_init()
  58     int             last_frame;             ///< set by encode_frame() once the final block has been taken in; later calls return without a packet
  59     AVFloatDSPContext *fdsp;                ///< float DSP helpers used for windowing in apply_mdct()
  60     FFTContext      mdct_ctx;               ///< MDCT context, initialised in encode_init()
  61     AudioFrameQueue afq;                    ///< frame queue used to obtain packet pts and duration
  62     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];    ///< MDCT output; apply_mdct() stores one transform at offset 0 and one at offset NELLY_BUF_LEN
  63     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];     ///< windowed samples fed to the MDCT
  64     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer: overlap kept from the previous call, followed by the newest input samples
  65     float           (*opt )[OPT_SIZE];      ///< trellis cost table, allocated in encode_init() only when avctx->trellis is set
  66     uint8_t         (*path)[OPT_SIZE];      ///< trellis table of the index chosen to reach each state, allocated together with opt
  67 } NellyMoserEncodeContext;
  68
  69 static float pow_table[POW_TABLE_SIZE];     ///< pow(2, -i / 2048.0), filled in encode_init(); the remaining power-of-two factor is applied by the division in encode_block()
  70
  /**
   * First-guess table for the base exponent. find_best() indexes it with
   * av_clip((lrintf(val) >> 8) - 20, 0, 95) and uses the result as a starting
   * index into ff_nelly_init_table, which it then compares against the next
   * entry of that table.
   */
  71 static const uint8_t sf_lut[96] = {
  72      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  73      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  74     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  75     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  76     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  77     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  78 };
  79
  /**
   * First-guess table for the per-band exponent deltas. find_best() indexes
   * it with av_clip((lrintf(val) >> 8) + 37, 0, 77) and uses the result as a
   * starting index into ff_nelly_delta_table.
   */
  80 static const uint8_t sf_delta_lut[78] = {
  81      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  82      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  83     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  84     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  85     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  86 };
  87
  /**
   * First-guess tables for coefficient quantisation, concatenated into one
   * array. There is one section per bit count 1..6 (1, 3, 7, 21, 49 and 149
   * entries); the section for a bit count b covers the indices
   * quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1. encode_block() looks
   * up a starting index here and then compares it against the next
   * dequantisation level.
   */
  88 static const uint8_t quant_lut[230] = {
  89      0,
  90
  91      0,  1,  2,
  92
  93      0,  1,  2,  3,  4,  5,  6,
  94
  95      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  96     12, 13, 13, 13, 14,
  97
  98      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  99      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
 100     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
 101     30,
 102
 103      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 104      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 105     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 106     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 107     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 108     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 109     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 110     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 111     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 112     61, 61, 61, 61, 62,
 113 };
 114
 /*
  * Per-bit-count parameters for the quant_lut lookup in encode_block(); all
  * three arrays are indexed by the number of bits given to a coefficient
  * (encode_block() only quantises coefficients with more than 0 bits).
  * The lookup position is coeff * quant_lut_mul[bits] + quant_lut_add[bits],
  * clipped to quant_lut_offset[bits] .. quant_lut_offset[bits + 1] - 1.
  */
 115 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 116 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 118
 119 static void apply_mdct(NellyMoserEncodeContext *s)
 120 {
 121     float *in0 = s->buf;
 122     float *in1 = s->buf + NELLY_BUF_LEN;
 123     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
 124
 125     s->fdsp->vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
 126     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
 127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 128
 129     s->fdsp->vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
 130     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
 131     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
 132 }
 133
 134 static av_cold int encode_end(AVCodecContext *avctx)
 135 {
 136     NellyMoserEncodeContext *s = avctx->priv_data;
 137
 138     ff_mdct_end(&s->mdct_ctx);
 139
 140     if (s->avctx->trellis) {
 141         av_freep(&s->opt);
 142         av_freep(&s->path);
 143     }
 144     ff_af_queue_close(&s->afq);
 145     av_freep(&s->fdsp);
 146
 147     return 0;
 148 }
 149
 150 static av_cold int encode_init(AVCodecContext *avctx)
 151 {
 152     NellyMoserEncodeContext *s = avctx->priv_data;
 153     int i, ret;
 154
 155     if (avctx->channels != 1) {
 156         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 157         return AVERROR(EINVAL);
 158     }
 159
 160     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 161         avctx->sample_rate != 11025 &&
 162         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 163         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 164         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 165         return AVERROR(EINVAL);
 166     }
 167
 168     avctx->frame_size = NELLY_SAMPLES;
 169     avctx->initial_padding = NELLY_BUF_LEN;
 170     ff_af_queue_init(avctx, &s->afq);
 171     s->avctx = avctx;
 172     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
 173         return ret;
 174     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 175     if (!s->fdsp)
 176         return AVERROR(ENOMEM);
 177
 178     /* Generate overlap window */
 179     ff_init_ff_sine_windows(7);
 180     /* faster way of doing
 181     for (i = 0; i < POW_TABLE_SIZE; i++)
 182        pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
 183     pow_table[0] = 1;
 184     pow_table[1024] = M_SQRT1_2;
 185     for (i = 1; i < 513; i++) {
 186         double tmp = exp2(-i / 2048.0);
 187         pow_table[i] = tmp;
 188         pow_table[1024-i] = M_SQRT1_2 / tmp;
 189         pow_table[1024+i] = tmp * M_SQRT1_2;
 190         pow_table[2048-i] = 0.5 / tmp;
 191     }
 192
 193     if (s->avctx->trellis) {
 194         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 195         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 196         if (!s->opt || !s->path)
 197             return AVERROR(ENOMEM);
 198     }
 199
 200     return 0;
 201 }
 202
 203 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 204     best_idx = \
 205         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 206     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 207         best_idx++;
 208
 209 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 210 {
 211     int band, best_idx, power_idx = 0;
 212     float power_candidate;
 213
 214     //base exponent
 215     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 216     idx_table[0] = best_idx;
 217     power_idx = ff_nelly_init_table[best_idx];
 218
 219     for (band = 1; band < NELLY_BANDS; band++) {
 220         power_candidate = cand[band] - power_idx;
 221         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 222         idx_table[band] = best_idx;
 223         power_idx += ff_nelly_delta_table[best_idx];
 224     }
 225 }
 226
 227 static inline float distance(float x, float y, int band)
 228 {
 229     //return pow(fabs(x-y), 2.0);
 230     float tmp = x - y;
 231     return tmp * tmp;
 232 }
 233
 234 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 235 {
 236     int i, j, band, best_idx;
 237     float power_candidate, best_val;
 238
 239     float  (*opt )[OPT_SIZE] = s->opt ;
 240     uint8_t(*path)[OPT_SIZE] = s->path;
 241
 242     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 243         opt[0][i] = INFINITY;
 244     }
 245
 246     for (i = 0; i < 64; i++) {
 247         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 248         path[0][ff_nelly_init_table[i]] = i;
 249     }
 250
 251     for (band = 1; band < NELLY_BANDS; band++) {
 252         int q, c = 0;
 253         float tmp;
 254         int idx_min, idx_max, idx;
 255         power_candidate = cand[band];
 256         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 257             idx_min = FFMAX(0, cand[band] - q);
 258             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 259             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 260                 if ( isinf(opt[band - 1][i]) )
 261                     continue;
 262                 for (j = 0; j < 32; j++) {
 263                     idx = i + ff_nelly_delta_table[j];
 264                     if (idx > idx_max)
 265                         break;
 266                     if (idx >= idx_min) {
 267                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 268                         if (opt[band][idx] > tmp) {
 269                             opt[band][idx] = tmp;
 270                             path[band][idx] = j;
 271                             c = 1;
 272                         }
 273                     }
 274                 }
 275             }
 276         }
 277         av_assert1(c); //FIXME
 278     }
 279
 280     best_val = INFINITY;
 281     best_idx = -1;
 282     band = NELLY_BANDS - 1;
 283     for (i = 0; i < OPT_SIZE; i++) {
 284         if (best_val > opt[band][i]) {
 285             best_val = opt[band][i];
 286             best_idx = i;
 287         }
 288     }
 289     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 290         idx_table[band] = path[band][best_idx];
 291         if (band) {
 292             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 293         }
 294     }
 295 }
 296
 297 /**
 298  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 299  *  @param s               encoder context
 300  *  @param output          output buffer
 301  *  @param output_size     size of output buffer
 302  */
 303 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 304 {
 305     PutBitContext pb;
 306     int i, j, band, block, best_idx, power_idx = 0;
 307     float power_val, coeff, coeff_sum;
 308     float pows[NELLY_FILL_LEN];
 309     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 310     float cand[NELLY_BANDS];
 311
 312     apply_mdct(s);
 313
 314     init_put_bits(&pb, output, output_size);
 315
 316     i = 0;
 317     for (band = 0; band < NELLY_BANDS; band++) {
 318         coeff_sum = 0;
 319         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 320             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 321                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 322         }
 323         cand[band] =
 324             log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
 325     }
 326
 327     if (s->avctx->trellis) {
 328         get_exponent_dynamic(s, cand, idx_table);
 329     } else {
 330         get_exponent_greedy(s, cand, idx_table);
 331     }
 332
 333     i = 0;
 334     for (band = 0; band < NELLY_BANDS; band++) {
 335         if (band) {
 336             power_idx += ff_nelly_delta_table[idx_table[band]];
 337             put_bits(&pb, 5, idx_table[band]);
 338         } else {
 339             power_idx = ff_nelly_init_table[idx_table[0]];
 340             put_bits(&pb, 6, idx_table[0]);
 341         }
 342         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 343         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 344             s->mdct_out[i] *= power_val;
 345             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 346             pows[i] = power_idx;
 347         }
 348     }
 349
 350     ff_nelly_get_sample_bits(pows, bits);
 351
 352     for (block = 0; block < 2; block++) {
 353         for (i = 0; i < NELLY_FILL_LEN; i++) {
 354             if (bits[i] > 0) {
 355                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 356                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 357                 best_idx =
 358                     quant_lut[av_clip (
 359                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 360                             quant_lut_offset[bits[i]],
 361                             quant_lut_offset[bits[i]+1] - 1
 362                             )];
 363                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 364                     best_idx++;
 365
 366                 put_bits(&pb, bits[i], best_idx);
 367             }
 368         }
 369         if (!block)
 370             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 371     }
 372
 373     flush_put_bits(&pb);
 374     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
 375 }
 376
 377 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 378                         const AVFrame *frame, int *got_packet_ptr)
 379 {
 380     NellyMoserEncodeContext *s = avctx->priv_data;
 381     int ret;
 382
 383     if (s->last_frame)
 384         return 0;
 385
 386     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
 387     if (frame) {
 388         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
 389                frame->nb_samples * sizeof(*s->buf));
 390         if (frame->nb_samples < NELLY_SAMPLES) {
 391             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
 392                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
 393             if (frame->nb_samples >= NELLY_BUF_LEN)
 394                 s->last_frame = 1;
 395         }
 396         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 397             return ret;
 398     } else {
 399         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
 400         s->last_frame = 1;
 401     }
 402
 403     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
 404         return ret;
 405     encode_block(s, avpkt->data, avpkt->size);
 406
 407     /* Get the next frame pts/duration */
 408     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 409                        &avpkt->duration);
 410
 411     *got_packet_ptr = 1;
 412     return 0;
 413 }
 414
 /**
  * Codec descriptor that ties the Nellymoser encoder callbacks
  * (encode_init, encode_frame, encode_end) and its private context size
  * together.
  */
 415 AVCodec ff_nellymoser_encoder = {
 416     .name           = "nellymoser",
 417     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 418     .type           = AVMEDIA_TYPE_AUDIO,
 419     .id             = AV_CODEC_ID_NELLYMOSER,
 420     .priv_data_size = sizeof(NellyMoserEncodeContext),
 421     .init           = encode_init,
 422     .encode2        = encode_frame,
 423     .close          = encode_end,
 /* encode_frame() pads a short final frame with zeros and accepts a NULL frame for flushing */
 424     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
 /* encode_frame() copies frame->data[0] straight into the float sample buffer */
 425     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
 426                                                      AV_SAMPLE_FMT_NONE },
 /* NOTE(review): per FFmpeg's internal.h this flag lets the close callback run after a failed init - confirm against the tree in use */
 427     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 428 };