git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "libavutil/float_dsp.h"
  39 #include "libavutil/mathematics.h"
  40 #include "nellymoser.h"
  41 #include "avcodec.h"
  42 #include "audio_frame_queue.h"
  43 #include "dsputil.h"
  44 #include "fft.h"
  45 #include "internal.h"
  46 #include "sinewin.h"
  47
  48 #define BITSTREAM_WRITER_LE
  49 #include "put_bits.h"
  50
  51 #define POW_TABLE_SIZE (1<<11)
  52 #define POW_TABLE_OFFSET 3
  53 #define OPT_SIZE ((1<<15) + 3000)
  54
  55 typedef struct NellyMoserEncodeContext {
  56     AVCodecContext  *avctx;
  57     int             last_frame;
  58     AVFloatDSPContext fdsp;
  59     FFTContext      mdct_ctx;
  60     AudioFrameQueue afq;
  61     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
  62     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
  63     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
  64     float           (*opt )[OPT_SIZE];
  65     uint8_t         (*path)[OPT_SIZE];
  66 } NellyMoserEncodeContext;
  67
  68 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
  69
  70 static const uint8_t sf_lut[96] = {
  71      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  72      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  73     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  74     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  75     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  76     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  77 };
  78
  79 static const uint8_t sf_delta_lut[78] = {
  80      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  81      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  82     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  83     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  84     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  85 };
  86
  87 static const uint8_t quant_lut[230] = {
  88      0,
  89
  90      0,  1,  2,
  91
  92      0,  1,  2,  3,  4,  5,  6,
  93
  94      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  95     12, 13, 13, 13, 14,
  96
  97      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  98      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
  99     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
 100     30,
 101
 102      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 103      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 104     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 105     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 106     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 107     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 108     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 109     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 110     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 111     61, 61, 61, 61, 62,
 112 };
 113
 114 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 115 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 116 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 117
 118 static void apply_mdct(NellyMoserEncodeContext *s)
 119 {
 120     float *in0 = s->buf;
 121     float *in1 = s->buf + NELLY_BUF_LEN;
 122     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
 123
 124     s->fdsp.vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
 125     s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
 126     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 127
 128     s->fdsp.vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
 129     s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
 130     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
 131 }
 132
 133 static av_cold int encode_end(AVCodecContext *avctx)
 134 {
 135     NellyMoserEncodeContext *s = avctx->priv_data;
 136
 137     ff_mdct_end(&s->mdct_ctx);
 138
 139     if (s->avctx->trellis) {
 140         av_free(s->opt);
 141         av_free(s->path);
 142     }
 143     ff_af_queue_close(&s->afq);
 144 #if FF_API_OLD_ENCODE_AUDIO
 145     av_freep(&avctx->coded_frame);
 146 #endif
 147
 148     return 0;
 149 }
 150
 151 static av_cold int encode_init(AVCodecContext *avctx)
 152 {
 153     NellyMoserEncodeContext *s = avctx->priv_data;
 154     int i, ret;
 155
 156     if (avctx->channels != 1) {
 157         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 158         return AVERROR(EINVAL);
 159     }
 160
 161     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 162         avctx->sample_rate != 11025 &&
 163         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 164         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 165         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 166         return AVERROR(EINVAL);
 167     }
 168
 169     avctx->frame_size = NELLY_SAMPLES;
 170     avctx->delay      = NELLY_BUF_LEN;
 171     ff_af_queue_init(avctx, &s->afq);
 172     s->avctx = avctx;
 173     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
 174         goto error;
 175     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 176
 177     /* Generate overlap window */
 178     ff_init_ff_sine_windows(7);
 179     for (i = 0; i < POW_TABLE_SIZE; i++)
 180         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 181
 182     if (s->avctx->trellis) {
 183         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 184         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 185         if (!s->opt || !s->path) {
 186             ret = AVERROR(ENOMEM);
 187             goto error;
 188         }
 189     }
 190
 191 #if FF_API_OLD_ENCODE_AUDIO
 192     avctx->coded_frame = avcodec_alloc_frame();
 193     if (!avctx->coded_frame) {
 194         ret = AVERROR(ENOMEM);
 195         goto error;
 196     }
 197 #endif
 198
 199     return 0;
 200 error:
 201     encode_end(avctx);
 202     return ret;
 203 }
 204
 205 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 206     best_idx = \
 207         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 208     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 209         best_idx++;
 210
 211 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 212 {
 213     int band, best_idx, power_idx = 0;
 214     float power_candidate;
 215
 216     //base exponent
 217     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 218     idx_table[0] = best_idx;
 219     power_idx = ff_nelly_init_table[best_idx];
 220
 221     for (band = 1; band < NELLY_BANDS; band++) {
 222         power_candidate = cand[band] - power_idx;
 223         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 224         idx_table[band] = best_idx;
 225         power_idx += ff_nelly_delta_table[best_idx];
 226     }
 227 }
 228
 229 static inline float distance(float x, float y, int band)
 230 {
 231     //return pow(fabs(x-y), 2.0);
 232     float tmp = x - y;
 233     return tmp * tmp;
 234 }
 235
 236 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 237 {
 238     int i, j, band, best_idx;
 239     float power_candidate, best_val;
 240
 241     float  (*opt )[OPT_SIZE] = s->opt ;
 242     uint8_t(*path)[OPT_SIZE] = s->path;
 243
 244     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 245         opt[0][i] = INFINITY;
 246     }
 247
 248     for (i = 0; i < 64; i++) {
 249         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 250         path[0][ff_nelly_init_table[i]] = i;
 251     }
 252
 253     for (band = 1; band < NELLY_BANDS; band++) {
 254         int q, c = 0;
 255         float tmp;
 256         int idx_min, idx_max, idx;
 257         power_candidate = cand[band];
 258         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 259             idx_min = FFMAX(0, cand[band] - q);
 260             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 261             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 262                 if ( isinf(opt[band - 1][i]) )
 263                     continue;
 264                 for (j = 0; j < 32; j++) {
 265                     idx = i + ff_nelly_delta_table[j];
 266                     if (idx > idx_max)
 267                         break;
 268                     if (idx >= idx_min) {
 269                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 270                         if (opt[band][idx] > tmp) {
 271                             opt[band][idx] = tmp;
 272                             path[band][idx] = j;
 273                             c = 1;
 274                         }
 275                     }
 276                 }
 277             }
 278         }
 279         assert(c); //FIXME
 280     }
 281
 282     best_val = INFINITY;
 283     best_idx = -1;
 284     band = NELLY_BANDS - 1;
 285     for (i = 0; i < OPT_SIZE; i++) {
 286         if (best_val > opt[band][i]) {
 287             best_val = opt[band][i];
 288             best_idx = i;
 289         }
 290     }
 291     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 292         idx_table[band] = path[band][best_idx];
 293         if (band) {
 294             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 295         }
 296     }
 297 }
 298
 299 /**
 300  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 301  *  @param s               encoder context
 302  *  @param output          output buffer
 303  *  @param output_size     size of output buffer
 304  */
 305 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 306 {
 307     PutBitContext pb;
 308     int i, j, band, block, best_idx, power_idx = 0;
 309     float power_val, coeff, coeff_sum;
 310     float pows[NELLY_FILL_LEN];
 311     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 312     float cand[NELLY_BANDS];
 313
 314     apply_mdct(s);
 315
 316     init_put_bits(&pb, output, output_size * 8);
 317
 318     i = 0;
 319     for (band = 0; band < NELLY_BANDS; band++) {
 320         coeff_sum = 0;
 321         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 322             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 323                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 324         }
 325         cand[band] =
 326             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 327     }
 328
 329     if (s->avctx->trellis) {
 330         get_exponent_dynamic(s, cand, idx_table);
 331     } else {
 332         get_exponent_greedy(s, cand, idx_table);
 333     }
 334
 335     i = 0;
 336     for (band = 0; band < NELLY_BANDS; band++) {
 337         if (band) {
 338             power_idx += ff_nelly_delta_table[idx_table[band]];
 339             put_bits(&pb, 5, idx_table[band]);
 340         } else {
 341             power_idx = ff_nelly_init_table[idx_table[0]];
 342             put_bits(&pb, 6, idx_table[0]);
 343         }
 344         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 345         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 346             s->mdct_out[i] *= power_val;
 347             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 348             pows[i] = power_idx;
 349         }
 350     }
 351
 352     ff_nelly_get_sample_bits(pows, bits);
 353
 354     for (block = 0; block < 2; block++) {
 355         for (i = 0; i < NELLY_FILL_LEN; i++) {
 356             if (bits[i] > 0) {
 357                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 358                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 359                 best_idx =
 360                     quant_lut[av_clip (
 361                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 362                             quant_lut_offset[bits[i]],
 363                             quant_lut_offset[bits[i]+1] - 1
 364                             )];
 365                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 366                     best_idx++;
 367
 368                 put_bits(&pb, bits[i], best_idx);
 369             }
 370         }
 371         if (!block)
 372             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 373     }
 374
 375     flush_put_bits(&pb);
 376     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
 377 }
 378
 379 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 380                         const AVFrame *frame, int *got_packet_ptr)
 381 {
 382     NellyMoserEncodeContext *s = avctx->priv_data;
 383     int ret;
 384
 385     if (s->last_frame)
 386         return 0;
 387
 388     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
 389     if (frame) {
 390         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
 391                frame->nb_samples * sizeof(*s->buf));
 392         if (frame->nb_samples < NELLY_SAMPLES) {
 393             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
 394                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
 395             if (frame->nb_samples >= NELLY_BUF_LEN)
 396                 s->last_frame = 1;
 397         }
 398         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 399             return ret;
 400     } else {
 401         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
 402         s->last_frame = 1;
 403     }
 404
 405     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)))
 406         return ret;
 407     encode_block(s, avpkt->data, avpkt->size);
 408
 409     /* Get the next frame pts/duration */
 410     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 411                        &avpkt->duration);
 412
 413     *got_packet_ptr = 1;
 414     return 0;
 415 }
 416
 417 AVCodec ff_nellymoser_encoder = {
 418     .name           = "nellymoser",
 419     .type           = AVMEDIA_TYPE_AUDIO,
 420     .id             = AV_CODEC_ID_NELLYMOSER,
 421     .priv_data_size = sizeof(NellyMoserEncodeContext),
 422     .init           = encode_init,
 423     .encode2        = encode_frame,
 424     .close          = encode_end,
 425     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
 426     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 427     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
 428                                                      AV_SAMPLE_FMT_NONE },
 429 };