git.sesse.net Git - ffmpeg/blob - libavcodec/nellymoserenc.c

   1 /*
   2  * Nellymoser encoder
   3  * This code is developed as part of Google Summer of Code 2008 Program.
   4  *
   5  * Copyright (c) 2008 Bartlomiej Wolowiec
   6  *
   7  * This file is part of Libav.
   8  *
   9  * Libav is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * Libav is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with Libav; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * Nellymoser encoder
  27  * by Bartlomiej Wolowiec
  28  *
  29  * Generic codec information: libavcodec/nellymoserdec.c
  30  *
  31  * Some information also from: http://samples.libav.org/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  33  *
  34  * for more information about nellymoser format, visit:
  35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
  36  */
  37
  38 #include "libavutil/float_dsp.h"
  39 #include "libavutil/mathematics.h"
  40 #include "nellymoser.h"
  41 #include "avcodec.h"
  42 #include "audio_frame_queue.h"
  43 #include "dsputil.h"
  44 #include "fft.h"
  45 #include "internal.h"
  46 #include "sinewin.h"
  47
  48 #define BITSTREAM_WRITER_LE
  49 #include "put_bits.h"
  50
  51 #define POW_TABLE_SIZE (1<<11)
  52 #define POW_TABLE_OFFSET 3
  53 #define OPT_SIZE ((1<<15) + 3000)
  54
  55 typedef struct NellyMoserEncodeContext {
  56     AVCodecContext  *avctx;
  57     int             last_frame;
  58     DSPContext      dsp;
  59     AVFloatDSPContext fdsp;
  60     FFTContext      mdct_ctx;
  61     AudioFrameQueue afq;
  62     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
  63     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
  64     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
  65     float           (*opt )[NELLY_BANDS];
  66     uint8_t         (*path)[NELLY_BANDS];
  67 } NellyMoserEncodeContext;
  68
  69 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
  70
  71 static const uint8_t sf_lut[96] = {
  72      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  73      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
  74     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
  75     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
  76     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
  77     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
  78 };
  79
  80 static const uint8_t sf_delta_lut[78] = {
  81      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
  82      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
  83     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
  84     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
  85     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
  86 };
  87
  88 static const uint8_t quant_lut[230] = {
  89      0,
  90
  91      0,  1,  2,
  92
  93      0,  1,  2,  3,  4,  5,  6,
  94
  95      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
  96     12, 13, 13, 13, 14,
  97
  98      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
  99      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
 100     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
 101     30,
 102
 103      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
 104      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
 105     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
 106     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
 107     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
 108     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
 109     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
 110     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
 111     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
 112     61, 61, 61, 61, 62,
 113 };
 114
 115 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
 116 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
 117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
 118
 119 static void apply_mdct(NellyMoserEncodeContext *s)
 120 {
 121     float *in0 = s->buf;
 122     float *in1 = s->buf + NELLY_BUF_LEN;
 123     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
 124
 125     s->fdsp.vector_fmul       (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
 126     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
 127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 128
 129     s->fdsp.vector_fmul       (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
 130     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
 131     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
 132 }
 133
 134 static av_cold int encode_end(AVCodecContext *avctx)
 135 {
 136     NellyMoserEncodeContext *s = avctx->priv_data;
 137
 138     ff_mdct_end(&s->mdct_ctx);
 139
 140     if (s->avctx->trellis) {
 141         av_free(s->opt);
 142         av_free(s->path);
 143     }
 144     ff_af_queue_close(&s->afq);
 145 #if FF_API_OLD_ENCODE_AUDIO
 146     av_freep(&avctx->coded_frame);
 147 #endif
 148
 149     return 0;
 150 }
 151
 152 static av_cold int encode_init(AVCodecContext *avctx)
 153 {
 154     NellyMoserEncodeContext *s = avctx->priv_data;
 155     int i, ret;
 156
 157     if (avctx->channels != 1) {
 158         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
 159         return AVERROR(EINVAL);
 160     }
 161
 162     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
 163         avctx->sample_rate != 11025 &&
 164         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
 165         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
 166         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
 167         return AVERROR(EINVAL);
 168     }
 169
 170     avctx->frame_size = NELLY_SAMPLES;
 171     avctx->delay      = NELLY_BUF_LEN;
 172     ff_af_queue_init(avctx, &s->afq);
 173     s->avctx = avctx;
 174     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
 175         goto error;
 176     ff_dsputil_init(&s->dsp, avctx);
 177     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 178
 179     /* Generate overlap window */
 180     ff_sine_window_init(ff_sine_128, 128);
 181     for (i = 0; i < POW_TABLE_SIZE; i++)
 182         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 183
 184     if (s->avctx->trellis) {
 185         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
 186         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
 187         if (!s->opt || !s->path) {
 188             ret = AVERROR(ENOMEM);
 189             goto error;
 190         }
 191     }
 192
 193 #if FF_API_OLD_ENCODE_AUDIO
 194     avctx->coded_frame = avcodec_alloc_frame();
 195     if (!avctx->coded_frame) {
 196         ret = AVERROR(ENOMEM);
 197         goto error;
 198     }
 199 #endif
 200
 201     return 0;
 202 error:
 203     encode_end(avctx);
 204     return ret;
 205 }
 206
 207 #define find_best(val, table, LUT, LUT_add, LUT_size) \
 208     best_idx = \
 209         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
 210     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
 211         best_idx++;
 212
 213 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 214 {
 215     int band, best_idx, power_idx = 0;
 216     float power_candidate;
 217
 218     //base exponent
 219     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
 220     idx_table[0] = best_idx;
 221     power_idx = ff_nelly_init_table[best_idx];
 222
 223     for (band = 1; band < NELLY_BANDS; band++) {
 224         power_candidate = cand[band] - power_idx;
 225         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
 226         idx_table[band] = best_idx;
 227         power_idx += ff_nelly_delta_table[best_idx];
 228     }
 229 }
 230
 231 static inline float distance(float x, float y, int band)
 232 {
 233     //return pow(fabs(x-y), 2.0);
 234     float tmp = x - y;
 235     return tmp * tmp;
 236 }
 237
 238 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
 239 {
 240     int i, j, band, best_idx;
 241     float power_candidate, best_val;
 242
 243     float  (*opt )[NELLY_BANDS] = s->opt ;
 244     uint8_t(*path)[NELLY_BANDS] = s->path;
 245
 246     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
 247         opt[0][i] = INFINITY;
 248     }
 249
 250     for (i = 0; i < 64; i++) {
 251         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
 252         path[0][ff_nelly_init_table[i]] = i;
 253     }
 254
 255     for (band = 1; band < NELLY_BANDS; band++) {
 256         int q, c = 0;
 257         float tmp;
 258         int idx_min, idx_max, idx;
 259         power_candidate = cand[band];
 260         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
 261             idx_min = FFMAX(0, cand[band] - q);
 262             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
 263             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
 264                 if ( isinf(opt[band - 1][i]) )
 265                     continue;
 266                 for (j = 0; j < 32; j++) {
 267                     idx = i + ff_nelly_delta_table[j];
 268                     if (idx > idx_max)
 269                         break;
 270                     if (idx >= idx_min) {
 271                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
 272                         if (opt[band][idx] > tmp) {
 273                             opt[band][idx] = tmp;
 274                             path[band][idx] = j;
 275                             c = 1;
 276                         }
 277                     }
 278                 }
 279             }
 280         }
 281         assert(c); //FIXME
 282     }
 283
 284     best_val = INFINITY;
 285     best_idx = -1;
 286     band = NELLY_BANDS - 1;
 287     for (i = 0; i < OPT_SIZE; i++) {
 288         if (best_val > opt[band][i]) {
 289             best_val = opt[band][i];
 290             best_idx = i;
 291         }
 292     }
 293     for (band = NELLY_BANDS - 1; band >= 0; band--) {
 294         idx_table[band] = path[band][best_idx];
 295         if (band) {
 296             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
 297         }
 298     }
 299 }
 300
 301 /**
 302  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
 303  *  @param s               encoder context
 304  *  @param output          output buffer
 305  *  @param output_size     size of output buffer
 306  */
 307 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
 308 {
 309     PutBitContext pb;
 310     int i, j, band, block, best_idx, power_idx = 0;
 311     float power_val, coeff, coeff_sum;
 312     float pows[NELLY_FILL_LEN];
 313     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
 314     float cand[NELLY_BANDS];
 315
 316     apply_mdct(s);
 317
 318     init_put_bits(&pb, output, output_size * 8);
 319
 320     i = 0;
 321     for (band = 0; band < NELLY_BANDS; band++) {
 322         coeff_sum = 0;
 323         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 324             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
 325                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
 326         }
 327         cand[band] =
 328             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
 329     }
 330
 331     if (s->avctx->trellis) {
 332         get_exponent_dynamic(s, cand, idx_table);
 333     } else {
 334         get_exponent_greedy(s, cand, idx_table);
 335     }
 336
 337     i = 0;
 338     for (band = 0; band < NELLY_BANDS; band++) {
 339         if (band) {
 340             power_idx += ff_nelly_delta_table[idx_table[band]];
 341             put_bits(&pb, 5, idx_table[band]);
 342         } else {
 343             power_idx = ff_nelly_init_table[idx_table[0]];
 344             put_bits(&pb, 6, idx_table[0]);
 345         }
 346         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
 347         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
 348             s->mdct_out[i] *= power_val;
 349             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
 350             pows[i] = power_idx;
 351         }
 352     }
 353
 354     ff_nelly_get_sample_bits(pows, bits);
 355
 356     for (block = 0; block < 2; block++) {
 357         for (i = 0; i < NELLY_FILL_LEN; i++) {
 358             if (bits[i] > 0) {
 359                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
 360                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
 361                 best_idx =
 362                     quant_lut[av_clip (
 363                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
 364                             quant_lut_offset[bits[i]],
 365                             quant_lut_offset[bits[i]+1] - 1
 366                             )];
 367                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
 368                     best_idx++;
 369
 370                 put_bits(&pb, bits[i], best_idx);
 371             }
 372         }
 373         if (!block)
 374             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
 375     }
 376
 377     flush_put_bits(&pb);
 378     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
 379 }
 380
 381 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 382                         const AVFrame *frame, int *got_packet_ptr)
 383 {
 384     NellyMoserEncodeContext *s = avctx->priv_data;
 385     int ret;
 386
 387     if (s->last_frame)
 388         return 0;
 389
 390     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
 391     if (frame) {
 392         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
 393                frame->nb_samples * sizeof(*s->buf));
 394         if (frame->nb_samples < NELLY_SAMPLES) {
 395             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
 396                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
 397             if (frame->nb_samples >= NELLY_BUF_LEN)
 398                 s->last_frame = 1;
 399         }
 400         if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
 401             return ret;
 402     } else {
 403         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
 404         s->last_frame = 1;
 405     }
 406
 407     if ((ret = ff_alloc_packet(avpkt, NELLY_BLOCK_LEN))) {
 408         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
 409         return ret;
 410     }
 411     encode_block(s, avpkt->data, avpkt->size);
 412
 413     /* Get the next frame pts/duration */
 414     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 415                        &avpkt->duration);
 416
 417     *got_packet_ptr = 1;
 418     return 0;
 419 }
 420
 421 AVCodec ff_nellymoser_encoder = {
 422     .name           = "nellymoser",
 423     .type           = AVMEDIA_TYPE_AUDIO,
 424     .id             = AV_CODEC_ID_NELLYMOSER,
 425     .priv_data_size = sizeof(NellyMoserEncodeContext),
 426     .init           = encode_init,
 427     .encode2        = encode_frame,
 428     .close          = encode_end,
 429     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
 430     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
 431     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
 432                                                      AV_SAMPLE_FMT_NONE },
 433 };