3 * This code is developed as part of Google Summer of Code 2008 Program.
5 * Copyright (c) 2008 Bartlomiej Wolowiec
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * by Bartlomiej Wolowiec
29 * Generic codec information: libavcodec/nellymoserdec.c
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
42 #include "audio_frame_queue.h"
46 #include "nellymoser.h"
49 #define BITSTREAM_WRITER_LE
52 #define POW_TABLE_SIZE (1<<11)
53 #define POW_TABLE_OFFSET 3
54 #define OPT_SIZE ((1<<15) + 3000)
56 typedef struct NellyMoserEncodeContext {
57 AVCodecContext *avctx;
59 AVFloatDSPContext *fdsp;
62 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
63 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
64 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65 float (*opt )[OPT_SIZE];
66 uint8_t (*path)[OPT_SIZE];
67 } NellyMoserEncodeContext;
69 static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
71 static const uint8_t sf_lut[96] = {
72 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
73 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
74 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80 static const uint8_t sf_delta_lut[78] = {
81 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
82 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
83 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88 static const uint8_t quant_lut[230] = {
95 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
98 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
99 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
104 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
105 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
115 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
116 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
119 static void apply_mdct(NellyMoserEncodeContext *s)
122 float *in1 = s->buf + NELLY_BUF_LEN;
123 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
125 s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
129 s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134 static av_cold int encode_end(AVCodecContext *avctx)
136 NellyMoserEncodeContext *s = avctx->priv_data;
138 ff_mdct_end(&s->mdct_ctx);
140 if (s->avctx->trellis) {
144 ff_af_queue_close(&s->afq);
150 static av_cold int encode_init(AVCodecContext *avctx)
152 NellyMoserEncodeContext *s = avctx->priv_data;
155 if (avctx->channels != 1) {
156 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157 return AVERROR(EINVAL);
160 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161 avctx->sample_rate != 11025 &&
162 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
163 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
164 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165 return AVERROR(EINVAL);
168 avctx->frame_size = NELLY_SAMPLES;
169 avctx->initial_padding = NELLY_BUF_LEN;
170 ff_af_queue_init(avctx, &s->afq);
172 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
174 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
176 return AVERROR(ENOMEM);
178 /* Generate overlap window */
179 ff_init_ff_sine_windows(7);
180 /* faster way of doing
181 for (i = 0; i < POW_TABLE_SIZE; i++)
182 pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
184 pow_table[1024] = M_SQRT1_2;
185 for (i = 1; i < 513; i++) {
186 double tmp = exp2(-i / 2048.0);
188 pow_table[1024-i] = M_SQRT1_2 / tmp;
189 pow_table[1024+i] = tmp * M_SQRT1_2;
190 pow_table[2048-i] = 0.5 / tmp;
193 if (s->avctx->trellis) {
194 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
195 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
196 if (!s->opt || !s->path)
197 return AVERROR(ENOMEM);
203 #define find_best(val, table, LUT, LUT_add, LUT_size) \
205 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
206 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
209 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
211 int band, best_idx, power_idx = 0;
212 float power_candidate;
215 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
216 idx_table[0] = best_idx;
217 power_idx = ff_nelly_init_table[best_idx];
219 for (band = 1; band < NELLY_BANDS; band++) {
220 power_candidate = cand[band] - power_idx;
221 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
222 idx_table[band] = best_idx;
223 power_idx += ff_nelly_delta_table[best_idx];
227 static inline float distance(float x, float y, int band)
229 //return pow(fabs(x-y), 2.0);
234 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
236 int i, j, band, best_idx;
237 float power_candidate, best_val;
239 float (*opt )[OPT_SIZE] = s->opt ;
240 uint8_t(*path)[OPT_SIZE] = s->path;
242 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
243 opt[0][i] = INFINITY;
246 for (i = 0; i < 64; i++) {
247 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
248 path[0][ff_nelly_init_table[i]] = i;
251 for (band = 1; band < NELLY_BANDS; band++) {
254 int idx_min, idx_max, idx;
255 power_candidate = cand[band];
256 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
257 idx_min = FFMAX(0, cand[band] - q);
258 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
259 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
260 if ( isinf(opt[band - 1][i]) )
262 for (j = 0; j < 32; j++) {
263 idx = i + ff_nelly_delta_table[j];
266 if (idx >= idx_min) {
267 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
268 if (opt[band][idx] > tmp) {
269 opt[band][idx] = tmp;
277 av_assert1(c); //FIXME
282 band = NELLY_BANDS - 1;
283 for (i = 0; i < OPT_SIZE; i++) {
284 if (best_val > opt[band][i]) {
285 best_val = opt[band][i];
289 for (band = NELLY_BANDS - 1; band >= 0; band--) {
290 idx_table[band] = path[band][best_idx];
292 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
298 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
299 * @param s encoder context
300 * @param output output buffer
301 * @param output_size size of output buffer
303 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
306 int i, j, band, block, best_idx, power_idx = 0;
307 float power_val, coeff, coeff_sum;
308 float pows[NELLY_FILL_LEN];
309 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
310 float cand[NELLY_BANDS];
314 init_put_bits(&pb, output, output_size);
317 for (band = 0; band < NELLY_BANDS; band++) {
319 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
320 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
321 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
324 log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
327 if (s->avctx->trellis) {
328 get_exponent_dynamic(s, cand, idx_table);
330 get_exponent_greedy(s, cand, idx_table);
334 for (band = 0; band < NELLY_BANDS; band++) {
336 power_idx += ff_nelly_delta_table[idx_table[band]];
337 put_bits(&pb, 5, idx_table[band]);
339 power_idx = ff_nelly_init_table[idx_table[0]];
340 put_bits(&pb, 6, idx_table[0]);
342 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
343 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
344 s->mdct_out[i] *= power_val;
345 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
350 ff_nelly_get_sample_bits(pows, bits);
352 for (block = 0; block < 2; block++) {
353 for (i = 0; i < NELLY_FILL_LEN; i++) {
355 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
356 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
359 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
360 quant_lut_offset[bits[i]],
361 quant_lut_offset[bits[i]+1] - 1
363 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
366 put_bits(&pb, bits[i], best_idx);
370 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
374 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
377 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
378 const AVFrame *frame, int *got_packet_ptr)
380 NellyMoserEncodeContext *s = avctx->priv_data;
386 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
388 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
389 frame->nb_samples * sizeof(*s->buf));
390 if (frame->nb_samples < NELLY_SAMPLES) {
391 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
392 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
393 if (frame->nb_samples >= NELLY_BUF_LEN)
396 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
399 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
403 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
405 encode_block(s, avpkt->data, avpkt->size);
407 /* Get the next frame pts/duration */
408 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
415 AVCodec ff_nellymoser_encoder = {
416 .name = "nellymoser",
417 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
418 .type = AVMEDIA_TYPE_AUDIO,
419 .id = AV_CODEC_ID_NELLYMOSER,
420 .priv_data_size = sizeof(NellyMoserEncodeContext),
422 .encode2 = encode_frame,
424 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
425 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
426 AV_SAMPLE_FMT_NONE },
427 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,