3 * This code is developed as part of Google Summer of Code 2008 Program.
5 * Copyright (c) 2008 Bartlomiej Wolowiec
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * by Bartlomiej Wolowiec
29 * Generic codec information: libavcodec/nellymoserdec.c
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 #include "libavutil/thread.h"
43 #include "audio_frame_queue.h"
47 #include "nellymoser.h"
50 #define BITSTREAM_WRITER_LE
53 #define POW_TABLE_SIZE (1<<11)
54 #define POW_TABLE_OFFSET 3
55 #define OPT_SIZE ((1<<15) + 3000)
57 typedef struct NellyMoserEncodeContext {
58 AVCodecContext *avctx;
60 AVFloatDSPContext *fdsp;
63 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
64 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
65 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
66 float (*opt )[OPT_SIZE];
67 uint8_t (*path)[OPT_SIZE];
68 } NellyMoserEncodeContext;
70 static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
72 static const uint8_t sf_lut[96] = {
73 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
74 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
75 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
76 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
77 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
78 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
81 static const uint8_t sf_delta_lut[78] = {
82 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
83 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
84 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
85 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
86 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
89 static const uint8_t quant_lut[230] = {
96 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
99 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
100 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
101 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
104 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
105 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
106 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
107 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
108 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
109 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
110 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
111 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
112 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
116 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
117 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
118 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
120 static void apply_mdct(NellyMoserEncodeContext *s)
123 float *in1 = s->buf + NELLY_BUF_LEN;
124 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
126 s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
127 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
128 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
130 s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
131 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
132 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
135 static av_cold int encode_end(AVCodecContext *avctx)
137 NellyMoserEncodeContext *s = avctx->priv_data;
139 ff_mdct_end(&s->mdct_ctx);
143 ff_af_queue_close(&s->afq);
149 static av_cold void nellymoser_init_static(void)
151 /* faster way of doing
152 for (int i = 0; i < POW_TABLE_SIZE; i++)
153 pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
155 pow_table[1024] = M_SQRT1_2;
156 for (int i = 1; i < 513; i++) {
157 double tmp = exp2(-i / 2048.0);
159 pow_table[1024-i] = M_SQRT1_2 / tmp;
160 pow_table[1024+i] = tmp * M_SQRT1_2;
161 pow_table[2048-i] = 0.5 / tmp;
163 /* Generate overlap window */
164 ff_init_ff_sine_windows(7);
167 static av_cold int encode_init(AVCodecContext *avctx)
169 static AVOnce init_static_once = AV_ONCE_INIT;
170 NellyMoserEncodeContext *s = avctx->priv_data;
173 if (avctx->channels != 1) {
174 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
175 return AVERROR(EINVAL);
178 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
179 avctx->sample_rate != 11025 &&
180 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
181 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
182 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
183 return AVERROR(EINVAL);
186 avctx->frame_size = NELLY_SAMPLES;
187 avctx->initial_padding = NELLY_BUF_LEN;
188 ff_af_queue_init(avctx, &s->afq);
190 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
192 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
194 return AVERROR(ENOMEM);
196 if (s->avctx->trellis) {
197 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
198 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
199 if (!s->opt || !s->path)
200 return AVERROR(ENOMEM);
203 ff_thread_once(&init_static_once, nellymoser_init_static);
208 #define find_best(val, table, LUT, LUT_add, LUT_size) \
210 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
211 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
214 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
216 int band, best_idx, power_idx = 0;
217 float power_candidate;
220 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
221 idx_table[0] = best_idx;
222 power_idx = ff_nelly_init_table[best_idx];
224 for (band = 1; band < NELLY_BANDS; band++) {
225 power_candidate = cand[band] - power_idx;
226 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
227 idx_table[band] = best_idx;
228 power_idx += ff_nelly_delta_table[best_idx];
232 static inline float distance(float x, float y, int band)
234 //return pow(fabs(x-y), 2.0);
239 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
241 int i, j, band, best_idx;
242 float power_candidate, best_val;
244 float (*opt )[OPT_SIZE] = s->opt ;
245 uint8_t(*path)[OPT_SIZE] = s->path;
247 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
248 opt[0][i] = INFINITY;
251 for (i = 0; i < 64; i++) {
252 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
253 path[0][ff_nelly_init_table[i]] = i;
256 for (band = 1; band < NELLY_BANDS; band++) {
259 int idx_min, idx_max, idx;
260 power_candidate = cand[band];
261 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
262 idx_min = FFMAX(0, cand[band] - q);
263 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
264 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
265 if ( isinf(opt[band - 1][i]) )
267 for (j = 0; j < 32; j++) {
268 idx = i + ff_nelly_delta_table[j];
271 if (idx >= idx_min) {
272 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
273 if (opt[band][idx] > tmp) {
274 opt[band][idx] = tmp;
282 av_assert1(c); //FIXME
287 band = NELLY_BANDS - 1;
288 for (i = 0; i < OPT_SIZE; i++) {
289 if (best_val > opt[band][i]) {
290 best_val = opt[band][i];
294 for (band = NELLY_BANDS - 1; band >= 0; band--) {
295 idx_table[band] = path[band][best_idx];
297 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
303 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
304 * @param s encoder context
305 * @param output output buffer
306 * @param output_size size of output buffer
308 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
311 int i, j, band, block, best_idx, power_idx = 0;
312 float power_val, coeff, coeff_sum;
313 float pows[NELLY_FILL_LEN];
314 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
315 float cand[NELLY_BANDS];
319 init_put_bits(&pb, output, output_size);
322 for (band = 0; band < NELLY_BANDS; band++) {
324 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
325 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
326 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
329 log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
332 if (s->avctx->trellis) {
333 get_exponent_dynamic(s, cand, idx_table);
335 get_exponent_greedy(s, cand, idx_table);
339 for (band = 0; band < NELLY_BANDS; band++) {
341 power_idx += ff_nelly_delta_table[idx_table[band]];
342 put_bits(&pb, 5, idx_table[band]);
344 power_idx = ff_nelly_init_table[idx_table[0]];
345 put_bits(&pb, 6, idx_table[0]);
347 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
348 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
349 s->mdct_out[i] *= power_val;
350 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
355 ff_nelly_get_sample_bits(pows, bits);
357 for (block = 0; block < 2; block++) {
358 for (i = 0; i < NELLY_FILL_LEN; i++) {
360 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
361 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
364 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
365 quant_lut_offset[bits[i]],
366 quant_lut_offset[bits[i]+1] - 1
368 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
371 put_bits(&pb, bits[i], best_idx);
375 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
379 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
382 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
383 const AVFrame *frame, int *got_packet_ptr)
385 NellyMoserEncodeContext *s = avctx->priv_data;
391 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
393 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
394 frame->nb_samples * sizeof(*s->buf));
395 if (frame->nb_samples < NELLY_SAMPLES) {
396 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
397 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
398 if (frame->nb_samples >= NELLY_BUF_LEN)
401 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
404 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
408 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
410 encode_block(s, avpkt->data, avpkt->size);
412 /* Get the next frame pts/duration */
413 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
420 AVCodec ff_nellymoser_encoder = {
421 .name = "nellymoser",
422 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
423 .type = AVMEDIA_TYPE_AUDIO,
424 .id = AV_CODEC_ID_NELLYMOSER,
425 .priv_data_size = sizeof(NellyMoserEncodeContext),
427 .encode2 = encode_frame,
429 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
430 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
431 AV_SAMPLE_FMT_NONE },
432 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,