3 * This code is developed as part of Google Summer of Code 2008 Program.
5 * Copyright (c) 2008 Bartlomiej Wolowiec
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * by Bartlomiej Wolowiec
29 * Generic codec information: libavcodec/nellymoserdec.c
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
42 #include "audio_frame_queue.h"
46 #include "nellymoser.h"
49 #define BITSTREAM_WRITER_LE
52 #define POW_TABLE_SIZE (1<<11)
53 #define POW_TABLE_OFFSET 3
54 #define OPT_SIZE ((1<<15) + 3000)
56 typedef struct NellyMoserEncodeContext {
57 AVCodecContext *avctx;
59 AVFloatDSPContext *fdsp;
62 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
63 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
64 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
65 float (*opt )[OPT_SIZE];
66 uint8_t (*path)[OPT_SIZE];
67 } NellyMoserEncodeContext;
69 static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
71 static const uint8_t sf_lut[96] = {
72 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
73 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
74 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
75 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
76 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
77 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
80 static const uint8_t sf_delta_lut[78] = {
81 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
82 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
83 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
84 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
85 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
88 static const uint8_t quant_lut[230] = {
95 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
98 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
99 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
100 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
103 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
104 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
105 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
106 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
107 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
108 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
109 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
110 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
111 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
115 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
116 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
117 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
119 static void apply_mdct(NellyMoserEncodeContext *s)
122 float *in1 = s->buf + NELLY_BUF_LEN;
123 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
125 s->fdsp->vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
126 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
129 s->fdsp->vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
130 s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
131 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
134 static av_cold int encode_end(AVCodecContext *avctx)
136 NellyMoserEncodeContext *s = avctx->priv_data;
138 ff_mdct_end(&s->mdct_ctx);
140 if (s->avctx->trellis) {
144 ff_af_queue_close(&s->afq);
150 static av_cold int encode_init(AVCodecContext *avctx)
152 NellyMoserEncodeContext *s = avctx->priv_data;
155 if (avctx->channels != 1) {
156 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157 return AVERROR(EINVAL);
160 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161 avctx->sample_rate != 11025 &&
162 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
163 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
164 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165 return AVERROR(EINVAL);
168 avctx->frame_size = NELLY_SAMPLES;
169 avctx->initial_padding = NELLY_BUF_LEN;
170 ff_af_queue_init(avctx, &s->afq);
172 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
174 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
176 ret = AVERROR(ENOMEM);
180 /* Generate overlap window */
181 ff_init_ff_sine_windows(7);
182 /* faster way of doing
183 for (i = 0; i < POW_TABLE_SIZE; i++)
184 pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
186 pow_table[1024] = M_SQRT1_2;
187 for (i = 1; i < 513; i++) {
188 double tmp = exp2(-i / 2048.0);
190 pow_table[1024-i] = M_SQRT1_2 / tmp;
191 pow_table[1024+i] = tmp * M_SQRT1_2;
192 pow_table[2048-i] = 0.5 / tmp;
195 if (s->avctx->trellis) {
196 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
197 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
198 if (!s->opt || !s->path) {
199 ret = AVERROR(ENOMEM);
210 #define find_best(val, table, LUT, LUT_add, LUT_size) \
212 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
213 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
216 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
218 int band, best_idx, power_idx = 0;
219 float power_candidate;
222 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
223 idx_table[0] = best_idx;
224 power_idx = ff_nelly_init_table[best_idx];
226 for (band = 1; band < NELLY_BANDS; band++) {
227 power_candidate = cand[band] - power_idx;
228 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
229 idx_table[band] = best_idx;
230 power_idx += ff_nelly_delta_table[best_idx];
234 static inline float distance(float x, float y, int band)
236 //return pow(fabs(x-y), 2.0);
241 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
243 int i, j, band, best_idx;
244 float power_candidate, best_val;
246 float (*opt )[OPT_SIZE] = s->opt ;
247 uint8_t(*path)[OPT_SIZE] = s->path;
249 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
250 opt[0][i] = INFINITY;
253 for (i = 0; i < 64; i++) {
254 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
255 path[0][ff_nelly_init_table[i]] = i;
258 for (band = 1; band < NELLY_BANDS; band++) {
261 int idx_min, idx_max, idx;
262 power_candidate = cand[band];
263 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
264 idx_min = FFMAX(0, cand[band] - q);
265 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
266 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
267 if ( isinf(opt[band - 1][i]) )
269 for (j = 0; j < 32; j++) {
270 idx = i + ff_nelly_delta_table[j];
273 if (idx >= idx_min) {
274 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
275 if (opt[band][idx] > tmp) {
276 opt[band][idx] = tmp;
284 av_assert1(c); //FIXME
289 band = NELLY_BANDS - 1;
290 for (i = 0; i < OPT_SIZE; i++) {
291 if (best_val > opt[band][i]) {
292 best_val = opt[band][i];
296 for (band = NELLY_BANDS - 1; band >= 0; band--) {
297 idx_table[band] = path[band][best_idx];
299 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
305 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
306 * @param s encoder context
307 * @param output output buffer
308 * @param output_size size of output buffer
310 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
313 int i, j, band, block, best_idx, power_idx = 0;
314 float power_val, coeff, coeff_sum;
315 float pows[NELLY_FILL_LEN];
316 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
317 float cand[NELLY_BANDS];
321 init_put_bits(&pb, output, output_size);
324 for (band = 0; band < NELLY_BANDS; band++) {
326 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
327 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
328 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
331 log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
334 if (s->avctx->trellis) {
335 get_exponent_dynamic(s, cand, idx_table);
337 get_exponent_greedy(s, cand, idx_table);
341 for (band = 0; band < NELLY_BANDS; band++) {
343 power_idx += ff_nelly_delta_table[idx_table[band]];
344 put_bits(&pb, 5, idx_table[band]);
346 power_idx = ff_nelly_init_table[idx_table[0]];
347 put_bits(&pb, 6, idx_table[0]);
349 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
350 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
351 s->mdct_out[i] *= power_val;
352 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
357 ff_nelly_get_sample_bits(pows, bits);
359 for (block = 0; block < 2; block++) {
360 for (i = 0; i < NELLY_FILL_LEN; i++) {
362 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
363 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
366 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
367 quant_lut_offset[bits[i]],
368 quant_lut_offset[bits[i]+1] - 1
370 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
373 put_bits(&pb, bits[i], best_idx);
377 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
381 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
384 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
385 const AVFrame *frame, int *got_packet_ptr)
387 NellyMoserEncodeContext *s = avctx->priv_data;
393 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
395 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
396 frame->nb_samples * sizeof(*s->buf));
397 if (frame->nb_samples < NELLY_SAMPLES) {
398 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
399 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
400 if (frame->nb_samples >= NELLY_BUF_LEN)
403 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
406 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
410 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
412 encode_block(s, avpkt->data, avpkt->size);
414 /* Get the next frame pts/duration */
415 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
422 AVCodec ff_nellymoser_encoder = {
423 .name = "nellymoser",
424 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
425 .type = AVMEDIA_TYPE_AUDIO,
426 .id = AV_CODEC_ID_NELLYMOSER,
427 .priv_data_size = sizeof(NellyMoserEncodeContext),
429 .encode2 = encode_frame,
431 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
432 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
433 AV_SAMPLE_FMT_NONE },