3 * This code is developed as part of Google Summer of Code 2008 Program.
5 * Copyright (c) 2008 Bartlomiej Wolowiec
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * by Bartlomiej Wolowiec
29 * Generic codec information: libavcodec/nellymoserdec.c
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
38 #include "libavutil/float_dsp.h"
39 #include "libavutil/mathematics.h"
40 #include "nellymoser.h"
42 #include "audio_frame_queue.h"
48 #define BITSTREAM_WRITER_LE
// Number of entries in pow_table (1 << 11 = 2048).
51 #define POW_TABLE_SIZE (1<<11)
// Exponent bias: added to the exponent when pow_table is filled in
// encode_init() and added to the shift amount when the table is read back
// in encode_block().
52 #define POW_TABLE_OFFSET 3
// Size of the second dimension of the opt/path tables used by the trellis
// exponent search (get_exponent_dynamic()).
53 #define OPT_SIZE ((1<<15) + 3000)
// Private encoder state; an instance is reached through avctx->priv_data.
55 typedef struct NellyMoserEncodeContext {
// Codec context; read by the encoder to test the trellis option.
56 AVCodecContext *avctx;
// Float DSP helpers (vector_fmul / vector_fmul_reverse) used for windowing
// in apply_mdct().
58 AVFloatDSPContext fdsp;
// MDCT output: apply_mdct() writes one transform at offset 0 and a second
// one at offset NELLY_BUF_LEN.
61 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
// Scratch buffer holding the windowed input of one MDCT.
62 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
63 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
// Cost table (opt) and chosen-index table (path) for get_exponent_dynamic();
// allocated in encode_init() only when the trellis option is set.
64 float (*opt )[OPT_SIZE];
65 uint8_t (*path)[OPT_SIZE];
66 } NellyMoserEncodeContext;
68 static float pow_table[POW_TABLE_SIZE]; ///< pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled in encode_init()
// Start-index lookup for the first band. find_best() indexes this table with
// (lrintf(val) >> 8) - 20, clipped to 0..95, to obtain an initial index into
// ff_nelly_init_table (see get_exponent_greedy()).
70 static const uint8_t sf_lut[96] = {
71 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
72 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
73 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
74 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
75 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
76 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
// Start-index lookup for the bands after the first. find_best() indexes this
// table with (lrintf(val) >> 8) + 37, clipped to 0..77, to obtain an initial
// index into ff_nelly_delta_table (see get_exponent_greedy()).
79 static const uint8_t sf_delta_lut[78] = {
80 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
81 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
82 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
83 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
84 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
// Quantizer start-index lookup with 230 entries.
// NOTE(review): presumably indexed in encode_block() through the per-bit-count
// ranges given by quant_lut_offset, whose last entry (230) equals the size of
// this table -- confirm against the quantization loop.
87 static const uint8_t quant_lut[230] = {
94 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
97 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
98 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
99 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
102 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
103 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
104 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
105 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
106 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
107 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
108 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
109 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
110 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
// Per-bit-count quantizer parameters, indexed by bits[i] in encode_block():
// a scaled coefficient is mapped to coeff * quant_lut_mul[b] + quant_lut_add[b]
// and passed together with quant_lut_offset[b] and quant_lut_offset[b + 1] - 1
// (apparently the lower and upper bound of a clip -- confirm).
114 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
115 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
116 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
// Window the sample buffer with ff_sine_128 and run two MDCTs.
// The first transform is built from in0 and in1 and written to s->mdct_out;
// the second is built from in1 and in2 and written to
// s->mdct_out + NELLY_BUF_LEN. in1 is therefore shared by both transforms.
118 static void apply_mdct(NellyMoserEncodeContext *s)
// in1 and in2 are the second and third NELLY_BUF_LEN-sized parts of s->buf.
121 float *in1 = s->buf + NELLY_BUF_LEN;
122 float *in2 = s->buf + 2 * NELLY_BUF_LEN;
// First transform: in0 times the window goes to the first half of in_buff,
// in1 goes through vector_fmul_reverse into the second half.
124 s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
125 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
126 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
// Second transform: same windowing applied to in1 and in2.
128 s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
129 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
130 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
// Codec close callback: tears down the state prepared by encode_init().
133 static av_cold int encode_end(AVCodecContext *avctx)
135 NellyMoserEncodeContext *s = avctx->priv_data;
// Release the MDCT context created by ff_mdct_init() in encode_init().
137 ff_mdct_end(&s->mdct_ctx);
// The opt/path tables are allocated only when trellis search is enabled.
// NOTE(review): presumably they are freed inside this branch -- confirm.
139 if (s->avctx->trellis) {
// Close the audio frame queue opened by ff_af_queue_init().
143 ff_af_queue_close(&s->afq);
// Legacy API only: free the coded_frame allocated in encode_init().
144 #if FF_API_OLD_ENCODE_AUDIO
145 av_freep(&avctx->coded_frame);
// Codec init callback: validates the stream parameters and prepares the
// encoder state (frame size, delay, frame queue, MDCT, float DSP, window,
// pow_table and, with trellis enabled, the opt/path tables).
// Returns AVERROR(EINVAL) when the channel count is not 1, or when the
// sample rate is not one of 8000, 16000, 11025, 22050, 44100 while
// strict_std_compliance >= FF_COMPLIANCE_NORMAL. Failed allocations set
// ret to AVERROR(ENOMEM).
151 static av_cold int encode_init(AVCodecContext *avctx)
153 NellyMoserEncodeContext *s = avctx->priv_data;
// Mono only.
156 if (avctx->channels != 1) {
157 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
158 return AVERROR(EINVAL);
// Other sample rates are rejected only at normal or stricter compliance.
161 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
162 avctx->sample_rate != 11025 &&
163 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
164 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
165 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
166 return AVERROR(EINVAL);
// Each packet covers NELLY_SAMPLES input samples; the reported delay is
// NELLY_BUF_LEN samples.
169 avctx->frame_size = NELLY_SAMPLES;
170 avctx->delay = NELLY_BUF_LEN;
171 ff_af_queue_init(avctx, &s->afq);
// Set up the MDCT and the float DSP helpers used by apply_mdct().
// NOTE(review): the ff_mdct_init arguments 8, 0, 32768.0 are presumably the
// transform size in bits, the inverse flag and the scale -- confirm.
173 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
175 avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
177 /* Generate overlap window */
178 ff_init_ff_sine_windows(7);
// Fill the file-scope pow_table; it is rewritten with the same values on
// every init call.
179 for (i = 0; i < POW_TABLE_SIZE; i++)
180 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
// Trellis search needs NELLY_BANDS x OPT_SIZE cost and path tables.
182 if (s->avctx->trellis) {
183 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
184 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
185 if (!s->opt || !s->path) {
186 ret = AVERROR(ENOMEM);
// Legacy API only: allocate the coded_frame that encode_end() frees.
191 #if FF_API_OLD_ENCODE_AUDIO
192 avctx->coded_frame = avcodec_alloc_frame();
193 if (!avctx->coded_frame) {
194 ret = AVERROR(ENOMEM);
// find_best(val, table, LUT, LUT_add, LUT_size): pick the entry of table
// closest to val, using LUT for a starting guess.
// The starting index is LUT[(lrintf(val) >> 8) + LUT_add], with the LUT
// position clipped to 0..LUT_size - 1. The distance from val to
// table[best_idx] is then compared with the distance to table[best_idx + 1].
// The macro uses a variable named best_idx from the calling scope.
// NOTE(review): presumably best_idx is advanced by one when the next entry
// is closer -- confirm.
205 #define find_best(val, table, LUT, LUT_add, LUT_size) \
207 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
208 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
// Greedy exponent selection.
// cand holds one target exponent per band; idx_table receives one table
// index per band. Band 0 is matched against ff_nelly_init_table; every later
// band is matched against ff_nelly_delta_table using the difference between
// its target and the exponent reconstructed so far.
211 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
213 int band, best_idx, power_idx = 0;
214 float power_candidate;
// First band: absolute value looked up through sf_lut.
217 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
218 idx_table[0] = best_idx;
219 power_idx = ff_nelly_init_table[best_idx];
// Remaining bands: quantize the delta against the running reconstruction
// power_idx, then add the chosen delta to it.
221 for (band = 1; band < NELLY_BANDS; band++) {
222 power_candidate = cand[band] - power_idx;
223 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
224 idx_table[band] = best_idx;
225 power_idx += ff_nelly_delta_table[best_idx];
// Cost of representing the value y by x in the given band; used as the
// per-step cost in get_exponent_dynamic().
// NOTE(review): the exact formula is an assumption; the disabled line below
// suggests a squared-difference variant was used at some point -- confirm.
229 static inline float distance(float x, float y, int band)
231 //return pow(fabs(x-y), 2.0);
// Trellis (dynamic programming) exponent selection.
// cand holds one target exponent per band; idx_table receives one table
// index per band. opt[band][v] is the lowest accumulated distance() cost of
// reaching reconstructed exponent v at that band, and path[band][v] is the
// table index that produced it. The tables live in s->opt / s->path.
236 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
238 int i, j, band, best_idx;
239 float power_candidate, best_val;
241 float (*opt )[OPT_SIZE] = s->opt ;
242 uint8_t(*path)[OPT_SIZE] = s->path;
// Mark every state of every band as unreachable; opt[0][i] with
// i up to NELLY_BANDS * OPT_SIZE walks the whole contiguous table.
244 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
245 opt[0][i] = INFINITY;
// Seed band 0 with the 64 entries of ff_nelly_init_table.
248 for (i = 0; i < 64; i++) {
249 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
250 path[0][ff_nelly_init_table[i]] = i;
253 for (band = 1; band < NELLY_BANDS; band++) {
256 int idx_min, idx_max, idx;
257 power_candidate = cand[band];
// Search a window of half-width q around the candidates and widen it by a
// factor of 4 for as long as c stays zero.
// NOTE(review): idx_max is derived from cand[band - 1] while idx_min uses
// cand[band]; confirm that this asymmetry is intended.
258 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
259 idx_min = FFMAX(0, cand[band] - q);
260 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
261 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
262 if ( isinf(opt[band - 1][i]) )
264 for (j = 0; j < 32; j++) {
265 idx = i + ff_nelly_delta_table[j];
// Relax: keep the cheaper way of reaching idx in this band.
268 if (idx >= idx_min) {
269 tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
270 if (opt[band][idx] > tmp) {
271 opt[band][idx] = tmp;
// Find the cheapest end state in the last band.
284 band = NELLY_BANDS - 1;
285 for (i = 0; i < OPT_SIZE; i++) {
286 if (best_val > opt[band][i]) {
287 best_val = opt[band][i];
// Walk back from the last band, emitting the stored index of each band and
// undoing its delta to reach the state of the previous band.
291 for (band = NELLY_BANDS - 1; band >= 0; band--) {
292 idx_table[band] = path[band][best_idx];
294 best_idx -= ff_nelly_delta_table[path[band][best_idx]];
300 * Encode NELLY_SAMPLES samples. It assumes that the sample buffer contains 3 * NELLY_BUF_LEN values
301 * @param s encoder context
302 * @param output output buffer
303 * @param output_size size of output buffer
305 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
308 int i, j, band, block, best_idx, power_idx = 0;
309 float power_val, coeff, coeff_sum;
310 float pows[NELLY_FILL_LEN];
311 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
312 float cand[NELLY_BANDS];
// Bit writer over the caller-supplied buffer; the size is passed in bits.
316 init_put_bits(&pb, output, output_size * 8);
// Per band: sum the squared coefficients of both MDCT blocks, divide by
// (band size << 7), floor at 1.0 and convert to 1024 * log2 of that value
// (log(x) * 1024.0 / M_LN2).
319 for (band = 0; band < NELLY_BANDS; band++) {
321 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
322 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
323 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
326 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
// Choose the exponent indices: trellis search when enabled, greedy otherwise.
329 if (s->avctx->trellis) {
330 get_exponent_dynamic(s, cand, idx_table);
332 get_exponent_greedy(s, cand, idx_table);
// Write the exponent indices (6 bits for the init-table index of band 0,
// 5 bits for each delta-table index), rebuild the running exponent
// power_idx and scale the coefficients of the band in both blocks.
336 for (band = 0; band < NELLY_BANDS; band++) {
338 power_idx += ff_nelly_delta_table[idx_table[band]];
339 put_bits(&pb, 5, idx_table[band]);
341 power_idx = ff_nelly_init_table[idx_table[0]];
342 put_bits(&pb, 6, idx_table[0]);
// Low 11 bits of power_idx select the pow_table entry, the remaining high
// bits plus POW_TABLE_OFFSET give the power-of-two divisor.
344 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
345 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
346 s->mdct_out[i] *= power_val;
347 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
// NOTE(review): presumably derives the per-coefficient bit counts in bits[]
// from pows[] -- confirm against ff_nelly_get_sample_bits().
352 ff_nelly_get_sample_bits(pows, bits);
// Quantize the first NELLY_FILL_LEN coefficients of each of the two blocks.
// The sub-table for a bit count b starts at offset (1 << b) - 1 of
// ff_nelly_dequantization_table; the chosen index is written with b bits.
354 for (block = 0; block < 2; block++) {
355 for (i = 0; i < NELLY_FILL_LEN; i++) {
357 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
358 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
361 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
362 quant_lut_offset[bits[i]],
363 quant_lut_offset[bits[i]+1] - 1
365 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
368 put_bits(&pb, bits[i], best_idx);
// Pad with zero bits up to NELLY_HEADER_BITS + NELLY_DETAIL_BITS.
372 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
// Zero whatever remains of the output buffer after the written bits.
376 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
// Encode callback: takes up to NELLY_SAMPLES float samples from frame,
// updates the sliding sample buffer and emits one packet of NELLY_BLOCK_LEN
// bytes through encode_block().
379 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
380 const AVFrame *frame, int *got_packet_ptr)
382 NellyMoserEncodeContext *s = avctx->priv_data;
// Slide the window: the final NELLY_BUF_LEN samples of buf become its first
// part. Source and destination do not overlap (buf holds 3 * NELLY_BUF_LEN).
388 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
// Append the new samples after the carried-over part.
390 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
391 frame->nb_samples * sizeof(*s->buf));
// Zero-pad a short frame up to NELLY_SAMPLES. After that the frame is
// registered with the audio frame queue so that its timing can be
// retrieved below.
// NOTE(review): the statement guarded by the nb_samples >= NELLY_BUF_LEN
// test is presumably end-of-stream bookkeeping -- confirm.
392 if (frame->nb_samples < NELLY_SAMPLES) {
393 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
394 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
395 if (frame->nb_samples >= NELLY_BUF_LEN)
398 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
// Fill the new part of the buffer with silence.
// NOTE(review): presumably the flush path taken when no input frame is
// supplied -- confirm the enclosing condition.
401 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
// Allocate the fixed-size output packet and encode into it.
405 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)))
407 encode_block(s, avpkt->data, avpkt->size);
409 /* Get the next frame pts/duration */
410 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
// Codec registration entry for the Nellymoser Asao audio encoder.
417 AVCodec ff_nellymoser_encoder = {
418 .name = "nellymoser",
419 .type = AVMEDIA_TYPE_AUDIO,
420 .id = AV_CODEC_ID_NELLYMOSER,
// Size of the private state reached through avctx->priv_data.
421 .priv_data_size = sizeof(NellyMoserEncodeContext),
// Packets are produced by encode_frame().
423 .encode2 = encode_frame,
// encode_frame() zero-pads frames shorter than NELLY_SAMPLES, matching the
// small-last-frame flag; the delay flag goes with avctx->delay set in
// encode_init().
425 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
426 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
// Only float samples (AV_SAMPLE_FMT_FLT) are listed as supported input.
427 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
428 AV_SAMPLE_FMT_NONE },