3 * Copyright (C) 2008 Alexander E. Patrakov
4 * 2010 Benjamin Larsson
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/common.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/audioconvert.h"
/* Compile-time limits and layout constants of the encoder. */
#define MAX_CHANNELS 6            /* first dimension of the per-channel state arrays */
#define DCA_SUBBANDS_32 32
#define DCA_MAX_FRAME_SIZE 16383  /* size of the output area handed to the bit writer */
#define DCA_HEADER_SIZE 13        /* bytes reserved at the start of a frame for the header */

#define DCA_SUBBANDS 32 ///< Subband activity count
#define QUANTIZER_BITS 16

#define SUBSUBFRAMES 4
/* NOTE(review): SUBFRAMES is not defined in this span; it has to be defined
 * before PCM_SAMPLES is expanded. */
#define PCM_SAMPLES (SUBFRAMES*SUBSUBFRAMES*8)

#define LFE_INTERPOLATION 64      /* input samples consumed per LFE output sample */
/* Offset of the LFE sample inside one interleaved input sample group, indexed
 * by the audio channel arrangement (a_mode); encode_init() copies the entry
 * into lfe_offset. */
static const int8_t dca_lfe_index[] = {
    1, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 1, 3, 2, 3
};
/* Channel reordering used when an LFE channel is present: one row per audio
 * channel arrangement (a_mode), one column per input channel position.
 * An entry of -1 marks an input position that is not coded as a primary
 * channel (encode_frame() skips it). */
static const int8_t dca_channel_reorder_lfe[][9] = {
    { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 1,  2,  0, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1,  2, -1, -1, -1, -1, -1 },
    { 1,  2,  0, -1,  3, -1, -1, -1, -1 },
    { 0,  1, -1,  2,  3, -1, -1, -1, -1 },
    { 1,  2,  0, -1,  3,  4, -1, -1, -1 },
    { 2,  3, -1,  0,  1,  4,  5, -1, -1 },
    { 1,  2,  0, -1,  3,  4,  5, -1, -1 },
    { 0, -1,  4,  5,  2,  3,  1, -1, -1 },
    { 3,  4,  1, -1,  0,  2,  5,  6, -1 },
    { 2,  3, -1,  5,  7,  0,  1,  4,  6 },
    { 3,  4,  1, -1,  0,  2,  5,  7,  6 },
};
/* Channel reordering used when no LFE channel is present: one row per audio
 * channel arrangement (a_mode), one column per input channel position.
 * An entry of -1 marks a position that is not coded (encode_frame() skips it). */
static const int8_t dca_channel_reorder_nolfe[][9] = {
    { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
    { 1,  2,  0, -1, -1, -1, -1, -1, -1 },
    { 0,  1,  2, -1, -1, -1, -1, -1, -1 },
    { 1,  2,  0,  3, -1, -1, -1, -1, -1 },
    { 0,  1,  2,  3, -1, -1, -1, -1, -1 },
    { 1,  2,  0,  3,  4, -1, -1, -1, -1 },
    { 2,  3,  0,  1,  4,  5, -1, -1, -1 },
    { 1,  2,  0,  3,  4,  5, -1, -1, -1 },
    { 0,  4,  5,  2,  3,  1, -1, -1, -1 },
    { 3,  4,  1,  0,  2,  5,  6, -1, -1 },
    { 2,  3,  5,  7,  0,  1,  4,  6, -1 },
    { 3,  4,  1,  0,  2,  5,  7,  6, -1 },
};
/*
 * Encoder state members (the code below accesses them through DCAContext).
 * NOTE(review): the enclosing struct declaration and several members used
 * elsewhere in the file (e.g. pb, frame_size, prim_channels, lfe_channel,
 * sample_rate_code, num_channel, lfe_state, lfe_offset) are not visible in
 * this span.
 */
94 int32_t history[MAX_CHANNELS][512]; /* This is a circular buffer */
/* Per-channel write position in history[]; add_new_samples() wraps it to 0 at 512. */
95 int start[MAX_CHANNELS];
/* Scale factor index per channel and subband, chosen in put_subframe(). */
100 int scale_factor[MAX_CHANNELS][DCA_SUBBANDS_32];
/* Scale factor index for the LFE samples, chosen in put_subframe(). */
101 int lfe_scale_factor;
/* Downsampled LFE samples, filled by encode_frame() via lfe_downsample(). */
102 int lfe_data[SUBFRAMES*SUBSUBFRAMES*4];
104 int a_mode; ///< audio channels arrangement
108 const int8_t *channel_order_tab; ///< channel reordering table, lfe and non lfe
/* Scratch buffer for one block of input PCM widened to 32 bits; sized for the
 * larger of the two block lengths used by encode_frame(). */
110 int32_t pcm[FFMAX(LFE_INTERPOLATION, DCA_SUBBANDS_32)];
111 int32_t subband[PCM_SAMPLES][MAX_CHANNELS][DCA_SUBBANDS_32]; /* [sample][channel][subband] */
/* Lookup table filled by qmf_init() and read by band_delta_factor(). */
static int32_t cos_table[128];

/**
 * Multiply two 32-bit values and return the upper 32 bits of the 64-bit
 * product, rounded to nearest.
 *
 * @param a first factor
 * @param b second factor
 * @return (a * b + 2^31) >> 32
 */
static inline int32_t mul32(int32_t a, int32_t b)
{
    int64_t product = (int64_t) a * b;

    /* round the result before truncating - improves accuracy */
    return (int32_t) ((product + 0x80000000LL) >> 32);
}
123 /* Integer version of the cosine modulated Pseudo QMF */
125 static void qmf_init(void)
128 int32_t c[17], s[17];
129 s[0] = 0; /* sin(index * PI / 64) * 0x7fffffff */
130 c[0] = 0x7fffffff; /* cos(index * PI / 64) * 0x7fffffff */
132 for (i = 1; i <= 16; i++) {
133 s[i] = 2 * (mul32(c[i - 1], 105372028) + mul32(s[i - 1], 2144896908));
134 c[i] = 2 * (mul32(c[i - 1], 2144896908) - mul32(s[i - 1], 105372028));
137 for (i = 0; i < 16; i++) {
138 cos_table[i ] = c[i] >> 3; /* avoid output overflow */
139 cos_table[i + 16] = s[16 - i] >> 3;
140 cos_table[i + 32] = -s[i] >> 3;
141 cos_table[i + 48] = -c[16 - i] >> 3;
142 cos_table[i + 64] = -c[i] >> 3;
143 cos_table[i + 80] = -s[16 - i] >> 3;
144 cos_table[i + 96] = s[i] >> 3;
145 cos_table[i + 112] = c[16 - i] >> 3;
/*
 * Look up the cos_table[] entry for a band and sample number: the index is
 * band * (2 * sample_num + 1), wrapped to the 128-entry table.
 * NOTE(review): the braces and the lines between the index computation and
 * the return are not visible in this span; confirm against the full source
 * whether some bands are special-cased before the table lookup.
 */
149 static int32_t band_delta_factor(int band, int sample_num)
151 int index = band * (2 * sample_num + 1);
155 return cos_table[index & 127];
158 static void add_new_samples(DCAContext *c, const int32_t *in,
159 int count, int channel)
163 /* Place new samples into the history buffer */
164 for (i = 0; i < count; i++) {
165 c->history[channel][c->start[channel] + i] = in[i];
166 av_assert0(c->start[channel] + i < 512);
168 c->start[channel] += count;
169 if (c->start[channel] == 512)
170 c->start[channel] = 0;
171 av_assert0(c->start[channel] < 512);
/*
 * Push 32 new input samples for one channel into its history buffer and
 * compute 32 subband values into out[] from the 512-sample history.
 * NOTE(review): the end of the parameter list, the braces, the declarations
 * of band/i/j/k/resp and any initialisation of resp are not visible in this
 * span; confirm where resp is reset before relying on the per-band values.
 */
174 static void qmf_decompose(DCAContext *c, int32_t in[32], int32_t out[32],
179 int32_t accum[DCA_SUBBANDS_32] = {0};
181 add_new_samples(c, in, DCA_SUBBANDS_32, channel);
183 /* Calculate the dot product of the signal with the (possibly inverted)
184 reference decoder's response to this vector:
185 (0.0, 0.0, ..., 0.0, -1.0, 1.0, 0.0, ..., 0.0)
186 so that -1.0 cancels 1.0 from the previous step */
/* Walk the circular history from the write position to the end of the buffer,
 * then from index 0 up to the write position. j indexes UnQMF[]; k (starting
 * at 48) folds each tap onto one of 32 accumulators, reversing the order
 * whenever bit 5 of k is set. */
188 for (k = 48, j = 0, i = c->start[channel]; i < 512; k++, j++, i++)
189 accum[(k & 32) ? (31 - (k & 31)) : (k & 31)] += mul32(c->history[channel][i], UnQMF[j]);
190 for (i = 0; i < c->start[channel]; k++, j++, i++)
191 accum[(k & 32) ? (31 - (k & 31)) : (k & 31)] += mul32(c->history[channel][i], UnQMF[j]);
194 /* TODO: implement FFT instead of this naive calculation */
195 for (band = 0; band < DCA_SUBBANDS_32; band++) {
196 for (j = 0; j < 32; j++)
197 resp += mul32(accum[j], band_delta_factor(band, j));
/* the sign is flipped for bands whose index has bit 1 set */
199 out[band] = (band & 2) ? (-resp) : resp;
/* Integer LFE filter taps; filled by init_lfe_fir() from lfe_fir_64[]. */
203 static int32_t lfe_fir_64i[512];
/*
 * Append LFE_INTERPOLATION input samples to the history slot at index
 * c->prim_channels and accumulate the dot product of that 512-sample history
 * with lfe_fir_64i[].
 * NOTE(review): the braces, the declarations of i, j and accum, and the
 * return statement are not visible in this span.
 * NOTE(review): the slot index equals c->prim_channels, so it must stay below
 * MAX_CHANNELS for history[] - confirm against encode_init().
 */
204 static int lfe_downsample(DCAContext *c, int32_t in[LFE_INTERPOLATION])
207 int channel = c->prim_channels;
210 add_new_samples(c, in, LFE_INTERPOLATION, channel);
/* walk the circular buffer from the write position to the end, then wrap;
 * j runs over the filter taps across both loops */
211 for (i = c->start[channel], j = 0; i < 512; i++, j++)
212 accum += mul32(c->history[channel][i], lfe_fir_64i[j]);
213 for (i = 0; i < c->start[channel]; i++, j++)
214 accum += mul32(c->history[channel][i], lfe_fir_64i[j]);
/*
 * Convert the lfe_fir_64[] filter taps to fixed point (scaled by 2^25) and
 * store them in lfe_fir_64i[].
 * NOTE(review): the braces, the declaration of i and the code that reads and
 * sets `initialized` are not visible in this span; presumably the flag makes
 * the conversion run only once - confirm.
 */
218 static void init_lfe_fir(void)
220 static int initialized = 0;
225 for (i = 0; i < 512; i++)
226 lfe_fir_64i[i] = lfe_fir_64[i] * (1 << 25); //float -> int32_t
230 static void put_frame_header(DCAContext *c)
233 put_bits(&c->pb, 16, 0x7ffe);
234 put_bits(&c->pb, 16, 0x8001);
236 /* Frame type: normal */
237 put_bits(&c->pb, 1, 1);
239 /* Deficit sample count: none */
240 put_bits(&c->pb, 5, 31);
242 /* CRC is not present */
243 put_bits(&c->pb, 1, 0);
245 /* Number of PCM sample blocks */
246 put_bits(&c->pb, 7, PCM_SAMPLES-1);
248 /* Primary frame byte size */
249 put_bits(&c->pb, 14, c->frame_size-1);
251 /* Audio channel arrangement: L + R (stereo) */
252 put_bits(&c->pb, 6, c->num_channel);
254 /* Core audio sampling frequency */
255 put_bits(&c->pb, 4, c->sample_rate_code);
257 /* Transmission bit rate: 1411.2 kbps */
258 put_bits(&c->pb, 5, 0x16); /* FIXME: magic number */
260 /* Embedded down mix: disabled */
261 put_bits(&c->pb, 1, 0);
263 /* Embedded dynamic range flag: not present */
264 put_bits(&c->pb, 1, 0);
266 /* Embedded time stamp flag: not present */
267 put_bits(&c->pb, 1, 0);
269 /* Auxiliary data flag: not present */
270 put_bits(&c->pb, 1, 0);
272 /* HDCD source: no */
273 put_bits(&c->pb, 1, 0);
275 /* Extension audio ID: N/A */
276 put_bits(&c->pb, 3, 0);
278 /* Extended audio data: not present */
279 put_bits(&c->pb, 1, 0);
281 /* Audio sync word insertion flag: after each sub-frame */
282 put_bits(&c->pb, 1, 0);
284 /* Low frequency effects flag: not present or interpolation factor=64 */
285 put_bits(&c->pb, 2, c->lfe_state);
287 /* Predictor history switch flag: on */
288 put_bits(&c->pb, 1, 1);
291 /* Multirate interpolator switch: non-perfect reconstruction */
292 put_bits(&c->pb, 1, 0);
294 /* Encoder software revision: 7 */
295 put_bits(&c->pb, 4, 7);
297 /* Copy history: 0 */
298 put_bits(&c->pb, 2, 0);
300 /* Source PCM resolution: 16 bits, not DTS ES */
301 put_bits(&c->pb, 3, 0);
303 /* Front sum/difference coding: no */
304 put_bits(&c->pb, 1, 0);
306 /* Surrounds sum/difference coding: no */
307 put_bits(&c->pb, 1, 0);
309 /* Dialog normalization: 0 dB */
310 put_bits(&c->pb, 4, 0);
313 static void put_primary_audio_header(DCAContext *c)
315 static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
316 static const int thr[11] = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
319 /* Number of subframes */
320 put_bits(&c->pb, 4, SUBFRAMES - 1);
322 /* Number of primary audio channels */
323 put_bits(&c->pb, 3, c->prim_channels - 1);
325 /* Subband activity count */
326 for (ch = 0; ch < c->prim_channels; ch++)
327 put_bits(&c->pb, 5, DCA_SUBBANDS - 2);
329 /* High frequency VQ start subband */
330 for (ch = 0; ch < c->prim_channels; ch++)
331 put_bits(&c->pb, 5, DCA_SUBBANDS - 1);
333 /* Joint intensity coding index: 0, 0 */
334 for (ch = 0; ch < c->prim_channels; ch++)
335 put_bits(&c->pb, 3, 0);
337 /* Transient mode codebook: A4, A4 (arbitrary) */
338 for (ch = 0; ch < c->prim_channels; ch++)
339 put_bits(&c->pb, 2, 0);
341 /* Scale factor code book: 7 bit linear, 7-bit sqrt table (for each channel) */
342 for (ch = 0; ch < c->prim_channels; ch++)
343 put_bits(&c->pb, 3, 6);
345 /* Bit allocation quantizer select: linear 5-bit */
346 for (ch = 0; ch < c->prim_channels; ch++)
347 put_bits(&c->pb, 3, 6);
349 /* Quantization index codebook select: dummy data
350 to avoid transmission of scale factor adjustment */
352 for (i = 1; i < 11; i++)
353 for (ch = 0; ch < c->prim_channels; ch++)
354 put_bits(&c->pb, bitlen[i], thr[i]);
356 /* Scale factor adjustment index: not transmitted */
/**
 * 8-23 bits quantization
 *
 * Asserts that the sample fits a signed field of the given width and returns
 * its two's complement representation truncated to that many bits.
 *
 * @param sample signed value, must lie in [-2^(bits-1), 2^(bits-1) - 1]
 * @param bits   field width in bits
 * @return the low `bits` bits of sample
 */
static inline uint32_t quantize(int32_t sample, int bits)
{
    av_assert0(sample < 1 << (bits - 1));
    av_assert0(sample >= -(1 << (bits - 1)));
    /* mask in unsigned arithmetic; the result is the same bit pattern */
    return (uint32_t) sample & ((1u << bits) - 1);
}
/*
 * Choose a scale factor index for a peak magnitude.
 * max_value is first normalised: scaled up by 2^15, divided by
 * lossy_quant[bits + 3] and shifted down by (bits - 1). The result is then
 * compared against entries of scale_factor_quant7[].
 * NOTE(review): the braces, the search loop around the comparison and the
 * return statement are not visible in this span; i = 0 and j = 128 look like
 * the bounds of a search over the table - confirm against the full source.
 */
371 static inline int find_scale_factor7(int64_t max_value, int bits)
373 int i = 0, j = 128, q;
374 max_value = ((max_value << 15) / lossy_quant[bits + 3]) >> (bits - 1);
377 if (max_value < scale_factor_quant7[q])
386 static inline void put_sample7(DCAContext *c, int64_t sample, int bits,
389 sample = (sample << 15) / ((int64_t) lossy_quant[bits + 3] * scale_factor_quant7[scale_factor]);
390 put_bits(&c->pb, bits, quantize((int) sample, bits));
393 static void put_subframe(DCAContext *c,
394 int32_t subband_data[8 * SUBSUBFRAMES][MAX_CHANNELS][32],
397 int i, sub, ss, ch, max_value;
398 int32_t *lfe_data = c->lfe_data + 4 * SUBSUBFRAMES * subframe;
400 /* Subsubframes count */
401 put_bits(&c->pb, 2, SUBSUBFRAMES -1);
403 /* Partial subsubframe sample count: dummy */
404 put_bits(&c->pb, 3, 0);
406 /* Prediction mode: no ADPCM, in each channel and subband */
407 for (ch = 0; ch < c->prim_channels; ch++)
408 for (sub = 0; sub < DCA_SUBBANDS; sub++)
409 put_bits(&c->pb, 1, 0);
411 /* Prediction VQ addres: not transmitted */
412 /* Bit allocation index */
413 for (ch = 0; ch < c->prim_channels; ch++)
414 for (sub = 0; sub < DCA_SUBBANDS; sub++)
415 put_bits(&c->pb, 5, QUANTIZER_BITS+3);
417 if (SUBSUBFRAMES > 1) {
418 /* Transition mode: none for each channel and subband */
419 for (ch = 0; ch < c->prim_channels; ch++)
420 for (sub = 0; sub < DCA_SUBBANDS; sub++)
421 put_bits(&c->pb, 1, 0); /* codebook A4 */
424 /* Determine scale_factor */
425 for (ch = 0; ch < c->prim_channels; ch++)
426 for (sub = 0; sub < DCA_SUBBANDS; sub++) {
428 for (i = 0; i < 8 * SUBSUBFRAMES; i++)
429 max_value = FFMAX(max_value, FFABS(subband_data[i][ch][sub]));
430 c->scale_factor[ch][sub] = find_scale_factor7(max_value, QUANTIZER_BITS);
433 if (c->lfe_channel) {
435 for (i = 0; i < 4 * SUBSUBFRAMES; i++)
436 max_value = FFMAX(max_value, FFABS(lfe_data[i]));
437 c->lfe_scale_factor = find_scale_factor7(max_value, LFE_BITS);
440 /* Scale factors: the same for each channel and subband,
441 encoded according to Table D.1.2 */
442 for (ch = 0; ch < c->prim_channels; ch++)
443 for (sub = 0; sub < DCA_SUBBANDS; sub++)
444 put_bits(&c->pb, 7, c->scale_factor[ch][sub]);
446 /* Joint subband scale factor codebook select: not transmitted */
447 /* Scale factors for joint subband coding: not transmitted */
448 /* Stereo down-mix coefficients: not transmitted */
449 /* Dynamic range coefficient: not transmitted */
450 /* Stde information CRC check word: not transmitted */
451 /* VQ encoded high frequency subbands: not transmitted */
454 if (c->lfe_channel) {
455 for (i = 0; i < 4 * SUBSUBFRAMES; i++)
456 put_sample7(c, lfe_data[i], LFE_BITS, c->lfe_scale_factor);
457 put_bits(&c->pb, 8, c->lfe_scale_factor);
460 /* Audio data (subsubframes) */
462 for (ss = 0; ss < SUBSUBFRAMES ; ss++)
463 for (ch = 0; ch < c->prim_channels; ch++)
464 for (sub = 0; sub < DCA_SUBBANDS; sub++)
465 for (i = 0; i < 8; i++)
466 put_sample7(c, subband_data[ss * 8 + i][ch][sub], QUANTIZER_BITS, c->scale_factor[ch][sub]);
469 put_bits(&c->pb, 16, 0xffff);
472 static void put_frame(DCAContext *c,
473 int32_t subband_data[PCM_SAMPLES][MAX_CHANNELS][32],
477 init_put_bits(&c->pb, frame + DCA_HEADER_SIZE, DCA_MAX_FRAME_SIZE-DCA_HEADER_SIZE);
479 put_primary_audio_header(c);
480 for (i = 0; i < SUBFRAMES; i++)
481 put_subframe(c, &subband_data[SUBSUBFRAMES * 8 * i], i);
483 flush_put_bits(&c->pb);
484 c->frame_size = (put_bits_count(&c->pb) >> 3) + DCA_HEADER_SIZE;
486 init_put_bits(&c->pb, frame, DCA_HEADER_SIZE);
488 flush_put_bits(&c->pb);
491 static int encode_frame(AVCodecContext *avctx, uint8_t *frame,
492 int buf_size, void *data)
495 DCAContext *c = avctx->priv_data;
496 int16_t *samples = data;
497 int real_channel = 0;
499 for (i = 0; i < PCM_SAMPLES; i ++) { /* i is the decimated sample number */
500 for (channel = 0; channel < c->prim_channels + 1; channel++) {
501 /* Get 32 PCM samples */
502 for (k = 0; k < 32; k++) { /* k is the sample number in a 32-sample block */
503 c->pcm[k] = samples[avctx->channels * (32 * i + k) + channel] << 16;
505 /* Put subband samples into the proper place */
506 real_channel = c->channel_order_tab[channel];
507 if (real_channel >= 0) {
508 qmf_decompose(c, c->pcm, &c->subband[i][real_channel][0], real_channel);
513 if (c->lfe_channel) {
514 for (i = 0; i < PCM_SAMPLES / 2; i++) {
515 for (k = 0; k < LFE_INTERPOLATION; k++) /* k is the sample number in a 32-sample block */
516 c->pcm[k] = samples[avctx->channels * (LFE_INTERPOLATION*i+k) + c->lfe_offset] << 16;
517 c->lfe_data[i] = lfe_downsample(c, c->pcm);
521 put_frame(c, c->subband, frame);
523 return c->frame_size;
/*
 * Configure the encoder context from the codec context: channel count and
 * arrangement, LFE handling, sample rate code and frame size.
 * NOTE(review): many lines of this function are not visible in this span
 * (braces, declarations, the default label of the switch, part of the LFE
 * branch, the sample-rate match handling, any table initialisation and the
 * return); the comments below describe only what is shown.
 */
526 static int encode_init(AVCodecContext *avctx)
528 DCAContext *c = avctx->priv_data;
531 c->prim_channels = avctx->channels;
/* an LFE channel is assumed for 3- and 6-channel input */
532 c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6);
/* map the layout to the channel arrangement code; all 5.x layouts share code 9 */
534 switch (avctx->channel_layout) {
535 case AV_CH_LAYOUT_STEREO: c->a_mode = 2; c->num_channel = 2; break;
536 case AV_CH_LAYOUT_5POINT0: c->a_mode = 9; c->num_channel = 9; break;
537 case AV_CH_LAYOUT_5POINT1: c->a_mode = 9; c->num_channel = 9; break;
538 case AV_CH_LAYOUT_5POINT0_BACK: c->a_mode = 9; c->num_channel = 9; break;
539 case AV_CH_LAYOUT_5POINT1_BACK: c->a_mode = 9; c->num_channel = 9; break;
541 av_log(avctx, AV_LOG_ERROR,
542 "Only stereo, 5.0, 5.1 channel layouts supported at the moment!\n");
543 return AVERROR_PATCHWELCOME;
/* select the reorder table, header LFE flag and LFE input offset.
 * NOTE(review): lfe_downsample() uses history[c->prim_channels], so
 * prim_channels has to end up below MAX_CHANNELS when an LFE channel is
 * present; any adjustment would be in the lines not visible here - confirm. */
546 if (c->lfe_channel) {
549 c->channel_order_tab = dca_channel_reorder_lfe[c->a_mode];
550 c->lfe_state = LFE_PRESENT;
551 c->lfe_offset = dca_lfe_index[c->a_mode];
553 c->channel_order_tab = dca_channel_reorder_nolfe[c->a_mode];
554 c->lfe_state = LFE_MISSING;
/* look for the requested sample rate among the 16 entries of dca_sample_rates[] */
557 for (i = 0; i < 16; i++) {
558 if (dca_sample_rates[i] && (dca_sample_rates[i] == avctx->sample_rate))
/* error report listing every table entry (including zero entries) */
562 av_log(avctx, AV_LOG_ERROR, "Sample rate %iHz not supported, only ", avctx->sample_rate);
563 for (i = 0; i < 16; i++)
564 av_log(avctx, AV_LOG_ERROR, "%d, ", dca_sample_rates[i]);
565 av_log(avctx, AV_LOG_ERROR, "supported.\n");
/* the table index of the matching rate is the code written to the frame header */
568 c->sample_rate_code = i;
/* samples per channel consumed by each encode_frame() call */
570 avctx->frame_size = 32 * PCM_SAMPLES;
/*
 * Codec registration entry for the DCA encoder: an audio codec with
 * DCAContext as private data, encode_frame() as the encode callback, flagged
 * experimental and accepting signed 16-bit samples only.
 * NOTE(review): some initializers and the closing brace are not visible in
 * this span.
 */
577 AVCodec ff_dca_encoder = {
579 .type = AVMEDIA_TYPE_AUDIO,
581 .priv_data_size = sizeof(DCAContext),
583 .encode = encode_frame,
584 .capabilities = CODEC_CAP_EXPERIMENTAL,
585 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
586 .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),