3 * Copyright (C) 2008 Alexander E. Patrakov
4 * 2010 Benjamin Larsson
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/common.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/audioconvert.h"
/* Compile-time sizing constants for the encoder. */
/* First dimension of the per-channel history, start and scale_factor arrays. */
37 #define MAX_CHANNELS 6
/* Number of samples consumed and subband values produced per qmf_decompose() call. */
38 #define DCA_SUBBANDS_32 32
/* Size passed to the bit writer and to packet allocation (see put_frame(), encode_frame()). */
39 #define DCA_MAX_FRAME_SIZE 16383
/* Bytes reserved at the start of the frame buffer for the frame header. */
40 #define DCA_HEADER_SIZE 13
42 #define DCA_SUBBANDS 32 ///< Subband activity count
/* Bit width used for every subband sample written by put_subframe(). */
43 #define QUANTIZER_BITS 16
45 #define SUBSUBFRAMES 4
/* NOTE(review): SUBFRAMES is expanded here but its definition is not visible in this excerpt. */
46 #define PCM_SAMPLES (SUBFRAMES*SUBSUBFRAMES*8)
/* Number of input samples consumed per lfe_downsample() call. */
48 #define LFE_INTERPOLATION 64
/* Indexed by c->a_mode in encode_init() to obtain c->lfe_offset, the
   per-frame channel offset encode_frame() uses when reading LFE samples
   from the interleaved input.
   NOTE(review): the closing "};" of this initializer is not visible in
   this excerpt. */
52 static const int8_t dca_lfe_index[] = {
53 1,2,2,2,2,3,2,3,2,3,2,3,1,3,2,3
/* Channel reordering table used when c->lfe_channel is set.
   encode_init() selects the row with c->a_mode; encode_frame() then looks
   up each input channel index in that row to get the destination channel.
   A negative entry makes encode_frame() skip that input channel in its
   subband loop.
   NOTE(review): the closing "};" of this initializer is not visible in
   this excerpt. */
56 static const int8_t dca_channel_reorder_lfe[][9] = {
57 { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
58 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
59 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
60 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
61 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
62 { 1, 2, 0, -1, -1, -1, -1, -1, -1 },
63 { 0, 1, -1, 2, -1, -1, -1, -1, -1 },
64 { 1, 2, 0, -1, 3, -1, -1, -1, -1 },
65 { 0, 1, -1, 2, 3, -1, -1, -1, -1 },
66 { 1, 2, 0, -1, 3, 4, -1, -1, -1 },
67 { 2, 3, -1, 0, 1, 4, 5, -1, -1 },
68 { 1, 2, 0, -1, 3, 4, 5, -1, -1 },
69 { 0, -1, 4, 5, 2, 3, 1, -1, -1 },
70 { 3, 4, 1, -1, 0, 2, 5, 6, -1 },
71 { 2, 3, -1, 5, 7, 0, 1, 4, 6 },
72 { 3, 4, 1, -1, 0, 2, 5, 7, 6 },
/* Channel reordering table used when c->lfe_channel is not set.
   encode_init() selects the row with c->a_mode; encode_frame() then looks
   up each input channel index in that row to get the destination channel.
   A negative entry makes encode_frame() skip that input channel in its
   subband loop.
   NOTE(review): the closing "};" of this initializer is not visible in
   this excerpt. */
75 static const int8_t dca_channel_reorder_nolfe[][9] = {
76 { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
77 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
78 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
79 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
80 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
81 { 1, 2, 0, -1, -1, -1, -1, -1, -1 },
82 { 0, 1, 2, -1, -1, -1, -1, -1, -1 },
83 { 1, 2, 0, 3, -1, -1, -1, -1, -1 },
84 { 0, 1, 2, 3, -1, -1, -1, -1, -1 },
85 { 1, 2, 0, 3, 4, -1, -1, -1, -1 },
86 { 2, 3, 0, 1, 4, 5, -1, -1, -1 },
87 { 1, 2, 0, 3, 4, 5, -1, -1, -1 },
88 { 0, 4, 5, 2, 3, 1, -1, -1, -1 },
89 { 3, 4, 1, 0, 2, 5, 6, -1, -1 },
90 { 2, 3, 5, 7, 0, 1, 4, 6, -1 },
91 { 3, 4, 1, 0, 2, 5, 7, 6, -1 },
/* Fields of the encoder context (used as DCAContext throughout the file).
   NOTE(review): the struct header, its closing line and several members
   referenced elsewhere (pb, frame_size, prim_channels, lfe_channel,
   num_channel, lfe_state, lfe_offset) are not visible in this excerpt. */
/* Per-channel sample history written by add_new_samples(). */
96 int32_t history[MAX_CHANNELS][512]; /* This is a circular buffer */
/* Per-channel write position into history[]; wraps to 0 at 512. */
97 int start[MAX_CHANNELS];
/* Index into avpriv_dca_sample_rates chosen in encode_init(); written to the frame header. */
101 int sample_rate_code;
/* Scale factor index per channel and subband, computed and written by put_subframe(). */
102 int scale_factor[MAX_CHANNELS][DCA_SUBBANDS_32];
/* Scale factor index for the LFE samples, computed by put_subframe(). */
103 int lfe_scale_factor;
/* Downsampled LFE values, one per lfe_downsample() call in encode_frame(). */
104 int lfe_data[SUBFRAMES*SUBSUBFRAMES*4];
106 int a_mode; ///< audio channels arrangement
110 const int8_t *channel_order_tab; ///< channel reordering table, lfe and non lfe
/* Staging buffer for one block of input samples; sized for the larger of the two block lengths. */
112 int32_t pcm[FFMAX(LFE_INTERPOLATION, DCA_SUBBANDS_32)];
113 int32_t subband[PCM_SAMPLES][MAX_CHANNELS][DCA_SUBBANDS_32]; /* [sample][channel][subband] */
/* Fixed-point cosine lookup table: filled by qmf_init(), read by
   band_delta_factor() with an index masked to 0..127. */
116 static int32_t cos_table[128];
/**
 * Multiply two 32-bit fixed-point values and keep the high 32 bits.
 *
 * The full 64-bit product is formed first; half of the discarded range
 * (2^31) is added before shifting right by 32 so the result is rounded
 * rather than truncated.
 *
 * @param a first factor
 * @param b second factor
 * @return (a * b + 2^31) >> 32
 */
static inline int32_t mul32(int32_t a, int32_t b)
{
    int64_t r = (int64_t) a * b;
    /* round the result before truncating - improves accuracy */
    return (int32_t) ((r + INT64_C(0x80000000)) >> 32);
}
125 /* Integer version of the cosine modulated Pseudo QMF */
127 static void qmf_init(void)
130 int32_t c[17], s[17];
131 s[0] = 0; /* sin(index * PI / 64) * 0x7fffffff */
132 c[0] = 0x7fffffff; /* cos(index * PI / 64) * 0x7fffffff */
134 for (i = 1; i <= 16; i++) {
135 s[i] = 2 * (mul32(c[i - 1], 105372028) + mul32(s[i - 1], 2144896908));
136 c[i] = 2 * (mul32(c[i - 1], 2144896908) - mul32(s[i - 1], 105372028));
139 for (i = 0; i < 16; i++) {
140 cos_table[i ] = c[i] >> 3; /* avoid output overflow */
141 cos_table[i + 16] = s[16 - i] >> 3;
142 cos_table[i + 32] = -s[i] >> 3;
143 cos_table[i + 48] = -c[16 - i] >> 3;
144 cos_table[i + 64] = -c[i] >> 3;
145 cos_table[i + 80] = -s[16 - i] >> 3;
146 cos_table[i + 96] = s[i] >> 3;
147 cos_table[i + 112] = c[16 - i] >> 3;
/* Look up the cosine-table entry for a (band, sample_num) pair: the index
   band * (2 * sample_num + 1) is reduced to 0..127 by the mask in the
   return statement.
   NOTE(review): the embedded numbering jumps from 153 to 157, so statements
   between the index computation and the return may be missing from this
   excerpt - confirm against the full file before relying on this. */
151 static int32_t band_delta_factor(int band, int sample_num)
153 int index = band * (2 * sample_num + 1);
157 return cos_table[index & 127];
160 static void add_new_samples(DCAContext *c, const int32_t *in,
161 int count, int channel)
165 /* Place new samples into the history buffer */
166 for (i = 0; i < count; i++) {
167 c->history[channel][c->start[channel] + i] = in[i];
168 av_assert0(c->start[channel] + i < 512);
170 c->start[channel] += count;
171 if (c->start[channel] == 512)
172 c->start[channel] = 0;
173 av_assert0(c->start[channel] < 512);
/* Push 32 new input samples for one channel into its history row and
   write 32 subband values to out[].
   First, all 512 history samples are visited starting at
   c->start[channel] and wrapping to index 0; each is multiplied by the
   matching UnQMF[] coefficient and summed into one of 32 accumulators.
   The accumulator index folds k into 0..31, counting downwards whenever
   bit 5 of k is set.
   Then each band combines the accumulators with band_delta_factor(); the
   sign of the result is flipped when bit 1 of the band number is set.
   NOTE(review): the end of the parameter list, the local declarations and
   the initialisation of resp are not visible in this excerpt. Nothing
   appears between the band loop header and the inner j loop, so resp looks
   like it carries over from one band to the next - confirm this is
   intended. */
176 static void qmf_decompose(DCAContext *c, int32_t in[32], int32_t out[32],
181 int32_t accum[DCA_SUBBANDS_32] = {0};
183 add_new_samples(c, in, DCA_SUBBANDS_32, channel);
185 /* Calculate the dot product of the signal with the (possibly inverted)
186 reference decoder's response to this vector:
187 (0.0, 0.0, ..., 0.0, -1.0, 1.0, 0.0, ..., 0.0)
188 so that -1.0 cancels 1.0 from the previous step */
190 for (k = 48, j = 0, i = c->start[channel]; i < 512; k++, j++, i++)
191 accum[(k & 32) ? (31 - (k & 31)) : (k & 31)] += mul32(c->history[channel][i], UnQMF[j]);
/* Second half of the circular walk: k and j continue from the loop above. */
192 for (i = 0; i < c->start[channel]; k++, j++, i++)
193 accum[(k & 32) ? (31 - (k & 31)) : (k & 31)] += mul32(c->history[channel][i], UnQMF[j]);
196 /* TODO: implement FFT instead of this naive calculation */
197 for (band = 0; band < DCA_SUBBANDS_32; band++) {
198 for (j = 0; j < 32; j++)
199 resp += mul32(accum[j], band_delta_factor(band, j));
201 out[band] = (band & 2) ? (-resp) : resp;
/* Fixed-point copy of the LFE FIR coefficients, filled by init_lfe_fir(). */
205 static int32_t lfe_fir_64i[512];
/* Push LFE_INTERPOLATION new LFE input samples into the history row with
   index c->prim_channels, then accumulate the dot product of all 512
   history samples (starting at c->start[channel], wrapping to 0) with
   lfe_fir_64i[].
   NOTE(review): the declarations of i, j and accum and the return
   statement are not visible in this excerpt. */
206 static int lfe_downsample(DCAContext *c, int32_t in[LFE_INTERPOLATION])
209 int channel = c->prim_channels;
212 add_new_samples(c, in, LFE_INTERPOLATION, channel);
213 for (i = c->start[channel], j = 0; i < 512; i++, j++)
214 accum += mul32(c->history[channel][i], lfe_fir_64i[j]);
/* Wrapped part of the history; j continues from the loop above. */
215 for (i = 0; i < c->start[channel]; i++, j++)
216 accum += mul32(c->history[channel][i], lfe_fir_64i[j]);
/* Convert the 512 coefficients of lfe_fir_64[] to fixed point by scaling
   them with 2^25 and store them in lfe_fir_64i[].
   NOTE(review): the `initialized` flag suggests a run-once guard, but the
   statements that test and set it are not visible in this excerpt. */
220 static void init_lfe_fir(void)
222 static int initialized = 0;
227 for (i = 0; i < 512; i++)
228 lfe_fir_64i[i] = lfe_fir_64[i] * (1 << 25); //float -> int32_t
232 static void put_frame_header(DCAContext *c)
235 put_bits(&c->pb, 16, 0x7ffe);
236 put_bits(&c->pb, 16, 0x8001);
238 /* Frame type: normal */
239 put_bits(&c->pb, 1, 1);
241 /* Deficit sample count: none */
242 put_bits(&c->pb, 5, 31);
244 /* CRC is not present */
245 put_bits(&c->pb, 1, 0);
247 /* Number of PCM sample blocks */
248 put_bits(&c->pb, 7, PCM_SAMPLES-1);
250 /* Primary frame byte size */
251 put_bits(&c->pb, 14, c->frame_size-1);
253 /* Audio channel arrangement: L + R (stereo) */
254 put_bits(&c->pb, 6, c->num_channel);
256 /* Core audio sampling frequency */
257 put_bits(&c->pb, 4, c->sample_rate_code);
259 /* Transmission bit rate: 1411.2 kbps */
260 put_bits(&c->pb, 5, 0x16); /* FIXME: magic number */
262 /* Embedded down mix: disabled */
263 put_bits(&c->pb, 1, 0);
265 /* Embedded dynamic range flag: not present */
266 put_bits(&c->pb, 1, 0);
268 /* Embedded time stamp flag: not present */
269 put_bits(&c->pb, 1, 0);
271 /* Auxiliary data flag: not present */
272 put_bits(&c->pb, 1, 0);
274 /* HDCD source: no */
275 put_bits(&c->pb, 1, 0);
277 /* Extension audio ID: N/A */
278 put_bits(&c->pb, 3, 0);
280 /* Extended audio data: not present */
281 put_bits(&c->pb, 1, 0);
283 /* Audio sync word insertion flag: after each sub-frame */
284 put_bits(&c->pb, 1, 0);
286 /* Low frequency effects flag: not present or interpolation factor=64 */
287 put_bits(&c->pb, 2, c->lfe_state);
289 /* Predictor history switch flag: on */
290 put_bits(&c->pb, 1, 1);
293 /* Multirate interpolator switch: non-perfect reconstruction */
294 put_bits(&c->pb, 1, 0);
296 /* Encoder software revision: 7 */
297 put_bits(&c->pb, 4, 7);
299 /* Copy history: 0 */
300 put_bits(&c->pb, 2, 0);
302 /* Source PCM resolution: 16 bits, not DTS ES */
303 put_bits(&c->pb, 3, 0);
305 /* Front sum/difference coding: no */
306 put_bits(&c->pb, 1, 0);
308 /* Surrounds sum/difference coding: no */
309 put_bits(&c->pb, 1, 0);
311 /* Dialog normalization: 0 dB */
312 put_bits(&c->pb, 4, 0);
315 static void put_primary_audio_header(DCAContext *c)
317 static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
318 static const int thr[11] = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
321 /* Number of subframes */
322 put_bits(&c->pb, 4, SUBFRAMES - 1);
324 /* Number of primary audio channels */
325 put_bits(&c->pb, 3, c->prim_channels - 1);
327 /* Subband activity count */
328 for (ch = 0; ch < c->prim_channels; ch++)
329 put_bits(&c->pb, 5, DCA_SUBBANDS - 2);
331 /* High frequency VQ start subband */
332 for (ch = 0; ch < c->prim_channels; ch++)
333 put_bits(&c->pb, 5, DCA_SUBBANDS - 1);
335 /* Joint intensity coding index: 0, 0 */
336 for (ch = 0; ch < c->prim_channels; ch++)
337 put_bits(&c->pb, 3, 0);
339 /* Transient mode codebook: A4, A4 (arbitrary) */
340 for (ch = 0; ch < c->prim_channels; ch++)
341 put_bits(&c->pb, 2, 0);
343 /* Scale factor code book: 7 bit linear, 7-bit sqrt table (for each channel) */
344 for (ch = 0; ch < c->prim_channels; ch++)
345 put_bits(&c->pb, 3, 6);
347 /* Bit allocation quantizer select: linear 5-bit */
348 for (ch = 0; ch < c->prim_channels; ch++)
349 put_bits(&c->pb, 3, 6);
351 /* Quantization index codebook select: dummy data
352 to avoid transmission of scale factor adjustment */
354 for (i = 1; i < 11; i++)
355 for (ch = 0; ch < c->prim_channels; ch++)
356 put_bits(&c->pb, bitlen[i], thr[i]);
358 /* Scale factor adjustment index: not transmitted */
/**
 * 8-23 bits quantization
 *
 * Packs a signed sample into an unsigned field of the given width by
 * keeping the low @p bits bits of its two's-complement representation.
 *
 * @param sample value to pack; asserted to lie in
 *               [-2^(bits-1), 2^(bits-1) - 1]
 * @param bits   width of the field in bits
 * @return the low @p bits bits of sample
 */
static inline uint32_t quantize(int32_t sample, int bits)
{
    av_assert0(sample <    (1 << (bits - 1)));
    av_assert0(sample >= -(1 << (bits - 1)));
    /* Mask in unsigned arithmetic so negative samples are handled without
       relying on the representation of signed integers. */
    return (uint32_t) sample & ((UINT32_C(1) << bits) - 1);
}
/* Choose an index into scale_factor_quant7[] for a given peak value.
   Visible part: max_value is scaled by 2^15 / lossy_quant[bits + 3],
   shifted right by (bits - 1), and then compared against
   scale_factor_quant7[q].
   NOTE(review): the loop around the comparison, the computation of q and
   the return statement are not visible in this excerpt; i = 0 and j = 128
   look like search bounds - TODO confirm against the full file. */
373 static inline int find_scale_factor7(int64_t max_value, int bits)
375 int i = 0, j = 128, q;
376 max_value = ((max_value << 15) / lossy_quant[bits + 3]) >> (bits - 1);
379 if (max_value < scale_factor_quant7[q])
/* Scale one sample and write it to c->pb as a bits-wide field.
   The sample is multiplied by 2^15 and divided by
   lossy_quant[bits + 3] * scale_factor_quant7[scale_factor]; the quotient
   is narrowed to int and packed with quantize().
   NOTE(review): the end of the parameter list, which must declare
   scale_factor, is not visible in this excerpt. */
388 static inline void put_sample7(DCAContext *c, int64_t sample, int bits,
391 sample = (sample << 15) / ((int64_t) lossy_quant[bits + 3] * scale_factor_quant7[scale_factor]);
392 put_bits(&c->pb, bits, quantize((int) sample, bits));
/* Write one subframe to c->pb: side information (prediction mode, bit
   allocation, transition mode, scale factors), optional LFE data, the
   subband samples of every subsubframe, and a trailing 16-bit 0xffff word.
   subband_data is indexed [sample][channel][subband]; the LFE samples for
   this subframe are taken from c->lfe_data at offset
   4 * SUBSUBFRAMES * subframe.
   NOTE(review): the end of the parameter list (which must declare
   subframe) and the statements that reset max_value before each maximum
   search are not visible in this excerpt. */
395 static void put_subframe(DCAContext *c,
396 int32_t subband_data[8 * SUBSUBFRAMES][MAX_CHANNELS][32],
399 int i, sub, ss, ch, max_value;
400 int32_t *lfe_data = c->lfe_data + 4 * SUBSUBFRAMES * subframe;
402 /* Subsubframes count */
403 put_bits(&c->pb, 2, SUBSUBFRAMES -1);
405 /* Partial subsubframe sample count: dummy */
406 put_bits(&c->pb, 3, 0);
408 /* Prediction mode: no ADPCM, in each channel and subband */
409 for (ch = 0; ch < c->prim_channels; ch++)
410 for (sub = 0; sub < DCA_SUBBANDS; sub++)
411 put_bits(&c->pb, 1, 0);
413 /* Prediction VQ address: not transmitted */
414 /* Bit allocation index */
415 for (ch = 0; ch < c->prim_channels; ch++)
416 for (sub = 0; sub < DCA_SUBBANDS; sub++)
417 put_bits(&c->pb, 5, QUANTIZER_BITS+3);
419 if (SUBSUBFRAMES > 1) {
420 /* Transition mode: none for each channel and subband */
421 for (ch = 0; ch < c->prim_channels; ch++)
422 for (sub = 0; sub < DCA_SUBBANDS; sub++)
423 put_bits(&c->pb, 1, 0); /* codebook A4 */
426 /* Determine scale_factor */
/* Peak absolute value over the subframe's 8 * SUBSUBFRAMES samples,
   per channel and subband, mapped to a scale factor index. */
427 for (ch = 0; ch < c->prim_channels; ch++)
428 for (sub = 0; sub < DCA_SUBBANDS; sub++) {
430 for (i = 0; i < 8 * SUBSUBFRAMES; i++)
431 max_value = FFMAX(max_value, FFABS(subband_data[i][ch][sub]));
432 c->scale_factor[ch][sub] = find_scale_factor7(max_value, QUANTIZER_BITS);
/* Same peak search for the LFE samples of this subframe. */
435 if (c->lfe_channel) {
437 for (i = 0; i < 4 * SUBSUBFRAMES; i++)
438 max_value = FFMAX(max_value, FFABS(lfe_data[i]));
439 c->lfe_scale_factor = find_scale_factor7(max_value, LFE_BITS);
442 /* Scale factors: the same for each channel and subband,
443 encoded according to Table D.1.2 */
444 for (ch = 0; ch < c->prim_channels; ch++)
445 for (sub = 0; sub < DCA_SUBBANDS; sub++)
446 put_bits(&c->pb, 7, c->scale_factor[ch][sub]);
448 /* Joint subband scale factor codebook select: not transmitted */
449 /* Scale factors for joint subband coding: not transmitted */
450 /* Stereo down-mix coefficients: not transmitted */
451 /* Dynamic range coefficient: not transmitted */
452 /* Side information CRC check word: not transmitted */
453 /* VQ encoded high frequency subbands: not transmitted */
/* LFE samples are written first, followed by their 8-bit scale factor index. */
456 if (c->lfe_channel) {
457 for (i = 0; i < 4 * SUBSUBFRAMES; i++)
458 put_sample7(c, lfe_data[i], LFE_BITS, c->lfe_scale_factor);
459 put_bits(&c->pb, 8, c->lfe_scale_factor);
462 /* Audio data (subsubframes) */
464 for (ss = 0; ss < SUBSUBFRAMES ; ss++)
465 for (ch = 0; ch < c->prim_channels; ch++)
466 for (sub = 0; sub < DCA_SUBBANDS; sub++)
467 for (i = 0; i < 8; i++)
468 put_sample7(c, subband_data[ss * 8 + i][ch][sub], QUANTIZER_BITS, c->scale_factor[ch][sub]);
/* 16-bit all-ones word that closes the subframe. */
471 put_bits(&c->pb, 16, 0xffff);
/* Assemble one complete frame in the output buffer.
   The payload (primary audio header plus every subframe) is written
   first, starting DCA_HEADER_SIZE bytes into the buffer, so that
   c->frame_size is known; the bit writer is then re-initialised over the
   first DCA_HEADER_SIZE bytes.
   NOTE(review): the end of the parameter list (which must declare frame),
   the declaration of i and the statement between the second
   init_put_bits() and the final flush are not visible in this excerpt;
   presumably that statement writes the frame header - confirm. */
474 static void put_frame(DCAContext *c,
475 int32_t subband_data[PCM_SAMPLES][MAX_CHANNELS][32],
479 init_put_bits(&c->pb, frame + DCA_HEADER_SIZE, DCA_MAX_FRAME_SIZE-DCA_HEADER_SIZE);
481 put_primary_audio_header(c);
482 for (i = 0; i < SUBFRAMES; i++)
483 put_subframe(c, &subband_data[SUBSUBFRAMES * 8 * i], i);
485 flush_put_bits(&c->pb);
/* Payload bytes written so far plus the reserved header bytes. */
486 c->frame_size = (put_bits_count(&c->pb) >> 3) + DCA_HEADER_SIZE;
488 init_put_bits(&c->pb, frame, DCA_HEADER_SIZE);
490 flush_put_bits(&c->pb);
/* Encode one frame of interleaved 16-bit input into avpkt.
   For each of the PCM_SAMPLES blocks of 32 samples and each input
   channel, the channel is mapped through c->channel_order_tab; channels
   that map to a non-negative index are widened to 32 bits and decomposed
   into subbands. When an LFE channel is present it is read separately in
   blocks of LFE_INTERPOLATION samples and downsampled. The frame is then
   written with put_frame() and avpkt->size is set to c->frame_size.
   NOTE(review): the declarations of i, k and channel, the error return
   after packet allocation, several closing braces and the final
   statements of the function are not visible in this excerpt. */
493 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
494 const AVFrame *frame, int *got_packet_ptr)
497 DCAContext *c = avctx->priv_data;
498 const int16_t *samples;
499 int ret, real_channel = 0;
501 if ((ret = ff_alloc_packet2(avctx, avpkt, DCA_MAX_FRAME_SIZE + DCA_HEADER_SIZE)))
504 samples = (const int16_t *)frame->data[0];
505 for (i = 0; i < PCM_SAMPLES; i ++) { /* i is the decimated sample number */
/* The loop runs one past prim_channels so the input slot that may hold
   the LFE channel is also looked up; it is skipped when its table entry
   is negative. */
506 for (channel = 0; channel < c->prim_channels + 1; channel++) {
507 real_channel = c->channel_order_tab[channel];
508 if (real_channel >= 0) {
509 /* Get 32 PCM samples */
/* NOTE(review): samples[] holds int16_t, so "<< 16" is applied to a
   possibly negative int; left-shifting a negative value is undefined in
   ISO C - consider multiplying by 65536 instead. */
510 for (k = 0; k < 32; k++) { /* k is the sample number in a 32-sample block */
511 c->pcm[k] = samples[avctx->channels * (32 * i + k) + channel] << 16;
513 /* Put subband samples into the proper place */
514 qmf_decompose(c, c->pcm, &c->subband[i][real_channel][0], real_channel);
519 if (c->lfe_channel) {
520 for (i = 0; i < PCM_SAMPLES / 2; i++) {
521 for (k = 0; k < LFE_INTERPOLATION; k++) /* k is the sample number in a 64-sample block */
522 c->pcm[k] = samples[avctx->channels * (LFE_INTERPOLATION*i+k) + c->lfe_offset] << 16;
523 c->lfe_data[i] = lfe_downsample(c, c->pcm);
527 put_frame(c, c->subband, avpkt->data);
529 avpkt->size = c->frame_size;
/* Initialise the encoder context from the codec parameters.
   Visible steps: take the channel count from avctx, treat 3- or 6-channel
   input as carrying an LFE channel, fall back to the default layout for
   the channel count when none is given, map the layout to a_mode /
   num_channel, pick the reordering table and LFE state, look up the
   sample rate code, and set avctx->frame_size to 32 * PCM_SAMPLES.
   Unsupported layouts return AVERROR_PATCHWELCOME.
   NOTE(review): the if/switch headers, several braces, the statements
   between "if (c->lfe_channel) {" and the table selection, the body of
   the sample-rate match, and the return statements are not visible in
   this excerpt. */
534 static int encode_init(AVCodecContext *avctx)
536 DCAContext *c = avctx->priv_data;
538 uint64_t layout = avctx->channel_layout;
540 c->prim_channels = avctx->channels;
541 c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6);
544 av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The "
545 "encoder will guess the layout, but it "
546 "might be incorrect.\n");
547 layout = av_get_default_channel_layout(avctx->channels);
/* Stereo uses arrangement 2; all four 5.0 / 5.1 variants share arrangement 9. */
550 case AV_CH_LAYOUT_STEREO: c->a_mode = 2; c->num_channel = 2; break;
551 case AV_CH_LAYOUT_5POINT0: c->a_mode = 9; c->num_channel = 9; break;
552 case AV_CH_LAYOUT_5POINT1: c->a_mode = 9; c->num_channel = 9; break;
553 case AV_CH_LAYOUT_5POINT0_BACK: c->a_mode = 9; c->num_channel = 9; break;
554 case AV_CH_LAYOUT_5POINT1_BACK: c->a_mode = 9; c->num_channel = 9; break;
556 av_log(avctx, AV_LOG_ERROR,
557 "Only stereo, 5.0, 5.1 channel layouts supported at the moment!\n");
558 return AVERROR_PATCHWELCOME;
561 if (c->lfe_channel) {
564 c->channel_order_tab = dca_channel_reorder_lfe[c->a_mode];
565 c->lfe_state = LFE_PRESENT;
566 c->lfe_offset = dca_lfe_index[c->a_mode];
568 c->channel_order_tab = dca_channel_reorder_nolfe[c->a_mode];
569 c->lfe_state = LFE_MISSING;
/* Search the 16-entry rate table, ignoring zero entries. */
572 for (i = 0; i < 16; i++) {
573 if (avpriv_dca_sample_rates[i] && (avpriv_dca_sample_rates[i] == avctx->sample_rate))
/* NOTE(review): unlike the search above, this listing prints every
   table entry, including zero ones. */
577 av_log(avctx, AV_LOG_ERROR, "Sample rate %iHz not supported, only ", avctx->sample_rate);
578 for (i = 0; i < 16; i++)
579 av_log(avctx, AV_LOG_ERROR, "%d, ", avpriv_dca_sample_rates[i]);
580 av_log(avctx, AV_LOG_ERROR, "supported.\n");
583 c->sample_rate_code = i;
/* One frame consumes PCM_SAMPLES blocks of 32 samples per channel. */
585 avctx->frame_size = 32 * PCM_SAMPLES;
/* Codec registration entry for the encoder: audio codec with ID
   AV_CODEC_ID_DTS, private context of type DCAContext, encode_frame() as
   the encode callback, signed 16-bit input only, flagged experimental.
   NOTE(review): the entries at the numbering gaps (593 and 597) and the
   closing "};" are not visible in this excerpt. */
592 AVCodec ff_dca_encoder = {
594 .type = AVMEDIA_TYPE_AUDIO,
595 .id = AV_CODEC_ID_DTS,
596 .priv_data_size = sizeof(DCAContext),
598 .encode2 = encode_frame,
599 .capabilities = CODEC_CAP_EXPERIMENTAL,
600 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
601 AV_SAMPLE_FMT_NONE },
602 .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),