3 * Copyright (C) 2008 Alexander E. Patrakov
4 * 2010 Benjamin Larsson
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/channel_layout.h"
25 #include "libavutil/common.h"
26 #include "libavutil/avassert.h"
/* Compile-time limits and framing constants shared by the whole encoder. */
36 #define MAX_CHANNELS 6
37 #define DCA_SUBBANDS_32 32
38 #define DCA_MAX_FRAME_SIZE 16383
39 #define DCA_HEADER_SIZE 13
/* NOTE(review): same value as DCA_SUBBANDS_32; both names are used for subband loops below. */
41 #define DCA_SUBBANDS 32 ///< Subband activity count
42 #define QUANTIZER_BITS 16
44 #define SUBSUBFRAMES 4
/* Each subsubframe carries 8 samples per subband (see the inner loop of put_subframe()).
   NOTE(review): SUBFRAMES is not defined among the visible lines. */
45 #define PCM_SAMPLES (SUBFRAMES*SUBSUBFRAMES*8)
/* Number of input samples consumed by lfe_downsample() per LFE output value. */
47 #define LFE_INTERPOLATION 64
/* Lookup indexed by a_mode: encode_init() copies the entry into lfe_offset, which
   encode_frame() uses as the LFE sample's offset inside each interleaved sample group. */
51 static const int8_t dca_lfe_index[] = {
52 1,2,2,2,2,3,2,3,2,3,2,3,1,3,2,3
/* Channel reordering used when an LFE channel is present; one row per a_mode.
   encode_frame() reads row[input channel index]: a negative entry makes it skip that
   input slot, any other value is the destination channel index in the subband buffer. */
55 static const int8_t dca_channel_reorder_lfe[][9] = {
56 { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
57 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
58 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
59 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
60 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
61 { 1, 2, 0, -1, -1, -1, -1, -1, -1 },
62 { 0, 1, -1, 2, -1, -1, -1, -1, -1 },
63 { 1, 2, 0, -1, 3, -1, -1, -1, -1 },
64 { 0, 1, -1, 2, 3, -1, -1, -1, -1 },
65 { 1, 2, 0, -1, 3, 4, -1, -1, -1 },
66 { 2, 3, -1, 0, 1, 4, 5, -1, -1 },
67 { 1, 2, 0, -1, 3, 4, 5, -1, -1 },
68 { 0, -1, 4, 5, 2, 3, 1, -1, -1 },
69 { 3, 4, 1, -1, 0, 2, 5, 6, -1 },
70 { 2, 3, -1, 5, 7, 0, 1, 4, 6 },
71 { 3, 4, 1, -1, 0, 2, 5, 7, 6 },
/* Channel reordering used when no LFE channel is present; one row per a_mode.
   Read by encode_frame() as row[input channel index]; a negative entry means the
   slot is skipped, otherwise the value is the destination channel index. */
74 static const int8_t dca_channel_reorder_nolfe[][9] = {
75 { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
76 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
77 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
78 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
79 { 0, 1, -1, -1, -1, -1, -1, -1, -1 },
80 { 1, 2, 0, -1, -1, -1, -1, -1, -1 },
81 { 0, 1, 2, -1, -1, -1, -1, -1, -1 },
82 { 1, 2, 0, 3, -1, -1, -1, -1, -1 },
83 { 0, 1, 2, 3, -1, -1, -1, -1, -1 },
84 { 1, 2, 0, 3, 4, -1, -1, -1, -1 },
85 { 2, 3, 0, 1, 4, 5, -1, -1, -1 },
86 { 1, 2, 0, 3, 4, 5, -1, -1, -1 },
87 { 0, 4, 5, 2, 3, 1, -1, -1, -1 },
88 { 3, 4, 1, 0, 2, 5, 6, -1, -1 },
89 { 2, 3, 5, 7, 0, 1, 4, 6, -1 },
90 { 3, 4, 1, 0, 2, 5, 7, 6, -1 },
/* Encoder state members (the struct header and several members, e.g. the bit writer,
   are not among the visible lines). */
95 int32_t history[MAX_CHANNELS][512]; /* This is a circular buffer */
/* Next write position in history[ch]; add_new_samples() wraps it to 0 on reaching 512. */
96 int start[MAX_CHANNELS];
/* Index into avpriv_dca_sample_rates chosen by encode_init(); written as 4 header bits. */
100 int sample_rate_code;
/* Per-channel, per-subband scale factor index computed in put_subframe(). */
101 int scale_factor[MAX_CHANNELS][DCA_SUBBANDS_32];
/* Scale factor index for the LFE samples of the current subframe. */
102 int lfe_scale_factor;
/* Downsampled LFE values, filled by encode_frame() via lfe_downsample(). */
103 int lfe_data[SUBFRAMES*SUBSUBFRAMES*4];
105 int a_mode; ///< audio channels arrangement
109 const int8_t *channel_order_tab; ///< channel reordering table, lfe and non lfe
/* Scratch block of input samples (shifted up by 16 bits) handed to the filters. */
111 int32_t pcm[FFMAX(LFE_INTERPOLATION, DCA_SUBBANDS_32)];
112 int32_t subband[PCM_SAMPLES][MAX_CHANNELS][DCA_SUBBANDS_32]; /* [sample][channel][subband] */
/* 128-entry fixed-point cosine table filled by qmf_init() and read (index & 127)
   by band_delta_factor(). */
115 static int32_t cos_table[128];
/**
 * Fixed-point multiply: return the upper 32 bits of the 64-bit product a * b,
 * i.e. (a * b) / 2^32, rounded to nearest by adding half an output unit
 * before the shift.
 *
 * @param a first factor
 * @param b second factor
 * @return rounded high word of the 64-bit product
 */
static inline int32_t mul32(int32_t a, int32_t b)
{
    int64_t r = (int64_t) a * b;
    /* round the result before truncating - improves accuracy */
    return (int32_t) ((r + (INT64_C(1) << 31)) >> 32);
}
124 /* Integer version of the cosine modulated Pseudo QMF */
/*
 * Fill cos_table[] with one full period (128 steps) of a fixed-point cosine.
 * A quarter wave of sine/cosine values is generated by repeatedly rotating
 * (c, s) by one step with the angle-addition formulas; the two constants are
 * approximately sin(PI/64) and cos(PI/64) scaled by 2^31. The remaining three
 * quadrants are produced from that quarter wave by mirroring and negation.
 */
126 static void qmf_init(void)
129 int32_t c[17], s[17];
130 s[0] = 0; /* sin(index * PI / 64) * 0x7fffffff */
131 c[0] = 0x7fffffff; /* cos(index * PI / 64) * 0x7fffffff */
/* mul32() returns the product divided by 2^32, hence the factor 2 to get back to 2^31 scale. */
133 for (i = 1; i <= 16; i++) {
134 s[i] = 2 * (mul32(c[i - 1], 105372028) + mul32(s[i - 1], 2144896908));
135 c[i] = 2 * (mul32(c[i - 1], 2144896908) - mul32(s[i - 1], 105372028));
/* Eight blocks of 16 entries cover the full period; each value is scaled down by 8. */
138 for (i = 0; i < 16; i++) {
139 cos_table[i ] = c[i] >> 3; /* avoid output overflow */
140 cos_table[i + 16] = s[16 - i] >> 3;
141 cos_table[i + 32] = -s[i] >> 3;
142 cos_table[i + 48] = -c[16 - i] >> 3;
143 cos_table[i + 64] = -c[i] >> 3;
144 cos_table[i + 80] = -s[16 - i] >> 3;
145 cos_table[i + 96] = s[i] >> 3;
146 cos_table[i + 112] = c[16 - i] >> 3;
/*
 * Return the cosine-modulation coefficient for a subband and sample position:
 * cos_table[] is indexed with band * (2 * sample_num + 1), taken modulo 128.
 * NOTE(review): the listing numbers skip three lines before the return, so
 * there may be an additional branch that is not visible here.
 */
150 static int32_t band_delta_factor(int band, int sample_num)
152 int index = band * (2 * sample_num + 1);
156 return cos_table[index & 127];
159 static void add_new_samples(DCAContext *c, const int32_t *in,
160 int count, int channel)
164 /* Place new samples into the history buffer */
165 for (i = 0; i < count; i++) {
166 c->history[channel][c->start[channel] + i] = in[i];
167 av_assert0(c->start[channel] + i < 512);
169 c->start[channel] += count;
170 if (c->start[channel] == 512)
171 c->start[channel] = 0;
172 av_assert0(c->start[channel] < 512);
/*
 * Run one 32-sample block of a channel through the analysis filter bank.
 * The block is appended to the channel's history; the 512 history samples,
 * read from c->start[channel] onwards and wrapping around, are multiplied by
 * UnQMF[] and folded into 32 accumulators; the accumulators are then combined
 * with band_delta_factor() to produce one value per subband in out[].
 * NOTE(review): UnQMF, the declarations of band/i/j/k/resp and the place
 * where resp is reset are not among the visible lines.
 */
175 static void qmf_decompose(DCAContext *c, int32_t in[32], int32_t out[32],
180 int32_t accum[DCA_SUBBANDS_32] = {0};
182 add_new_samples(c, in, DCA_SUBBANDS_32, channel);
184 /* Calculate the dot product of the signal with the (possibly inverted)
185 reference decoder's response to this vector:
186 (0.0, 0.0, ..., 0.0, -1.0, 1.0, 0.0, ..., 0.0)
187 so that -1.0 cancels 1.0 from the previous step */
/* k starts at 48; bit 5 of k selects whether the accumulator index runs forwards or mirrored. */
189 for (k = 48, j = 0, i = c->start[channel]; i < 512; k++, j++, i++)
190 accum[(k & 32) ? (31 - (k & 31)) : (k & 31)] += mul32(c->history[channel][i], UnQMF[j]);
/* Second half of the circular read: j and k keep counting from the first loop. */
191 for (i = 0; i < c->start[channel]; k++, j++, i++)
192 accum[(k & 32) ? (31 - (k & 31)) : (k & 31)] += mul32(c->history[channel][i], UnQMF[j]);
195 /* TODO: implement FFT instead of this naive calculation */
196 for (band = 0; band < DCA_SUBBANDS_32; band++) {
197 for (j = 0; j < 32; j++)
198 resp += mul32(accum[j], band_delta_factor(band, j));
/* The sign is flipped for bands whose index has bit 1 set. */
200 out[band] = (band & 2) ? (-resp) : resp;
/* Fixed-point copy of the LFE FIR coefficients, filled by init_lfe_fir(). */
204 static int32_t lfe_fir_64i[512];
/*
 * Produce one LFE value from LFE_INTERPOLATION input samples: the samples are
 * appended to the history slot with index c->prim_channels, then the dot
 * product of the 512-entry history (read circularly from the write position)
 * with lfe_fir_64i[] is accumulated.
 * NOTE(review): the declaration of accum and the return statement are not
 * among the visible lines.
 */
205 static int lfe_downsample(DCAContext *c, int32_t in[LFE_INTERPOLATION])
/* The LFE history lives in the slot right after the primary channels. */
208 int channel = c->prim_channels;
211 add_new_samples(c, in, LFE_INTERPOLATION, channel);
212 for (i = c->start[channel], j = 0; i < 512; i++, j++)
213 accum += mul32(c->history[channel][i], lfe_fir_64i[j]);
214 for (i = 0; i < c->start[channel]; i++, j++)
215 accum += mul32(c->history[channel][i], lfe_fir_64i[j]);
/*
 * Build lfe_fir_64i[] by scaling each entry of lfe_fir_64[] by 2^25.
 * NOTE(review): the static flag presumably makes this a one-time
 * initialisation; the guard that tests it is not among the visible lines.
 */
219 static void init_lfe_fir(void)
221 static int initialized = 0;
226 for (i = 0; i < 512; i++)
227 lfe_fir_64i[i] = lfe_fir_64[i] * (1 << 25); //float -> int32_t
/*
 * Write the frame header fields to c->pb. Most fields are constants; the
 * values taken from the context are frame_size, num_channel,
 * sample_rate_code and lfe_state.
 */
231 static void put_frame_header(DCAContext *c)
/* Two 16-bit words forming the 32-bit pattern 0x7ffe8001 */
234 put_bits(&c->pb, 16, 0x7ffe);
235 put_bits(&c->pb, 16, 0x8001);
237 /* Frame type: normal */
238 put_bits(&c->pb, 1, 1);
240 /* Deficit sample count: none */
241 put_bits(&c->pb, 5, 31);
243 /* CRC is not present */
244 put_bits(&c->pb, 1, 0);
246 /* Number of PCM sample blocks */
247 put_bits(&c->pb, 7, PCM_SAMPLES-1);
249 /* Primary frame byte size */
250 put_bits(&c->pb, 14, c->frame_size-1);
252 /* Audio channel arrangement: code selected in encode_init() (c->num_channel) */
253 put_bits(&c->pb, 6, c->num_channel);
255 /* Core audio sampling frequency */
256 put_bits(&c->pb, 4, c->sample_rate_code);
258 /* Transmission bit rate: 1411.2 kbps */
259 put_bits(&c->pb, 5, 0x16); /* FIXME: magic number */
261 /* Embedded down mix: disabled */
262 put_bits(&c->pb, 1, 0);
264 /* Embedded dynamic range flag: not present */
265 put_bits(&c->pb, 1, 0);
267 /* Embedded time stamp flag: not present */
268 put_bits(&c->pb, 1, 0);
270 /* Auxiliary data flag: not present */
271 put_bits(&c->pb, 1, 0);
273 /* HDCD source: no */
274 put_bits(&c->pb, 1, 0);
276 /* Extension audio ID: N/A */
277 put_bits(&c->pb, 3, 0);
279 /* Extended audio data: not present */
280 put_bits(&c->pb, 1, 0);
282 /* Audio sync word insertion flag: after each sub-frame */
283 put_bits(&c->pb, 1, 0);
285 /* Low frequency effects flag: not present or interpolation factor=64 */
286 put_bits(&c->pb, 2, c->lfe_state);
288 /* Predictor history switch flag: on */
289 put_bits(&c->pb, 1, 1);
292 /* Multirate interpolator switch: non-perfect reconstruction */
293 put_bits(&c->pb, 1, 0);
295 /* Encoder software revision: 7 */
296 put_bits(&c->pb, 4, 7);
298 /* Copy history: 0 */
299 put_bits(&c->pb, 2, 0);
301 /* Source PCM resolution: 16 bits, not DTS ES */
302 put_bits(&c->pb, 3, 0);
304 /* Front sum/difference coding: no */
305 put_bits(&c->pb, 1, 0);
307 /* Surrounds sum/difference coding: no */
308 put_bits(&c->pb, 1, 0);
310 /* Dialog normalization: 0 dB */
311 put_bits(&c->pb, 4, 0);
/*
 * Write the primary audio coding header to c->pb. Every per-channel field is
 * emitted once for each of the c->prim_channels channels, always with the
 * same constant value.
 */
314 static void put_primary_audio_header(DCAContext *c)
/* Field widths and values for the ten quantization index codebook selectors (entry 0 is unused). */
316 static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
317 static const int thr[11] = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
320 /* Number of subframes */
321 put_bits(&c->pb, 4, SUBFRAMES - 1);
323 /* Number of primary audio channels */
324 put_bits(&c->pb, 3, c->prim_channels - 1);
326 /* Subband activity count */
327 for (ch = 0; ch < c->prim_channels; ch++)
328 put_bits(&c->pb, 5, DCA_SUBBANDS - 2);
330 /* High frequency VQ start subband */
331 for (ch = 0; ch < c->prim_channels; ch++)
332 put_bits(&c->pb, 5, DCA_SUBBANDS - 1);
334 /* Joint intensity coding index: 0 for every channel */
335 for (ch = 0; ch < c->prim_channels; ch++)
336 put_bits(&c->pb, 3, 0);
338 /* Transient mode codebook: A4 for every channel (arbitrary) */
339 for (ch = 0; ch < c->prim_channels; ch++)
340 put_bits(&c->pb, 2, 0);
342 /* Scale factor code book: 7 bit linear, 7-bit sqrt table (for each channel) */
343 for (ch = 0; ch < c->prim_channels; ch++)
344 put_bits(&c->pb, 3, 6);
346 /* Bit allocation quantizer select: linear 5-bit */
347 for (ch = 0; ch < c->prim_channels; ch++)
348 put_bits(&c->pb, 3, 6);
350 /* Quantization index codebook select: dummy data
351 to avoid transmission of scale factor adjustment */
/* Selector-major order: all channels for selector 1, then all channels for selector 2, ... */
353 for (i = 1; i < 11; i++)
354 for (ch = 0; ch < c->prim_channels; ch++)
355 put_bits(&c->pb, bitlen[i], thr[i]);
357 /* Scale factor adjustment index: not transmitted */
361 * 8-23 bits quantization
/*
 * Convert a signed sample to a bits-wide two's-complement bit field.
 * Asserts that the sample lies in [-2^(bits-1), 2^(bits-1)) and returns its
 * low `bits` bits.
 */
365 static inline uint32_t quantize(int32_t sample, int bits)
367 av_assert0(sample < 1 << (bits - 1));
368 av_assert0(sample >= -(1 << (bits - 1)));
/* Masking keeps only the low `bits` bits, which is what put_bits() expects. */
369 return sample & ((1 << bits) - 1);
/*
 * Choose a scale factor index for a block whose largest absolute sample is
 * max_value, quantized to `bits` bits. max_value is first normalised by
 * 2^15 / lossy_quant[bits + 3] and shifted right by (bits - 1), then compared
 * against entries of scale_factor_quant7[].
 * NOTE(review): the initial bounds i = 0, j = 128 suggest a bisection over
 * the table, but the loop and the return are not among the visible lines.
 */
372 static inline int find_scale_factor7(int64_t max_value, int bits)
374 int i = 0, j = 128, q;
375 max_value = ((max_value << 15) / lossy_quant[bits + 3]) >> (bits - 1);
378 if (max_value < scale_factor_quant7[q])
/*
 * Scale one sample and write it to c->pb as a bits-wide field. The sample is
 * multiplied by 2^15 and divided by
 * lossy_quant[bits + 3] * scale_factor_quant7[scale_factor] before being
 * passed through quantize().
 * NOTE(review): the rest of the parameter list is not among the visible lines.
 */
387 static inline void put_sample7(DCAContext *c, int64_t sample, int bits,
390 sample = (sample << 15) / ((int64_t) lossy_quant[bits + 3] * scale_factor_quant7[scale_factor]);
391 put_bits(&c->pb, bits, quantize((int) sample, bits));
/*
 * Write one subframe to c->pb: side information (prediction, bit allocation
 * and transition modes, scale factors), the optional LFE data, the subband
 * samples of all subsubframes, and a closing 16-bit 0xffff word.
 * Scale factors are computed here from the peak absolute value of each
 * channel/subband over the subframe and stored in the context.
 */
394 static void put_subframe(DCAContext *c,
395 int32_t subband_data[8 * SUBSUBFRAMES][MAX_CHANNELS][32],
398 int i, sub, ss, ch, max_value;
/* LFE values belonging to this subframe: 4 per subsubframe. */
399 int32_t *lfe_data = c->lfe_data + 4 * SUBSUBFRAMES * subframe;
401 /* Subsubframes count */
402 put_bits(&c->pb, 2, SUBSUBFRAMES -1);
404 /* Partial subsubframe sample count: dummy */
405 put_bits(&c->pb, 3, 0);
407 /* Prediction mode: no ADPCM, in each channel and subband */
408 for (ch = 0; ch < c->prim_channels; ch++)
409 for (sub = 0; sub < DCA_SUBBANDS; sub++)
410 put_bits(&c->pb, 1, 0);
412 /* Prediction VQ address: not transmitted */
413 /* Bit allocation index */
414 for (ch = 0; ch < c->prim_channels; ch++)
415 for (sub = 0; sub < DCA_SUBBANDS; sub++)
416 put_bits(&c->pb, 5, QUANTIZER_BITS+3);
418 if (SUBSUBFRAMES > 1) {
419 /* Transition mode: none for each channel and subband */
420 for (ch = 0; ch < c->prim_channels; ch++)
421 for (sub = 0; sub < DCA_SUBBANDS; sub++)
422 put_bits(&c->pb, 1, 0); /* codebook A4 */
425 /* Determine scale_factor */
426 for (ch = 0; ch < c->prim_channels; ch++)
427 for (sub = 0; sub < DCA_SUBBANDS; sub++) {
/* NOTE(review): the statement that resets max_value before this scan is not among the visible lines. */
429 for (i = 0; i < 8 * SUBSUBFRAMES; i++)
430 max_value = FFMAX(max_value, FFABS(subband_data[i][ch][sub]));
431 c->scale_factor[ch][sub] = find_scale_factor7(max_value, QUANTIZER_BITS);
/* Same peak search for the LFE values of this subframe. */
434 if (c->lfe_channel) {
436 for (i = 0; i < 4 * SUBSUBFRAMES; i++)
437 max_value = FFMAX(max_value, FFABS(lfe_data[i]));
438 c->lfe_scale_factor = find_scale_factor7(max_value, LFE_BITS);
441 /* Scale factors: one 7-bit index per channel and subband,
442 encoded according to Table D.1.2 */
443 for (ch = 0; ch < c->prim_channels; ch++)
444 for (sub = 0; sub < DCA_SUBBANDS; sub++)
445 put_bits(&c->pb, 7, c->scale_factor[ch][sub]);
447 /* Joint subband scale factor codebook select: not transmitted */
448 /* Scale factors for joint subband coding: not transmitted */
449 /* Stereo down-mix coefficients: not transmitted */
450 /* Dynamic range coefficient: not transmitted */
451 /* Side information CRC check word: not transmitted */
452 /* VQ encoded high frequency subbands: not transmitted */
/* LFE samples first, followed by their scale factor as an 8-bit field. */
455 if (c->lfe_channel) {
456 for (i = 0; i < 4 * SUBSUBFRAMES; i++)
457 put_sample7(c, lfe_data[i], LFE_BITS, c->lfe_scale_factor);
458 put_bits(&c->pb, 8, c->lfe_scale_factor);
461 /* Audio data (subsubframes) */
/* Order: subsubframe, channel, subband, then the 8 samples of that subband. */
463 for (ss = 0; ss < SUBSUBFRAMES ; ss++)
464 for (ch = 0; ch < c->prim_channels; ch++)
465 for (sub = 0; sub < DCA_SUBBANDS; sub++)
466 for (i = 0; i < 8; i++)
467 put_sample7(c, subband_data[ss * 8 + i][ch][sub], QUANTIZER_BITS, c->scale_factor[ch][sub]);
/* 16-bit all-ones word closing the subframe. */
470 put_bits(&c->pb, 16, 0xffff);
/*
 * Serialise one complete frame into the output buffer. The payload is written
 * first, starting DCA_HEADER_SIZE bytes into the buffer, so that the final
 * byte count is known; c->frame_size is then set and the bit writer is
 * re-initialised over the first DCA_HEADER_SIZE bytes for the frame header.
 * NOTE(review): the call that writes the frame header between the second
 * init_put_bits() and the final flush is not among the visible lines.
 */
473 static void put_frame(DCAContext *c,
474 int32_t subband_data[PCM_SAMPLES][MAX_CHANNELS][32],
478 init_put_bits(&c->pb, frame + DCA_HEADER_SIZE, DCA_MAX_FRAME_SIZE-DCA_HEADER_SIZE);
480 put_primary_audio_header(c);
/* Each subframe consumes SUBSUBFRAMES * 8 rows of subband_data. */
481 for (i = 0; i < SUBFRAMES; i++)
482 put_subframe(c, &subband_data[SUBSUBFRAMES * 8 * i], i);
484 flush_put_bits(&c->pb);
/* Payload bytes plus the header bytes that are filled in next. */
485 c->frame_size = (put_bits_count(&c->pb) >> 3) + DCA_HEADER_SIZE;
487 init_put_bits(&c->pb, frame, DCA_HEADER_SIZE);
489 flush_put_bits(&c->pb);
492 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
493 const AVFrame *frame, int *got_packet_ptr)
496 DCAContext *c = avctx->priv_data;
497 const int16_t *samples;
498 int ret, real_channel = 0;
500 if ((ret = ff_alloc_packet2(avctx, avpkt, DCA_MAX_FRAME_SIZE + DCA_HEADER_SIZE)))
503 samples = (const int16_t *)frame->data[0];
504 for (i = 0; i < PCM_SAMPLES; i ++) { /* i is the decimated sample number */
505 for (channel = 0; channel < c->prim_channels + 1; channel++) {
506 real_channel = c->channel_order_tab[channel];
507 if (real_channel >= 0) {
508 /* Get 32 PCM samples */
509 for (k = 0; k < 32; k++) { /* k is the sample number in a 32-sample block */
510 c->pcm[k] = samples[avctx->channels * (32 * i + k) + channel] << 16;
512 /* Put subband samples into the proper place */
513 qmf_decompose(c, c->pcm, &c->subband[i][real_channel][0], real_channel);
518 if (c->lfe_channel) {
519 for (i = 0; i < PCM_SAMPLES / 2; i++) {
520 for (k = 0; k < LFE_INTERPOLATION; k++) /* k is the sample number in a 32-sample block */
521 c->pcm[k] = samples[avctx->channels * (LFE_INTERPOLATION*i+k) + c->lfe_offset] << 16;
522 c->lfe_data[i] = lfe_downsample(c, c->pcm);
526 put_frame(c, c->subband, avpkt->data);
528 avpkt->size = c->frame_size;
/*
 * Initialise the encoder context from the codec parameters: channel count
 * and layout, LFE handling, channel reordering table and sample rate code.
 * Returns AVERROR_PATCHWELCOME for unsupported channel layouts.
 * NOTE(review): several control-flow lines (the layout test and switch
 * header, break/return statements, closing braces) are not among the visible
 * lines, so the exact branching cannot be confirmed from here.
 */
533 static int encode_init(AVCodecContext *avctx)
535 DCAContext *c = avctx->priv_data;
537 uint64_t layout = avctx->channel_layout;
539 c->prim_channels = avctx->channels;
/* LFE presence is derived from the channel count alone (3 or 6 channels). */
540 c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6);
543 av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The "
544 "encoder will guess the layout, but it "
545 "might be incorrect.\n");
546 layout = av_get_default_channel_layout(avctx->channels);
/* All four 5.x layouts share the same a_mode / num_channel code. */
549 case AV_CH_LAYOUT_STEREO: c->a_mode = 2; c->num_channel = 2; break;
550 case AV_CH_LAYOUT_5POINT0: c->a_mode = 9; c->num_channel = 9; break;
551 case AV_CH_LAYOUT_5POINT1: c->a_mode = 9; c->num_channel = 9; break;
552 case AV_CH_LAYOUT_5POINT0_BACK: c->a_mode = 9; c->num_channel = 9; break;
553 case AV_CH_LAYOUT_5POINT1_BACK: c->a_mode = 9; c->num_channel = 9; break;
555 av_log(avctx, AV_LOG_ERROR,
556 "Only stereo, 5.0, 5.1 channel layouts supported at the moment!\n");
557 return AVERROR_PATCHWELCOME;
/* Pick the reordering table and LFE header state matching the LFE decision above. */
560 if (c->lfe_channel) {
563 c->channel_order_tab = dca_channel_reorder_lfe[c->a_mode];
564 c->lfe_state = LFE_PRESENT;
565 c->lfe_offset = dca_lfe_index[c->a_mode];
567 c->channel_order_tab = dca_channel_reorder_nolfe[c->a_mode];
568 c->lfe_state = LFE_MISSING;
/* Look for the requested sample rate among the non-zero table entries. */
571 for (i = 0; i < 16; i++) {
572 if (avpriv_dca_sample_rates[i] && (avpriv_dca_sample_rates[i] == avctx->sample_rate))
576 av_log(avctx, AV_LOG_ERROR, "Sample rate %iHz not supported, only ", avctx->sample_rate);
/* NOTE(review): this prints all 16 entries, including the zero entries that the search above skips. */
577 for (i = 0; i < 16; i++)
578 av_log(avctx, AV_LOG_ERROR, "%d, ", avpriv_dca_sample_rates[i]);
579 av_log(avctx, AV_LOG_ERROR, "supported.\n");
582 c->sample_rate_code = i;
/* encode_frame() consumes 32 input samples per channel for each of the PCM_SAMPLES steps. */
584 avctx->frame_size = 32 * PCM_SAMPLES;
/*
 * Codec descriptor for the DCA encoder: an audio codec with id
 * AV_CODEC_ID_DTS, flagged experimental, accepting signed 16-bit samples and
 * encoding through encode_frame().
 * NOTE(review): some initialisers (presumably the name and init callback)
 * are not among the visible lines.
 */
591 AVCodec ff_dca_encoder = {
593 .type = AVMEDIA_TYPE_AUDIO,
594 .id = AV_CODEC_ID_DTS,
595 .priv_data_size = sizeof(DCAContext),
597 .encode2 = encode_frame,
598 .capabilities = CODEC_CAP_EXPERIMENTAL,
/* The sample format list is terminated by AV_SAMPLE_FMT_NONE. */
599 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
600 AV_SAMPLE_FMT_NONE },
601 .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),