/*
 * E-AC-3 decoder
 * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
 * Copyright (c) 2008 Justin Ruggles
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * There are several features of E-AC-3 that this decoder does not yet support.
 *
 * Enhanced Coupling
 *     No known samples exist.  If any ever surface, this feature should not be
 *     too difficult to implement.
 *
 * Reduced Sample Rates
 *     No known samples exist.  The spec also does not give clear information
 *     on how this is to be implemented.
 *
 * Dependent Streams
 *     Only the independent stream is currently decoded. Any dependent
 *     streams are skipped.  We have only come across two examples of this, and
 *     they are both just test streams, one for HD-DVD and the other for
 *     Blu-ray.
 *
 * Transient Pre-noise Processing
 *     This is side information which a decoder should use to reduce artifacts
 *     caused by transients.  There are samples which are known to have this
 *     information, but this decoder currently ignores it.
 */

#include "avcodec.h"
#include "internal.h"
#include "aac_ac3_parser.h"
#include "ac3.h"
#include "ac3dec.h"
#include "ac3dec_data.h"
#include "eac3_data.h"

/**
 * gain adaptive quantization mode
 *
 * The numeric values matter: the AHT coefficient decoder compares the mode
 * read from the bitstream against 2 and uses (mode - 1) as a shift count.
 */
typedef enum {
    EAC3_GAQ_NO = 0,
    EAC3_GAQ_12,
    EAC3_GAQ_14,
    EAC3_GAQ_124
} EAC3GaqMode;

/** sample rate code value for which the header parser rejects the frame
 *  as using a reduced sample rate */
#define EAC3_SR_CODE_REDUCED 3
65 static void ff_eac3_apply_spectral_extension(AC3DecodeContext *s)
68 uint8_t wrapflag[SPX_MAX_BANDS]={1,0,}, num_copy_sections, copy_sizes[SPX_MAX_BANDS];
69 float rms_energy[SPX_MAX_BANDS];
71 /* Set copy index mapping table. Set wrap flags to apply a notch filter at
72 wrap points later on. */
73 bin = s->spx_dst_start_freq;
74 num_copy_sections = 0;
75 for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
77 int bandsize = s->spx_band_sizes[bnd];
78 if (bin + bandsize > s->spx_src_start_freq) {
79 copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
80 bin = s->spx_dst_start_freq;
83 for (i = 0; i < bandsize; i += copysize) {
84 if (bin == s->spx_src_start_freq) {
85 copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
86 bin = s->spx_dst_start_freq;
88 copysize = FFMIN(bandsize - i, s->spx_src_start_freq - bin);
92 copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
94 for (ch = 1; ch <= s->fbw_channels; ch++) {
95 if (!s->channel_uses_spx[ch])
98 /* Copy coeffs from normal bands to extension bands */
99 bin = s->spx_src_start_freq;
100 for (i = 0; i < num_copy_sections; i++) {
101 memcpy(&s->transform_coeffs[ch][bin],
102 &s->transform_coeffs[ch][s->spx_dst_start_freq],
103 copy_sizes[i]*sizeof(INTFLOAT));
104 bin += copy_sizes[i];
107 /* Calculate RMS energy for each SPX band. */
108 bin = s->spx_src_start_freq;
109 for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
110 int bandsize = s->spx_band_sizes[bnd];
112 for (i = 0; i < bandsize; i++) {
113 float coeff = s->transform_coeffs[ch][bin++];
114 accum += coeff * coeff;
116 rms_energy[bnd] = sqrtf(accum / bandsize);
119 /* Apply a notch filter at transitions between normal and extension
120 bands and at all wrap points. */
121 if (s->spx_atten_code[ch] >= 0) {
122 const float *atten_tab = ff_eac3_spx_atten_tab[s->spx_atten_code[ch]];
123 bin = s->spx_src_start_freq - 2;
124 for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
126 INTFLOAT *coeffs = &s->transform_coeffs[ch][bin];
127 coeffs[0] *= atten_tab[0];
128 coeffs[1] *= atten_tab[1];
129 coeffs[2] *= atten_tab[2];
130 coeffs[3] *= atten_tab[1];
131 coeffs[4] *= atten_tab[0];
133 bin += s->spx_band_sizes[bnd];
137 /* Apply noise-blended coefficient scaling based on previously
138 calculated RMS energy, blending factors, and SPX coordinates for
140 bin = s->spx_src_start_freq;
141 for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
142 float nscale = s->spx_noise_blend[ch][bnd] * rms_energy[bnd] * (1.0f / INT32_MIN);
143 float sscale = s->spx_signal_blend[ch][bnd];
145 // spx_noise_blend and spx_signal_blend are both FP.23
146 nscale *= 1.0 / (1<<23);
147 sscale *= 1.0 / (1<<23);
149 for (i = 0; i < s->spx_band_sizes[bnd]; i++) {
150 float noise = nscale * (int32_t)av_lfg_get(&s->dith_state);
151 s->transform_coeffs[ch][bin] *= sscale;
152 s->transform_coeffs[ch][bin++] += noise;
/** lrint(M_SQRT2*cos(2*M_PI/12)*(1<<23)) */
#define COEFF_0 10273905LL

/** lrint(M_SQRT2*cos(0*M_PI/12)*(1<<23)) = lrint(M_SQRT2*(1<<23)) */
#define COEFF_1 11863283LL

/** lrint(M_SQRT2*cos(5*M_PI/12)*(1<<23)) */
#define COEFF_2  3070444LL

/**
 * Calculate 6-point IDCT of the pre-mantissas.
 * All calculations are 24-bit fixed-point.
 *
 * The products with COEFF_* are evaluated in 64-bit (the constants are
 * long long) and shifted back down by 23 bits before being stored in int.
 *
 * @param pre_mant six input values; overwritten in place with the result
 */
static void idct6(int pre_mant[6])
{
    int tmp;
    int even0, even1, even2, odd0, odd1, odd2;

    odd1 = pre_mant[1] - pre_mant[3] - pre_mant[5];

    even2 = ( pre_mant[2]                * COEFF_0) >> 23;
    tmp   = ( pre_mant[4]                * COEFF_1) >> 23;
    odd0  = ((pre_mant[1] + pre_mant[5]) * COEFF_2) >> 23;

    even0 = pre_mant[0] + (tmp >> 1);
    even1 = pre_mant[0] - tmp;

    /* even butterfly: combine even0 with even2 */
    tmp = even0;
    even0 = tmp + even2;
    even2 = tmp - even2;

    /* odd butterfly: odd0 and odd2 both derive from the scaled odd0 term */
    tmp = odd0;
    odd0 = tmp + pre_mant[1] + pre_mant[3];
    odd2 = tmp + pre_mant[5] - pre_mant[3];

    pre_mant[0] = even0 + odd0;
    pre_mant[1] = even1 + odd1;
    pre_mant[2] = even2 + odd2;
    pre_mant[3] = even2 - odd2;
    pre_mant[4] = even1 - odd1;
    pre_mant[5] = even0 - odd0;
}

202 static void ff_eac3_decode_transform_coeffs_aht_ch(AC3DecodeContext *s, int ch)
205 int end_bap, gaq_mode;
206 GetBitContext *gbc = &s->gbc;
207 int gaq_gain[AC3_MAX_COEFS];
209 gaq_mode = get_bits(gbc, 2);
210 end_bap = (gaq_mode < 2) ? 12 : 17;
212 /* if GAQ gain is used, decode gain codes for bins with hebap between
215 if (gaq_mode == EAC3_GAQ_12 || gaq_mode == EAC3_GAQ_14) {
216 /* read 1-bit GAQ gain codes */
217 for (bin = s->start_freq[ch]; bin < s->end_freq[ch]; bin++) {
218 if (s->bap[ch][bin] > 7 && s->bap[ch][bin] < end_bap)
219 gaq_gain[gs++] = get_bits1(gbc) << (gaq_mode-1);
221 } else if (gaq_mode == EAC3_GAQ_124) {
222 /* read 1.67-bit GAQ gain codes (3 codes in 5 bits) */
224 for (bin = s->start_freq[ch]; bin < s->end_freq[ch]; bin++) {
225 if (s->bap[ch][bin] > 7 && s->bap[ch][bin] < 17) {
227 int group_code = get_bits(gbc, 5);
228 if (group_code > 26) {
229 av_log(s->avctx, AV_LOG_WARNING, "GAQ gain group code out-of-range\n");
232 gaq_gain[gs++] = ff_ac3_ungroup_3_in_5_bits_tab[group_code][0];
233 gaq_gain[gs++] = ff_ac3_ungroup_3_in_5_bits_tab[group_code][1];
234 gaq_gain[gs++] = ff_ac3_ungroup_3_in_5_bits_tab[group_code][2];
242 for (bin = s->start_freq[ch]; bin < s->end_freq[ch]; bin++) {
243 int hebap = s->bap[ch][bin];
244 int bits = ff_eac3_bits_vs_hebap[hebap];
246 /* zero-mantissa dithering */
247 for (blk = 0; blk < 6; blk++) {
248 s->pre_mantissa[ch][bin][blk] = (av_lfg_get(&s->dith_state) & 0x7FFFFF) - 0x400000;
250 } else if (hebap < 8) {
251 /* Vector Quantization */
252 int v = get_bits(gbc, bits);
253 for (blk = 0; blk < 6; blk++) {
254 s->pre_mantissa[ch][bin][blk] = ff_eac3_mantissa_vq[hebap][v][blk] * (1 << 8);
257 /* Gain Adaptive Quantization */
259 if (gaq_mode != EAC3_GAQ_NO && hebap < end_bap) {
260 log_gain = gaq_gain[gs++];
264 gbits = bits - log_gain;
266 for (blk = 0; blk < 6; blk++) {
267 int mant = get_sbits(gbc, gbits);
268 if (log_gain && mant == -(1 << (gbits-1))) {
271 int mbits = bits - (2 - log_gain);
272 mant = get_sbits(gbc, mbits);
273 mant = ((unsigned)mant) << (23 - (mbits - 1));
274 /* remap mantissa value to correct for asymmetric quantization */
276 b = 1 << (23 - log_gain);
278 b = ff_eac3_gaq_remap_2_4_b[hebap-8][log_gain-1] * (1 << 8);
279 mant += ((ff_eac3_gaq_remap_2_4_a[hebap-8][log_gain-1] * (int64_t)mant) >> 15) + b;
281 /* small mantissa, no GAQ, or Gk=1 */
282 mant *= (1 << 24 - bits);
284 /* remap mantissa value for no GAQ or Gk=1 */
285 mant += (ff_eac3_gaq_remap_1[hebap-8] * (int64_t)mant) >> 15;
288 s->pre_mantissa[ch][bin][blk] = mant;
291 idct6(s->pre_mantissa[ch][bin]);
295 static int ff_eac3_parse_header(AC3DecodeContext *s)
298 int ac3_exponent_strategy, parse_aht_info, parse_spx_atten_data;
299 int parse_transient_proc_info;
301 GetBitContext *gbc = &s->gbc;
303 /* An E-AC-3 stream can have multiple independent streams which the
304 application can select from. each independent stream can also contain
305 dependent streams which are used to add or replace channels. */
306 if (s->frame_type == EAC3_FRAME_TYPE_RESERVED) {
307 av_log(s->avctx, AV_LOG_ERROR, "Reserved frame type\n");
308 return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
311 /* The substream id indicates which substream this frame belongs to. each
312 independent stream has its own substream id, and the dependent streams
313 associated to an independent stream have matching substream id's. */
314 if (s->substreamid) {
315 /* only decode substream with id=0. skip any additional substreams. */
316 if (!s->eac3_subsbtreamid_found) {
317 s->eac3_subsbtreamid_found = 1;
318 avpriv_request_sample(s->avctx, "Additional substreams");
320 return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
323 if (s->bit_alloc_params.sr_code == EAC3_SR_CODE_REDUCED) {
324 /* The E-AC-3 specification does not tell how to handle reduced sample
325 rates in bit allocation. The best assumption would be that it is
326 handled like AC-3 DolbyNet, but we cannot be sure until we have a
327 sample which utilizes this feature. */
328 avpriv_request_sample(s->avctx, "Reduced sampling rate");
329 return AVERROR_PATCHWELCOME;
331 skip_bits(gbc, 5); // skip bitstream id
333 /* volume control params */
334 for (i = 0; i < (s->channel_mode ? 1 : 2); i++) {
335 s->dialog_normalization[i] = -get_bits(gbc, 5);
336 if (s->dialog_normalization[i] == 0) {
337 s->dialog_normalization[i] = -31;
339 if (s->target_level != 0) {
340 s->level_gain[i] = powf(2.0f,
341 (float)(s->target_level - s->dialog_normalization[i])/6.0f);
343 s->compression_exists[i] = get_bits1(gbc);
344 if (s->compression_exists[i]) {
345 s->heavy_dynamic_range[i] = AC3_HEAVY_RANGE(get_bits(gbc, 8));
349 /* dependent stream channel map */
350 if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT) {
351 if (get_bits1(gbc)) {
352 int64_t channel_layout = 0;
353 int channel_map = get_bits(gbc, 16);
354 av_log(s->avctx, AV_LOG_DEBUG, "channel_map: %0X\n", channel_map);
356 for (i = 0; i < 16; i++)
357 if (channel_map & (1 << (EAC3_MAX_CHANNELS - i - 1)))
358 channel_layout |= ff_eac3_custom_channel_map_locations[i][1];
360 if (av_popcount64(channel_layout) > EAC3_MAX_CHANNELS) {
361 return AVERROR_INVALIDDATA;
363 s->channel_map = channel_map;
367 /* mixing metadata */
368 if (get_bits1(gbc)) {
369 /* center and surround mix levels */
370 if (s->channel_mode > AC3_CHMODE_STEREO) {
371 s->preferred_downmix = get_bits(gbc, 2);
372 if (s->channel_mode & 1) {
373 /* if three front channels exist */
374 s->center_mix_level_ltrt = get_bits(gbc, 3);
375 s->center_mix_level = get_bits(gbc, 3);
377 if (s->channel_mode & 4) {
378 /* if a surround channel exists */
379 s->surround_mix_level_ltrt = av_clip(get_bits(gbc, 3), 3, 7);
380 s->surround_mix_level = av_clip(get_bits(gbc, 3), 3, 7);
385 if (s->lfe_on && (s->lfe_mix_level_exists = get_bits1(gbc))) {
386 s->lfe_mix_level = get_bits(gbc, 5);
389 /* info for mixing with other streams and substreams */
390 if (s->frame_type == EAC3_FRAME_TYPE_INDEPENDENT) {
391 for (i = 0; i < (s->channel_mode ? 1 : 2); i++) {
392 // TODO: apply program scale factor
393 if (get_bits1(gbc)) {
394 skip_bits(gbc, 6); // skip program scale factor
397 if (get_bits1(gbc)) {
398 skip_bits(gbc, 6); // skip external program scale factor
400 /* skip mixing parameter data */
401 switch(get_bits(gbc, 2)) {
402 case 1: skip_bits(gbc, 5); break;
403 case 2: skip_bits(gbc, 12); break;
405 int mix_data_size = (get_bits(gbc, 5) + 2) << 3;
406 skip_bits_long(gbc, mix_data_size);
410 /* skip pan information for mono or dual mono source */
411 if (s->channel_mode < AC3_CHMODE_STEREO) {
412 for (i = 0; i < (s->channel_mode ? 1 : 2); i++) {
413 if (get_bits1(gbc)) {
414 /* note: this is not in the ATSC A/52B specification
415 reference: ETSI TS 102 366 V1.1.1
416 section: E.1.3.1.25 */
417 skip_bits(gbc, 8); // skip pan mean direction index
418 skip_bits(gbc, 6); // skip reserved paninfo bits
422 /* skip mixing configuration information */
423 if (get_bits1(gbc)) {
424 for (blk = 0; blk < s->num_blocks; blk++) {
425 if (s->num_blocks == 1 || get_bits1(gbc)) {
433 /* informational metadata */
434 if (get_bits1(gbc)) {
435 s->bitstream_mode = get_bits(gbc, 3);
436 skip_bits(gbc, 2); // skip copyright bit and original bitstream bit
437 if (s->channel_mode == AC3_CHMODE_STEREO) {
438 s->dolby_surround_mode = get_bits(gbc, 2);
439 s->dolby_headphone_mode = get_bits(gbc, 2);
441 if (s->channel_mode >= AC3_CHMODE_2F2R) {
442 s->dolby_surround_ex_mode = get_bits(gbc, 2);
444 for (i = 0; i < (s->channel_mode ? 1 : 2); i++) {
445 if (get_bits1(gbc)) {
446 skip_bits(gbc, 8); // skip mix level, room type, and A/D converter type
449 if (s->bit_alloc_params.sr_code != EAC3_SR_CODE_REDUCED) {
450 skip_bits1(gbc); // skip source sample rate code
454 /* converter synchronization flag
455 If frames are less than six blocks, this bit should be turned on
456 once every 6 blocks to indicate the start of a frame set.
457 reference: RFC 4598, Section 2.1.3 Frame Sets */
458 if (s->frame_type == EAC3_FRAME_TYPE_INDEPENDENT && s->num_blocks != 6) {
459 skip_bits1(gbc); // skip converter synchronization flag
462 /* original frame size code if this stream was converted from AC-3 */
463 if (s->frame_type == EAC3_FRAME_TYPE_AC3_CONVERT &&
464 (s->num_blocks == 6 || get_bits1(gbc))) {
465 skip_bits(gbc, 6); // skip frame size code
468 /* additional bitstream info */
469 if (get_bits1(gbc)) {
470 int addbsil = get_bits(gbc, 6);
471 for (i = 0; i < addbsil + 1; i++) {
472 skip_bits(gbc, 8); // skip additional bit stream info
476 /* audio frame syntax flags, strategy data, and per-frame data */
478 if (s->num_blocks == 6) {
479 ac3_exponent_strategy = get_bits1(gbc);
480 parse_aht_info = get_bits1(gbc);
482 /* less than 6 blocks, so use AC-3-style exponent strategy syntax, and
484 ac3_exponent_strategy = 1;
488 s->snr_offset_strategy = get_bits(gbc, 2);
489 parse_transient_proc_info = get_bits1(gbc);
491 s->block_switch_syntax = get_bits1(gbc);
492 if (!s->block_switch_syntax)
493 memset(s->block_switch, 0, sizeof(s->block_switch));
495 s->dither_flag_syntax = get_bits1(gbc);
496 if (!s->dither_flag_syntax) {
497 for (ch = 1; ch <= s->fbw_channels; ch++)
498 s->dither_flag[ch] = 1;
500 s->dither_flag[CPL_CH] = s->dither_flag[s->lfe_ch] = 0;
502 s->bit_allocation_syntax = get_bits1(gbc);
503 if (!s->bit_allocation_syntax) {
504 /* set default bit allocation parameters */
505 s->bit_alloc_params.slow_decay = ff_ac3_slow_decay_tab[2];
506 s->bit_alloc_params.fast_decay = ff_ac3_fast_decay_tab[1];
507 s->bit_alloc_params.slow_gain = ff_ac3_slow_gain_tab [1];
508 s->bit_alloc_params.db_per_bit = ff_ac3_db_per_bit_tab[2];
509 s->bit_alloc_params.floor = ff_ac3_floor_tab [7];
512 s->fast_gain_syntax = get_bits1(gbc);
513 s->dba_syntax = get_bits1(gbc);
514 s->skip_syntax = get_bits1(gbc);
515 parse_spx_atten_data = get_bits1(gbc);
517 /* coupling strategy occurrence and coupling use per block */
519 if (s->channel_mode > 1) {
520 for (blk = 0; blk < s->num_blocks; blk++) {
521 s->cpl_strategy_exists[blk] = (!blk || get_bits1(gbc));
522 if (s->cpl_strategy_exists[blk]) {
523 s->cpl_in_use[blk] = get_bits1(gbc);
525 s->cpl_in_use[blk] = s->cpl_in_use[blk-1];
527 num_cpl_blocks += s->cpl_in_use[blk];
530 memset(s->cpl_in_use, 0, sizeof(s->cpl_in_use));
533 /* exponent strategy data */
534 if (ac3_exponent_strategy) {
535 /* AC-3-style exponent strategy syntax */
536 for (blk = 0; blk < s->num_blocks; blk++) {
537 for (ch = !s->cpl_in_use[blk]; ch <= s->fbw_channels; ch++) {
538 s->exp_strategy[blk][ch] = get_bits(gbc, 2);
542 /* LUT-based exponent strategy syntax */
543 for (ch = !((s->channel_mode > 1) && num_cpl_blocks); ch <= s->fbw_channels; ch++) {
544 int frmchexpstr = get_bits(gbc, 5);
545 for (blk = 0; blk < 6; blk++) {
546 s->exp_strategy[blk][ch] = ff_eac3_frm_expstr[frmchexpstr][blk];
550 /* LFE exponent strategy */
552 for (blk = 0; blk < s->num_blocks; blk++) {
553 s->exp_strategy[blk][s->lfe_ch] = get_bits1(gbc);
556 /* original exponent strategies if this stream was converted from AC-3 */
557 if (s->frame_type == EAC3_FRAME_TYPE_INDEPENDENT &&
558 (s->num_blocks == 6 || get_bits1(gbc))) {
559 skip_bits(gbc, 5 * s->fbw_channels); // skip converter channel exponent strategy
562 /* determine which channels use AHT */
563 if (parse_aht_info) {
564 /* For AHT to be used, all non-zero blocks must reuse exponents from
565 the first block. Furthermore, for AHT to be used in the coupling
566 channel, all blocks must use coupling and use the same coupling
568 s->channel_uses_aht[CPL_CH]=0;
569 for (ch = (num_cpl_blocks != 6); ch <= s->channels; ch++) {
571 for (blk = 1; blk < 6; blk++) {
572 if ((s->exp_strategy[blk][ch] != EXP_REUSE) ||
573 (!ch && s->cpl_strategy_exists[blk])) {
578 s->channel_uses_aht[ch] = use_aht && get_bits1(gbc);
581 memset(s->channel_uses_aht, 0, sizeof(s->channel_uses_aht));
584 /* per-frame SNR offset */
585 if (!s->snr_offset_strategy) {
586 int csnroffst = (get_bits(gbc, 6) - 15) << 4;
587 int snroffst = (csnroffst + get_bits(gbc, 4)) << 2;
588 for (ch = 0; ch <= s->channels; ch++)
589 s->snr_offset[ch] = snroffst;
592 /* transient pre-noise processing data */
593 if (parse_transient_proc_info) {
594 for (ch = 1; ch <= s->fbw_channels; ch++) {
595 if (get_bits1(gbc)) { // channel in transient processing
596 skip_bits(gbc, 10); // skip transient processing location
597 skip_bits(gbc, 8); // skip transient processing length
602 /* spectral extension attenuation data */
603 for (ch = 1; ch <= s->fbw_channels; ch++) {
604 if (parse_spx_atten_data && get_bits1(gbc)) {
605 s->spx_atten_code[ch] = get_bits(gbc, 5);
607 s->spx_atten_code[ch] = -1;
611 /* block start information */
612 if (s->num_blocks > 1 && get_bits1(gbc)) {
613 /* reference: Section E2.3.2.27
614 nblkstrtbits = (numblks - 1) * (4 + ceiling(log2(words_per_frame)))
615 The spec does not say what this data is or what it's used for.
616 It is likely the offset of each block within the frame. */
617 int block_start_bits = (s->num_blocks-1) * (4 + av_log2(s->frame_size-2));
618 skip_bits_long(gbc, block_start_bits);
619 avpriv_request_sample(s->avctx, "Block start info");
622 /* syntax state initialization */
623 for (ch = 1; ch <= s->fbw_channels; ch++) {
624 s->first_spx_coords[ch] = 1;
625 s->first_cpl_coords[ch] = 1;
627 s->first_cpl_leak = 1;