2 * G.722 ADPCM audio encoder/decoder
4 * Copyright (c) CMU 1993 Computer Science, Speech Group
5 * Chengxiang Lu and Alex Hauptmann
6 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
7 * Copyright (c) 2009 Kenan Gillet
8 * Copyright (c) 2010 Martin Storsjo
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 * G.722 ADPCM audio codec
31 * This G.722 decoder is a bit-exact implementation of the ITU G.722
32 * specification for all three specified bitrates - 64000bps, 56000bps
33 * and 48000bps. It passes the ITU tests.
35 * @note For the 56000bps and 48000bps bitrates, the lowest 1 or 2 bits
36 * respectively of each byte are ignored.
/* Capacity, in samples, of the prev_samples history buffer in G722Context. */
#define PREV_SAMPLES_BUF_SIZE 1024

/* Number of encoded bytes between trellis freezes in g722_encode_trellis();
 * g722_init() also sizes the path storage as frontier * FREEZE_INTERVAL. */
#define FREEZE_INTERVAL 128
/* Members of the codec's private context (used as G722Context *c elsewhere in
 * this file).
 * NOTE(review): this extract has lost short lines - the struct headers and
 * closers and several members (e.g. the ssd/path/paths fields referenced by
 * the trellis encoder) are not visible here. */
/* Sample history fed to apply_qmf(); written by both the decoder and
 * filter_samples(). */
48 int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]; ///< memory of past decoded samples
49 int prev_samples_pos; ///< the number of values in prev_samples
52 * The band[0] and band[1] correspond respectively to the lower band and higher band.
/* Per-band adaptive predictor / quantizer state, accessed through
 * struct G722Band pointers by do_adaptive_prediction() and the update_*()
 * helpers. */
55 int16_t s_predictor; ///< predictor output value
56 int32_t s_zero; ///< previous output signal from zero predictor
57 int8_t part_reconst_mem[2]; ///< signs of previous partially reconstructed signals
58 int16_t prev_qtzd_reconst; ///< previous quantized reconstructed signal (internal value, using low_inv_quant4)
59 int16_t pole_mem[2]; ///< second-order pole section coefficient buffer
60 int32_t diff_mem[6]; ///< quantizer difference signal memory
61 int16_t zero_mem[6]; ///< Seventh-order zero section coefficient buffer
62 int16_t log_factor; ///< delayed 2-logarithmic quantizer factor
63 int16_t scale_factor; ///< delayed quantizer scale factor
/* Trellis search storage: every node carries its own copy of a band state;
 * node_buf[] holds the nodes and nodep_buf[] the pointer arrays, one pair
 * per band (allocated in g722_init() when trellis is enabled). */
67 struct G722Band state;
70 } *node_buf[2], **nodep_buf[2];
/* Maps a boolean (0 or 1) to a sign multiplier: 0 -> -1, 1 -> +1. */
static const int8_t sign_lookup[2] = { -1, 1 };

/* Mantissa table for linear_scale_factor(), indexed by bits 6..10 of the
 * log-domain factor. Entry i is approximately 2048 * 2^(i/32). */
static const int16_t inv_log2_table[32] = {
    2048, 2093, 2139, 2186, 2233, 2282, 2332, 2383,
    2435, 2489, 2543, 2599, 2656, 2714, 2774, 2834,
    2896, 2960, 3025, 3091, 3158, 3228, 3298, 3371,
    3444, 3520, 3597, 3676, 3756, 3838, 3922, 4008
};

/* Higher-band log-factor adaptation steps, selected with (ihigh & 1). */
static const int16_t high_log_factor_step[2] = { 798, -214 };

/* Higher-band inverse quantizer levels, indexed by the 2-bit codeword. */
static const int16_t high_inv_quant[4] = { -926, -202, 926, 202 };
/**
 * Lower-band log-factor adaptation steps, indexed by the 4-bit codeword.
 *
 * low_log_factor_step[index] == wl[rl42[index]]
 */
static const int16_t low_log_factor_step[16] = {
     -60, 3042, 1198,  538,  334,  172,   58,  -30,
    3042, 1198,  538,  334,  172,   58,  -30,  -60
};

/* Lower-band inverse quantizer levels for 4-bit codewords; this is the table
 * update_low_predictor() uses to drive the predictor. */
static const int16_t low_inv_quant4[16] = {
       0, -2557, -1612, -1121,  -786,  -530,  -323,  -150,
    2557,  1612,  1121,   786,   530,   323,   150,     0
};
/* Lower-band inverse quantizer levels for 6-bit codewords; used by the
 * decoder (via low_inv_quants) and by the trellis encoder to reconstruct
 * candidate samples. */
static const int16_t low_inv_quant6[64] = {
     -17,   -17,   -17,   -17, -3101, -2738, -2376, -2088,
   -1873, -1689, -1535, -1399, -1279, -1170, -1072,  -982,
    -899,  -822,  -750,  -682,  -618,  -558,  -501,  -447,
    -396,  -347,  -300,  -254,  -211,  -170,  -130,   -91,
    3101,  2738,  2376,  2088,  1873,  1689,  1535,  1399,
    1279,  1170,  1072,   982,   899,   822,   750,   682,
     618,   558,   501,   447,   396,   347,   300,   254,
     211,   170,   130,    91,    54,    17,   -54,   -17
};
/**
 * quadrature mirror filter (QMF) coefficients
 *
 * ITU-T G.722 Table 11
 *
 * apply_qmf() reads this table forwards for one output and backwards
 * (index 11 - i) for the other.
 */
static const int16_t qmf_coeffs[12] = {
    3, -11, 12, 32, -210, 951, 3876, -805, 362, -156, 53, -11,
};
/*
 * Adaptive predictor update for one band. Using the new difference signal
 * cur_diff it updates the stored sign history, the two pole coefficients,
 * the six zero coefficients and the difference-signal delay line, then
 * computes the next predictor output into band->s_predictor.
 *
 * NOTE(review): short lines (braces and a few statements) are missing from
 * this extract; the notes below flag where that changes how the code reads.
 */
124 * @param cur_diff the dequantized and scaled delta calculated from the
127 static void do_adaptive_prediction(struct G722Band *band, const int cur_diff)
129 int sg[2], limit, i, cur_qtzd_reconst;
/* 1 when the partially reconstructed signal (s_zero + cur_diff) is negative */
131 const int cur_part_reconst = band->s_zero + cur_diff < 0;
/* sg[0] is +1 when the current sign differs from the previous one, else -1;
 * sg[1] is +1 when it equals the one before that, else -1 */
133 sg[0] = sign_lookup[cur_part_reconst != band->part_reconst_mem[0]];
134 sg[1] = sign_lookup[cur_part_reconst == band->part_reconst_mem[1]];
/* shift the sign history by one sample */
135 band->part_reconst_mem[1] = band->part_reconst_mem[0];
136 band->part_reconst_mem[0] = cur_part_reconst;
/* second pole coefficient: leaky update (127/128), clipped to +/-12288 */
138 band->pole_mem[1] = av_clip((sg[0] * av_clip(band->pole_mem[0], -8191, 8191) >> 5) +
139 (sg[1] << 7) + (band->pole_mem[1] * 127 >> 7), -12288, 12288);
/* first pole coefficient: leaky update (255/256), bounded by a limit that
 * depends on the freshly updated second coefficient */
141 limit = 15360 - band->pole_mem[1];
142 band->pole_mem[0] = av_clip(-192 * sg[0] + (band->pole_mem[0] * 255 >> 8), -limit, limit);
/* NOTE(review): the two zero_mem loops below look like alternative branches
 * (presumably chosen on whether cur_diff is non-zero); the lines selecting
 * between them are not visible in this extract - confirm. */
/* leak each zero coefficient and step it by +/-128 depending on whether
 * diff_mem[i] and cur_diff have the same sign */
146 for (i = 0; i < 6; i++)
147 band->zero_mem[i] = ((band->zero_mem[i]*255) >> 8) +
148 ((band->diff_mem[i]^cur_diff) < 0 ? -128 : 128);
/* leak only */
150 for (i = 0; i < 6; i++)
151 band->zero_mem[i] = (band->zero_mem[i]*255) >> 8;
/* push the doubled, 16-bit-clipped cur_diff into the delay line */
153 for (i = 5; i > 0; i--)
154 band->diff_mem[i] = band->diff_mem[i-1];
155 band->diff_mem[0] = av_clip_int16(cur_diff << 1);
/* zero-section output. NOTE(review): this accumulates with +=; a reset of
 * s_zero before the loop is presumably among the missing lines - confirm. */
158 for (i = 5; i >= 0; i--)
159 band->s_zero += (band->zero_mem[i]*band->diff_mem[i]) >> 15;
/* new predictor output = zero section + two-tap pole section over the current
 * and previous quantized reconstructed values */
162 cur_qtzd_reconst = av_clip_int16((band->s_predictor + cur_diff) << 1);
163 band->s_predictor = av_clip_int16(band->s_zero +
164 (band->pole_mem[0] * cur_qtzd_reconst >> 15) +
165 (band->pole_mem[1] * band->prev_qtzd_reconst >> 15));
166 band->prev_qtzd_reconst = cur_qtzd_reconst;
169 static int inline linear_scale_factor(const int log_factor)
171 const int wd1 = inv_log2_table[(log_factor >> 6) & 31];
172 const int shift = log_factor >> 11;
173 return shift < 0 ? wd1 >> -shift : wd1 << shift;
176 static void update_low_predictor(struct G722Band *band, const int ilow)
178 do_adaptive_prediction(band,
179 band->scale_factor * low_inv_quant4[ilow] >> 10);
181 // quantizer adaptation
182 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
183 low_log_factor_step[ilow], 0, 18432);
184 band->scale_factor = linear_scale_factor(band->log_factor - (8 << 11));
/*
 * Advance the higher-band state by one sample: run the adaptive predictor on
 * the already dequantized difference dhigh, then adapt the quantizer from the
 * codeword ihigh (only its lowest bit selects the log-factor step).
 * NOTE(review): the rest of the parameter list (the ihigh parameter used
 * below) and the braces are not visible in this extract.
 */
187 static void update_high_predictor(struct G722Band *band, const int dhigh,
190 do_adaptive_prediction(band, dhigh);
192 // quantizer adaptation
/* leak by 127/128, add the step, clip to [0, 22528] */
193 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
194 high_log_factor_step[ihigh&1], 0, 22528);
195 band->scale_factor = linear_scale_factor(band->log_factor - (10 << 11));
/*
 * Run the 24-tap QMF over prev_samples[0..23]: even-indexed samples are
 * multiplied by qmf_coeffs[] in forward order and accumulated into *xout2,
 * odd-indexed samples by qmf_coeffs[] in reverse order into *xout1.
 * NOTE(review): MAC16 is defined outside this extract (presumably a
 * multiply-accumulate), and the initialisation of *xout1 / *xout2 is not
 * visible here - confirm they are zeroed before the loop.
 */
198 static void apply_qmf(const int16_t *prev_samples, int *xout1, int *xout2)
204 for (i = 0; i < 12; i++) {
205 MAC16(*xout2, prev_samples[2*i ], qmf_coeffs[i ]);
206 MAC16(*xout1, prev_samples[2*i+1], qmf_coeffs[11-i]);
/*
 * Codec init callback shared by decoder and encoder: rejects non-mono input,
 * selects S16 samples, normalises bits_per_coded_sample, seeds the per-band
 * scale factors and the sample history position, and allocates the trellis
 * buffers when avctx->trellis is set.
 * NOTE(review): switch labels, the second half of the warning string, the
 * condition guarding the sample-rate halving and the return statement are
 * missing from this extract.
 */
210 static av_cold int g722_init(AVCodecContext * avctx)
212 G722Context *c = avctx->priv_data;
214 if (avctx->channels != 1) {
215 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
216 return AVERROR_INVALIDDATA;
218 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
/* unsupported values are reported and replaced by 8 */
220 switch (avctx->bits_per_coded_sample) {
226 av_log(avctx, AV_LOG_WARNING, "Unsupported bits_per_coded_sample [%d], "
228 avctx->bits_per_coded_sample);
230 avctx->bits_per_coded_sample = 8;
/* initial scale factors: lower band 8, higher band 2 */
234 c->band[0].scale_factor = 8;
235 c->band[1].scale_factor = 2;
/* start with 22 (zeroed) history samples so the first pair of new samples
 * completes the 24-sample QMF window */
236 c->prev_samples_pos = 22;
/* NOTE(review): presumably conditional on avctx->lowres (the decoder emits
 * one sample per byte in that mode) - the guarding line is not visible. */
239 avctx->sample_rate /= 2;
241 if (avctx->trellis) {
/* NOTE(review): no upper bound on avctx->trellis is visible, so the shift and
 * the size products could overflow for large values - confirm. */
242 int frontier = 1 << avctx->trellis;
243 int max_paths = frontier * FREEZE_INTERVAL;
/* one set of buffers per band.
 * NOTE(review): no check of the av_mallocz() results is visible here;
 * g722_encode_trellis() dereferences these pointers - confirm. */
245 for (i = 0; i < 2; i++) {
246 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
247 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
248 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
/*
 * Codec close callback: releases the per-band trellis buffers allocated in
 * g722_init() with av_freep().
 * NOTE(review): the loop-variable declaration and the return statement are
 * not visible in this extract.
 */
255 static av_cold int g722_close(AVCodecContext *avctx)
257 G722Context *c = avctx->priv_data;
259 for (i = 0; i < 2; i++) {
260 av_freep(&c->paths[i]);
261 av_freep(&c->node_buf[i]);
262 av_freep(&c->nodep_buf[i]);
267 #if CONFIG_ADPCM_G722_DECODER
/* Lower-band inverse quantizer levels for 5-bit codewords; selected by the
 * decoder through low_inv_quants when one low bit of each byte is skipped. */
static const int16_t low_inv_quant5[32] = {
     -35,   -35, -2919, -2195, -1765, -1458, -1219, -1023,
    -858,  -714,  -587,  -473,  -370,  -276,  -190,  -110,
    2919,  2195,  1765,  1458,  1219,  1023,   858,   714,
     587,   473,   370,   276,   190,   110,    35,   -35
};
/* Lower-band inverse quantizer tables, indexed in g722_decode_frame() by
 * skip = 8 - bits_per_coded_sample (0 selects the 6-bit table, 1 the 5-bit).
 * NOTE(review): the initializer is cut off in this extract; the third entry
 * and the closing brace are not visible. */
275 static const int16_t *low_inv_quants[3] = { low_inv_quant6, low_inv_quant5,
/*
 * Decode one packet. Each input byte yields a 2-bit higher-band codeword and
 * a (6 - skip)-bit lower-band codeword, where skip = 8 - bits_per_coded_sample
 * low bits are ignored. Without lowres both bands are reconstructed and
 * passed through the QMF to give two output samples per byte; the lowres
 * path outputs the lower-band sample only.
 * *data_size is set to the number of output bytes.
 * NOTE(review): several declarations, the remaining clip/QMF arguments, the
 * else keyword of the lowres branch and the return statement are missing
 * from this extract. No check of the output buffer capacity is visible.
 */
278 static int g722_decode_frame(AVCodecContext *avctx, void *data,
279 int *data_size, AVPacket *avpkt)
281 G722Context *c = avctx->priv_data;
282 int16_t *out_buf = data;
/* number of ignored low bits per byte; also picks the dequantizer table */
284 const int skip = 8 - avctx->bits_per_coded_sample;
285 const int16_t *quantizer_table = low_inv_quants[skip];
288 init_get_bits(&gb, avpkt->data, avpkt->size * 8);
290 for (j = 0; j < avpkt->size; j++) {
291 int ilow, ihigh, rlow;
293 ihigh = get_bits(&gb, 2);
294 ilow = get_bits(&gb, 6 - skip);
295 skip_bits(&gb, skip);
/* reconstructed lower-band sample, clipped to 15 bits */
297 rlow = av_clip((c->band[0].scale_factor * quantizer_table[ilow] >> 10)
298 + c->band[0].s_predictor, -16384, 16383);
/* the predictor always works on the top 4 bits of the lower-band codeword */
300 update_low_predictor(&c->band[0], ilow >> (2 - skip));
302 if (!avctx->lowres) {
303 const int dhigh = c->band[1].scale_factor *
304 high_inv_quant[ihigh] >> 10;
305 const int rhigh = av_clip(dhigh + c->band[1].s_predictor,
309 update_high_predictor(&c->band[1], dhigh, ihigh);
/* append the sum and difference of the two bands to the history, then run
 * the QMF over the most recent 24 entries */
311 c->prev_samples[c->prev_samples_pos++] = rlow + rhigh;
312 c->prev_samples[c->prev_samples_pos++] = rlow - rhigh;
313 apply_qmf(c->prev_samples + c->prev_samples_pos - 24,
315 out_buf[out_len++] = av_clip_int16(xout1 >> 12);
316 out_buf[out_len++] = av_clip_int16(xout2 >> 12);
/* history buffer full: keep the last 22 samples and restart at position 22 */
317 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
318 memmove(c->prev_samples,
319 c->prev_samples + c->prev_samples_pos - 22,
320 22 * sizeof(c->prev_samples[0]));
321 c->prev_samples_pos = 22;
/* lowres output: lower band only (presumably the else branch - see note above) */
324 out_buf[out_len++] = rlow;
/* out_len counts int16_t samples; convert to bytes */
326 *data_size = out_len << 1;
/* Decoder registration entry for CODEC_ID_ADPCM_G722.
 * NOTE(review): some initializer fields and the closing brace are not
 * visible in this extract. */
330 AVCodec ff_adpcm_g722_decoder = {
332 .type = AVMEDIA_TYPE_AUDIO,
333 .id = CODEC_ID_ADPCM_G722,
334 .priv_data_size = sizeof(G722Context),
336 .decode = g722_decode_frame,
337 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
342 #if CONFIG_ADPCM_G722_ENCODER
/* Ascending decision thresholds searched by encode_low(), which compares
 * them (scaled by the band's scale factor) against the prediction error.
 * Only the first 29 entries are given explicitly; the remaining four are
 * zero-initialised and encode_low() never indexes past entry 28. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};
/*
 * Encoder analysis step: append two input samples to the history, run the
 * QMF over the most recent 24 samples, and return the lower band as
 * (xout1 + xout2) >> 13 and the higher band as (xout1 - xout2) >> 13.
 * When the history buffer is full, its last 22 samples are moved to the
 * front and the write position restarts at 22.
 * NOTE(review): the declaration of xout1/xout2 and the braces are not
 * visible in this extract.
 */
350 static inline void filter_samples(G722Context *c, const int16_t *samples,
351 int *xlow, int *xhigh)
354 c->prev_samples[c->prev_samples_pos++] = samples[0];
355 c->prev_samples[c->prev_samples_pos++] = samples[1];
356 apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
/* binary + and - bind tighter than >>, so the sum/difference is shifted */
357 *xlow = xout1 + xout2 >> 13;
358 *xhigh = xout1 - xout2 >> 13;
359 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
360 memmove(c->prev_samples,
361 c->prev_samples + c->prev_samples_pos - 22,
362 22 * sizeof(c->prev_samples[0]));
363 c->prev_samples_pos = 22;
367 static inline int encode_high(const struct G722Band *state, int xhigh)
369 int diff = av_clip_int16(xhigh - state->s_predictor);
370 int pred = 141 * state->scale_factor >> 8;
371 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
372 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
/*
 * Quantize one lower-band sample to its 6-bit codeword: the 16-bit-clipped
 * prediction error is reduced to a magnitude-like limit, which is compared
 * against low_quant[] thresholds scaled by the band's scale factor; the
 * resulting index i and the sign of the error give the codeword.
 * NOTE(review): the declaration of i and the statements that modify it (the
 * body of the if and of the while) are missing from this extract.
 */
375 static inline int encode_low(const struct G722Band* state, int xlow)
377 int diff = av_clip_int16(xlow - state->s_predictor);
378 /* = diff >= 0 ? diff : -(diff + 1) */
379 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
/* + binds tighter than <<, so this is (limit + 1) << 10 */
381 limit = limit + 1 << 10;
/* coarse comparison against threshold 8 before the linear search below */
382 if (limit > low_quant[8] * state->scale_factor)
384 while (i < 29 && limit > low_quant[i] * state->scale_factor)
386 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
/*
 * Trellis-search encoder. For every pair of input samples it keeps up to
 * frontier (1 << avctx->trellis) candidate states per band, ordered as a
 * heap on the accumulated squared error (ssd), tries alternative codewords
 * around each candidate, and records the choices in c->paths[]. Every
 * FREEZE_INTERVAL bytes, and once more after the loop, the best path is
 * walked backwards to write the output bytes (higher-band codeword in the
 * top two bits, lower-band codeword in the low six).
 * NOTE(review): many short lines are missing from this extract
 * (declarations, continue/goto/label lines, parts of the STORE_NODE macro,
 * closing braces, the return statement); the code below is left untouched.
 */
389 static int g722_encode_trellis(AVCodecContext *avctx,
390 uint8_t *dst, int buf_size, void *data)
392 G722Context *c = avctx->priv_data;
393 const int16_t *samples = data;
395 int frontier = 1 << avctx->trellis;
396 struct TrellisNode **nodes[2];
397 struct TrellisNode **nodes_next[2];
398 int pathn[2] = {0, 0}, froze = -1;
399 struct TrellisPath *p[2];
/* per band: split the pointer buffer into current/next halves and seed the
 * search with one node holding the codec's current band state */
401 for (i = 0; i < 2; i++) {
402 nodes[i] = c->nodep_buf[i];
403 nodes_next[i] = c->nodep_buf[i] + frontier;
404 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
405 nodes[i][0] = c->node_buf[i] + frontier;
406 nodes[i][0]->ssd = 0;
407 nodes[i][0]->path = 0;
408 nodes[i][0]->state = c->band[i];
/* one iteration per output byte (two input samples) */
411 for (i = 0; i < buf_size >> 1; i++) {
413 struct TrellisNode *next[2];
414 int heap_pos[2] = {0, 0};
/* alternate between the two halves of node_buf for the new nodes and clear
 * the next-generation pointer array */
416 for (j = 0; j < 2; j++) {
417 next[j] = c->node_buf[j] + frontier*(i & 1);
418 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
421 filter_samples(c, &samples[2*i], &xlow, &xhigh);
/* lower band: expand every live candidate */
423 for (j = 0; j < frontier && nodes[0][j]; j++) {
424 /* Only k >> 2 affects the future adaptive state, therefore testing
425 * small steps that don't change k >> 2 is useless, the original
426 * value from encode_low is better than them. Since we step k
427 * in steps of 4, make sure range is a multiple of 4, so that
428 * we don't miss the original value from encode_low. */
429 int range = j < frontier/2 ? 4 : 0;
430 struct TrellisNode *cur_node = nodes[0][j];
432 int ilow = encode_low(&cur_node->state, xlow);
434 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
435 int decoded, dec_diff, pos;
437 struct TrellisNode* node;
/* what the decoder would reconstruct for codeword k, and its error */
442 decoded = av_clip((cur_node->state.scale_factor *
443 low_inv_quant6[k] >> 10)
444 + cur_node->state.s_predictor, -16384, 16383);
445 dec_diff = xlow - decoded;
/* STORE_NODE(index, UPDATE, VALUE): insert the candidate for band index into
 * the next-generation heap, replacing a leaf when the heap is full */
447 #define STORE_NODE(index, UPDATE, VALUE)\
448 ssd = cur_node->ssd + dec_diff*dec_diff;\
449 /* Check for wraparound. Using 64 bit ssd counters would \
450 * be simpler, but is slower on x86 32 bit. */\
451 if (ssd < cur_node->ssd)\
453 if (heap_pos[index] < frontier) {\
454 pos = heap_pos[index]++;\
455 assert(pathn[index] < FREEZE_INTERVAL * frontier);\
456 node = nodes_next[index][pos] = next[index]++;\
457 node->path = pathn[index]++;\
459 /* Try to replace one of the leaf nodes with the new \
460 * one, but not always testing the same leaf position */\
461 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
462 if (ssd >= nodes_next[index][pos]->ssd)\
465 node = nodes_next[index][pos];\
468 node->state = cur_node->state;\
470 c->paths[index][node->path].value = VALUE;\
471 c->paths[index][node->path].prev = cur_node->path;\
472 /* Sift the newly inserted node up in the heap to restore \
473 * the heap property */\
475 int parent = (pos - 1) >> 1;\
476 if (nodes_next[index][parent]->ssd <= ssd)\
478 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
479 nodes_next[index][pos]);\
482 STORE_NODE(0, update_low_predictor(&node->state, k >> 2), k);
/* higher band: expand every live candidate */
486 for (j = 0; j < frontier && nodes[1][j]; j++) {
488 struct TrellisNode *cur_node = nodes[1][j];
490 /* We don't try to get any initial guess for ihigh via
491 * encode_high - since there's only 4 possible values, test
492 * them all. Testing all of these gives a much, much larger
493 * gain than testing a larger range around ilow. */
494 for (ihigh = 0; ihigh < 4; ihigh++) {
495 int dhigh, decoded, dec_diff, pos;
497 struct TrellisNode* node;
499 dhigh = cur_node->state.scale_factor *
500 high_inv_quant[ihigh] >> 10;
501 decoded = av_clip(dhigh + cur_node->state.s_predictor,
503 dec_diff = xhigh - decoded;
505 STORE_NODE(1, update_high_predictor(&node->state, dhigh, ihigh), ihigh);
/* make the new generation current; rebase the ssd values on the best node
 * once it exceeds 1 << 16 so the 32-bit counters stay small */
509 for (j = 0; j < 2; j++) {
510 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
512 if (nodes[j][0]->ssd > (1 << 16)) {
513 for (k = 1; k < frontier && nodes[j][k]; k++)
514 nodes[j][k]->ssd -= nodes[j][0]->ssd;
515 nodes[j][0]->ssd = 0;
/* freeze: commit the best path since the last freeze to dst, then restart
 * the search from the best node only */
519 if (i == froze + FREEZE_INTERVAL) {
520 p[0] = &c->paths[0][nodes[0][0]->path];
521 p[1] = &c->paths[1][nodes[1][0]->path];
522 for (j = i; j > froze; j--) {
523 dst[j] = p[1]->value << 6 | p[0]->value;
524 p[0] = &c->paths[0][p[0]->prev];
525 p[1] = &c->paths[1][p[1]->prev];
528 pathn[0] = pathn[1] = 0;
529 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
530 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
/* final flush of the path not yet frozen.
 * NOTE(review): if the loop above ran to completion, i equals buf_size >> 1
 * here, so j starts one past the last loop index - verify the intended
 * index range against the in-loop freeze. */
534 p[0] = &c->paths[0][nodes[0][0]->path];
535 p[1] = &c->paths[1][nodes[1][0]->path];
536 for (j = i; j > froze; j--) {
537 dst[j] = p[1]->value << 6 | p[0]->value;
538 p[0] = &c->paths[0][p[0]->prev];
539 p[1] = &c->paths[1][p[1]->prev];
/* adopt the winning candidates as the codec state for the next call */
541 c->band[0] = nodes[0][0]->state;
542 c->band[1] = nodes[1][0]->state;
/*
 * Encode callback. The plain path consumes two input samples per output
 * byte: split into bands with filter_samples(), quantize each band, update
 * both predictors with the chosen codewords and pack the byte as
 * ihigh << 6 | ilow.
 * NOTE(review): the condition guarding the trellis call (presumably
 * avctx->trellis), the declaration of i and the final return are missing
 * from this extract.
 */
547 static int g722_encode_frame(AVCodecContext *avctx,
548 uint8_t *dst, int buf_size, void *data)
550 G722Context *c = avctx->priv_data;
551 const int16_t *samples = data;
555 return g722_encode_trellis(avctx, dst, buf_size, data);
557 for (i = 0; i < buf_size >> 1; i++) {
558 int xlow, xhigh, ihigh, ilow;
559 filter_samples(c, &samples[2*i], &xlow, &xhigh);
560 ihigh = encode_high(&c->band[1], xhigh);
561 ilow = encode_low(&c->band[0], xlow);
/* feed the predictors exactly what the decoder will see: the dequantized
 * higher-band difference and the top 4 bits of the lower-band codeword */
562 update_high_predictor(&c->band[1], c->band[1].scale_factor *
563 high_inv_quant[ihigh] >> 10, ihigh);
564 update_low_predictor(&c->band[0], ilow >> 2);
565 *dst++ = ihigh << 6 | ilow;
/* Encoder registration entry for CODEC_ID_ADPCM_G722; accepts S16 input only.
 * NOTE(review): some initializer fields and the closing brace are not
 * visible in this extract. */
570 AVCodec ff_adpcm_g722_encoder = {
572 .type = AVMEDIA_TYPE_AUDIO,
573 .id = CODEC_ID_ADPCM_G722,
574 .priv_data_size = sizeof(G722Context),
577 .encode = g722_encode_frame,
578 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
579 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},