2 * G.722 ADPCM audio encoder/decoder
4 * Copyright (c) CMU 1993 Computer Science, Speech Group
5 * Chengxiang Lu and Alex Hauptmann
6 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
7 * Copyright (c) 2009 Kenan Gillet
8 * Copyright (c) 2010 Martin Storsjo
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 * G.722 ADPCM audio codec
32 * This G.722 decoder is a bit-exact implementation of the ITU G.722
33 * specification for all three specified bitrates - 64000bps, 56000bps
34 * and 48000bps. It passes the ITU tests.
36 * @note For the 56000bps and 48000bps bitrates, the lowest 1 or 2 bits
37 * respectively of each byte are ignored.
/* Number of int16_t entries in the prev_samples history buffer; when the
 * write position reaches this value the newest 22 samples are moved back
 * to the start of the buffer. */
44 #define PREV_SAMPLES_BUF_SIZE 1024
/* Trellis encoder: number of steps between path freezes; g722_init sizes
 * the per-band path storage as frontier * FREEZE_INTERVAL entries. */
46 #define FREEZE_INTERVAL 128
/* NOTE(review): member declarations of the codec context and of its nested
 * band / trellis types. The lines that open and close those types are not
 * present in this view, so the exact nesting is not documented here. */
49 int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]; ///< memory of past decoded samples
50 int prev_samples_pos; ///< the number of values in prev_samples
53 * The band[0] and band[1] correspond respectively to the lower band and higher band.
56 int16_t s_predictor; ///< predictor output value
57 int32_t s_zero; ///< previous output signal from zero predictor
58 int8_t part_reconst_mem[2]; ///< signs of previous partially reconstructed signals
59 int16_t prev_qtzd_reconst; ///< previous quantized reconstructed signal (internal value, using low_inv_quant4)
60 int16_t pole_mem[2]; ///< second-order pole section coefficient buffer
61 int32_t diff_mem[6]; ///< quantizer difference signal memory
62 int16_t zero_mem[6]; ///< sixth-order zero section coefficient buffer (6 entries)
63 int16_t log_factor; ///< delayed 2-logarithmic quantizer factor
64 int16_t scale_factor; ///< delayed quantizer scale factor
68 struct G722Band state;
71 } *node_buf[2], **nodep_buf[2];
/* Maps a boolean (0 or 1) to a sign (-1 or +1); do_adaptive_prediction
 * indexes it with the result of sign comparisons. */
80 static const int8_t sign_lookup[2] = { -1, 1 };
/* 32-entry table read by linear_scale_factor with index
 * (log_factor >> 6) & 31. Entries rise monotonically from 2048 to 4008 --
 * presumably round(2048 * 2^(i/32)); TODO confirm against the G.722 spec. */
82 static const int16_t inv_log2_table[32] = {
83 2048, 2093, 2139, 2186, 2233, 2282, 2332, 2383,
84 2435, 2489, 2543, 2599, 2656, 2714, 2774, 2834,
85 2896, 2960, 3025, 3091, 3158, 3228, 3298, 3371,
86 3444, 3520, 3597, 3676, 3756, 3838, 3922, 4008
/* Higher-band log-factor adaptation steps, selected with (ihigh & 1) in
 * update_high_predictor. */
88 static const int16_t high_log_factor_step[2] = { 798, -214 };
/* Higher-band inverse quantizer outputs, indexed by the 2-bit code ihigh;
 * callers multiply by the band's scale_factor and shift right by 10. */
89 static const int16_t high_inv_quant[4] = { -926, -202, 926, 202 };
91 * low_log_factor_step[index] == wl[rl42[index]]
/* Lower-band log-factor adaptation steps; update_low_predictor indexes this
 * table with its ilow argument (16 possible values). */
93 static const int16_t low_log_factor_step[16] = {
94 -60, 3042, 1198, 538, 334, 172, 58, -30,
95 3042, 1198, 538, 334, 172, 58, -30, -60
/* Lower-band inverse quantizer outputs for a 4-bit index; used by
 * update_low_predictor to form the difference signal fed to the predictor. */
97 static const int16_t low_inv_quant4[16] = {
98 0, -2557, -1612, -1121, -786, -530, -323, -150,
99 2557, 1612, 1121, 786, 530, 323, 150, 0
/* Lower-band inverse quantizer outputs for a 6-bit index; read directly by
 * the trellis encoder and referenced as the first entry of low_inv_quants. */
101 static const int16_t low_inv_quant6[64] = {
102 -17, -17, -17, -17, -3101, -2738, -2376, -2088,
103 -1873, -1689, -1535, -1399, -1279, -1170, -1072, -982,
104 -899, -822, -750, -682, -618, -558, -501, -447,
105 -396, -347, -300, -254, -211, -170, -130, -91,
106 3101, 2738, 2376, 2088, 1873, 1689, 1535, 1399,
107 1279, 1170, 1072, 982, 899, 822, 750, 682,
108 618, 558, 501, 447, 396, 347, 300, 254,
109 211, 170, 130, 91, 54, 17, -54, -17
113 * quadrature mirror filter (QMF) coefficients
115 * ITU-T G.722 Table 11
/* 12 filter taps; apply_qmf reads them in forward order for even-indexed
 * history samples and in reverse order (11 - i) for odd-indexed ones. */
117 static const int16_t qmf_coeffs[12] = {
118 3, -11, 12, 32, -210, 951, 3876, -805, 362, -156, 53, -11,
125 * @param cur_diff the dequantized and scaled delta calculated from the
/* Adaptive predictor update for one sub-band: refreshes the pole and zero
 * section coefficients from cur_diff, shifts the difference history, and
 * recomputes s_zero and s_predictor.
 * NOTE(review): some source lines of this function are absent from this
 * view; the comments below describe only the statements that are visible. */
128 static void do_adaptive_prediction(struct G722Band *band, const int cur_diff)
130 int sg[2], limit, i, cur_qtzd_reconst;
/* 1 when the partially reconstructed signal (s_zero + cur_diff) is negative */
132 const int cur_part_reconst = band->s_zero + cur_diff < 0;
/* sg[0], sg[1]: +1 / -1 from comparing the current sign flag with the two
 * stored ones; the stored flags are then shifted by one position. */
134 sg[0] = sign_lookup[cur_part_reconst != band->part_reconst_mem[0]];
135 sg[1] = sign_lookup[cur_part_reconst == band->part_reconst_mem[1]];
136 band->part_reconst_mem[1] = band->part_reconst_mem[0];
137 band->part_reconst_mem[0] = cur_part_reconst;
/* Second pole coefficient: leaked by 127/128 and clipped to [-12288, 12288]. */
139 band->pole_mem[1] = av_clip((sg[0] * av_clip(band->pole_mem[0], -8191, 8191) >> 5) +
140 (sg[1] << 7) + (band->pole_mem[1] * 127 >> 7), -12288, 12288);
/* First pole coefficient: leaked by 255/256 and limited to
 * +/-(15360 - pole_mem[1]). */
142 limit = 15360 - band->pole_mem[1];
143 band->pole_mem[0] = av_clip(-192 * sg[0] + (band->pole_mem[0] * 255 >> 8), -limit, limit);
/* NOTE(review): both loops below rewrite zero_mem; presumably they are the
 * two branches of a conditional whose lines are missing here -- confirm.
 * First form: leak by 255/256 and add -128 when diff_mem[i] and cur_diff
 * differ in sign, +128 otherwise. Second form: leak only. */
147 for (i = 0; i < 6; i++)
148 band->zero_mem[i] = ((band->zero_mem[i]*255) >> 8) +
149 ((band->diff_mem[i]^cur_diff) < 0 ? -128 : 128);
151 for (i = 0; i < 6; i++)
152 band->zero_mem[i] = (band->zero_mem[i]*255) >> 8;
/* Shift the difference history and store the new (doubled, 16-bit clipped)
 * difference in slot 0. */
154 for (i = 5; i > 0; i--)
155 band->diff_mem[i] = band->diff_mem[i-1];
156 band->diff_mem[0] = av_clip_int16(cur_diff << 1);
/* Accumulate the zero-section output. NOTE(review): s_zero is only added to
 * here; any reset before this loop is not visible in this view. */
159 for (i = 5; i >= 0; i--)
160 band->s_zero += (band->zero_mem[i]*band->diff_mem[i]) >> 15;
/* New predictor output = zero-section output + pole-section output computed
 * from the current and previous quantized reconstructed signals. */
163 cur_qtzd_reconst = av_clip_int16((band->s_predictor + cur_diff) << 1);
164 band->s_predictor = av_clip_int16(band->s_zero +
165 (band->pole_mem[0] * cur_qtzd_reconst >> 15) +
166 (band->pole_mem[1] * band->prev_qtzd_reconst >> 15));
167 band->prev_qtzd_reconst = cur_qtzd_reconst;
170 static int inline linear_scale_factor(const int log_factor)
172 const int wd1 = inv_log2_table[(log_factor >> 6) & 31];
173 const int shift = log_factor >> 11;
174 return shift < 0 ? wd1 >> -shift : wd1 << shift;
177 static void update_low_predictor(struct G722Band *band, const int ilow)
179 do_adaptive_prediction(band,
180 band->scale_factor * low_inv_quant4[ilow] >> 10);
182 // quantizer adaptation
183 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
184 low_log_factor_step[ilow], 0, 18432);
185 band->scale_factor = linear_scale_factor(band->log_factor - (8 << 11));
/* Advance the higher-band predictor with the difference signal dhigh, then
 * adapt the band's log factor (clipped to [0, 22528]) and scale factor.
 * NOTE(review): the rest of the parameter list is not visible here; the body
 * reads an `ihigh` value and uses only its lowest bit. */
188 static void update_high_predictor(struct G722Band *band, const int dhigh,
191 do_adaptive_prediction(band, dhigh);
193 // quantizer adaptation
194 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
195 high_log_factor_step[ihigh&1], 0, 22528);
196 band->scale_factor = linear_scale_factor(band->log_factor - (10 << 11));
/* Multiply-accumulates 24 history samples against qmf_coeffs: even-indexed
 * samples go into *xout2 with the taps in forward order, odd-indexed samples
 * into *xout1 with the taps reversed.
 * NOTE(review): how *xout1 / *xout2 are initialised before the loop is not
 * visible in this view. */
199 static void apply_qmf(const int16_t *prev_samples, int *xout1, int *xout2)
205 for (i = 0; i < 12; i++) {
206 MAC16(*xout2, prev_samples[2*i ], qmf_coeffs[i ]);
207 MAC16(*xout1, prev_samples[2*i+1], qmf_coeffs[11-i]);
/* Initialises the private codec context: rejects non-mono input, selects
 * S16 samples, normalises bits_per_coded_sample, seeds both bands' scale
 * factors and the sample history position, and allocates the trellis
 * buffers when avctx->trellis is set.
 * NOTE(review): several lines (switch cases, conditions, return) are absent
 * from this view. */
211 static av_cold int g722_init(AVCodecContext * avctx)
213 G722Context *c = avctx->priv_data;
215 if (avctx->channels != 1) {
216 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
217 return AVERROR_INVALIDDATA;
219 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
/* Unsupported sample widths are reported with a warning and replaced by 8. */
221 switch (avctx->bits_per_coded_sample) {
227 av_log(avctx, AV_LOG_WARNING, "Unsupported bits_per_coded_sample [%d], "
229 avctx->bits_per_coded_sample);
231 avctx->bits_per_coded_sample = 8;
/* Initial scale factors for the lower (band[0]) and higher (band[1]) band;
 * the history starts with 22 (zeroed) samples considered present. */
235 c->band[0].scale_factor = 8;
236 c->band[1].scale_factor = 2;
237 c->prev_samples_pos = 22;
/* NOTE(review): the condition guarding this halving is not visible here. */
240 avctx->sample_rate /= 2;
/* NOTE(review): in the visible lines avctx->trellis is not range-checked
 * before being used as a shift count, and the av_mallocz results are not
 * checked for NULL -- confirm against the full function. */
242 if (avctx->trellis) {
243 int frontier = 1 << avctx->trellis;
244 int max_paths = frontier * FREEZE_INTERVAL;
246 for (i = 0; i < 2; i++) {
247 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
248 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
249 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
/* Releases the per-band trellis buffers (paths, node_buf, nodep_buf) with
 * av_freep. */
256 static av_cold int g722_close(AVCodecContext *avctx)
258 G722Context *c = avctx->priv_data;
260 for (i = 0; i < 2; i++) {
261 av_freep(&c->paths[i]);
262 av_freep(&c->node_buf[i]);
263 av_freep(&c->nodep_buf[i]);
/* Decoder-only tables, compiled when CONFIG_ADPCM_G722_DECODER is set. */
268 #if CONFIG_ADPCM_G722_DECODER
/* Lower-band inverse quantizer outputs for a 5-bit index. */
269 static const int16_t low_inv_quant5[32] = {
270 -35, -35, -2919, -2195, -1765, -1458, -1219, -1023,
271 -858, -714, -587, -473, -370, -276, -190, -110,
272 2919, 2195, 1765, 1458, 1219, 1023, 858, 714,
273 587, 473, 370, 276, 190, 110, 35, -35
/* Inverse quantizer table per mode; g722_decode_frame indexes it with
 * skip = 8 - bits_per_coded_sample. */
276 static const int16_t *low_inv_quants[3] = { low_inv_quant6, low_inv_quant5,
/* Decodes one packet. Each input byte carries a 2-bit higher-band code
 * followed by a (6 - skip)-bit lower-band code and `skip` ignored bits,
 * where skip = 8 - bits_per_coded_sample. On return *data_size holds the
 * number of output bytes written (two per int16_t sample).
 * NOTE(review): several lines are absent from this view, and no check that
 * the output buffer can hold the decoded samples is visible -- confirm. */
279 static int g722_decode_frame(AVCodecContext *avctx, void *data,
280 int *data_size, AVPacket *avpkt)
282 G722Context *c = avctx->priv_data;
283 int16_t *out_buf = data;
285 const int skip = 8 - avctx->bits_per_coded_sample;
286 const int16_t *quantizer_table = low_inv_quants[skip];
289 init_get_bits(&gb, avpkt->data, avpkt->size * 8);
291 for (j = 0; j < avpkt->size; j++) {
292 int ilow, ihigh, rlow;
294 ihigh = get_bits(&gb, 2);
295 ilow = get_bits(&gb, 6 - skip);
296 skip_bits(&gb, skip);
/* Reconstructed lower-band sample, clipped to 15 bits. */
298 rlow = av_clip((c->band[0].scale_factor * quantizer_table[ilow] >> 10)
299 + c->band[0].s_predictor, -16384, 16383);
/* The predictor is always driven with the code reduced to 4 bits. */
301 update_low_predictor(&c->band[0], ilow >> (2 - skip));
/* Full-rate path: decode the higher band, push sum and difference of the two
 * bands into the history and run the synthesis filter for two output samples. */
303 if (!avctx->lowres) {
304 const int dhigh = c->band[1].scale_factor *
305 high_inv_quant[ihigh] >> 10;
306 const int rhigh = av_clip(dhigh + c->band[1].s_predictor,
310 update_high_predictor(&c->band[1], dhigh, ihigh);
312 c->prev_samples[c->prev_samples_pos++] = rlow + rhigh;
313 c->prev_samples[c->prev_samples_pos++] = rlow - rhigh;
314 apply_qmf(c->prev_samples + c->prev_samples_pos - 24,
316 out_buf[out_len++] = av_clip_int16(xout1 >> 12);
317 out_buf[out_len++] = av_clip_int16(xout2 >> 12);
/* History buffer full: keep the newest 22 samples at the start. */
318 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
319 memmove(c->prev_samples,
320 c->prev_samples + c->prev_samples_pos - 22,
321 22 * sizeof(c->prev_samples[0]));
322 c->prev_samples_pos = 22;
/* Presumably the lowres branch: output the lower band only -- the line
 * introducing this branch is not visible. */
325 out_buf[out_len++] = rlow;
327 *data_size = out_len << 1;
/* Decoder registration entry. NOTE(review): only some of the initialisers
 * are present in this view. */
331 AVCodec adpcm_g722_decoder = {
333 .type = AVMEDIA_TYPE_AUDIO,
334 .id = CODEC_ID_ADPCM_G722,
335 .priv_data_size = sizeof(G722Context),
337 .decode = g722_decode_frame,
338 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
/* Encoder-only section, compiled when CONFIG_ADPCM_G722_ENCODER is set. */
343 #if CONFIG_ADPCM_G722_ENCODER
/* Lower-band quantizer decision levels searched by encode_low. The array is
 * declared with 33 elements but only 29 initialisers are listed, so the
 * remaining entries are zero; encode_low bounds its index with i < 29. */
344 static const int16_t low_quant[33] = {
345 35, 72, 110, 150, 190, 233, 276, 323,
346 370, 422, 473, 530, 587, 650, 714, 786,
347 858, 940, 1023, 1121, 1219, 1339, 1458, 1612,
348 1765, 1980, 2195, 2557, 2919
351 static inline void filter_samples(G722Context *c, const int16_t *samples,
352 int *xlow, int *xhigh)
355 c->prev_samples[c->prev_samples_pos++] = samples[0];
356 c->prev_samples[c->prev_samples_pos++] = samples[1];
357 apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
358 *xlow = xout1 + xout2 >> 13;
359 *xhigh = xout1 - xout2 >> 13;
360 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
361 memmove(c->prev_samples,
362 c->prev_samples + c->prev_samples_pos - 22,
363 22 * sizeof(c->prev_samples[0]));
364 c->prev_samples_pos = 22;
368 static inline int encode_high(const struct G722Band *state, int xhigh)
370 int diff = av_clip_int16(xhigh - state->s_predictor);
371 int pred = 141 * state->scale_factor >> 8;
372 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
373 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
/* Quantize one lower-band sample to a 6-bit code by searching low_quant
 * (scaled by the band's scale_factor) for the first level that is not
 * exceeded by the prediction error magnitude.
 * NOTE(review): the declaration and updates of `i` are on lines that are
 * absent from this view. */
376 static inline int encode_low(const struct G722Band* state, int xlow)
378 int diff = av_clip_int16(xlow - state->s_predictor);
379 /* = diff >= 0 ? diff : -(diff + 1) */
380 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
/* Parses as (limit + 1) << 10: brings the magnitude to the scale of
 * low_quant[] * scale_factor. */
382 limit = limit + 1 << 10;
383 if (limit > low_quant[8] * state->scale_factor)
385 while (i < 29 && limit > low_quant[i] * state->scale_factor)
/* Map the level index to the code, with separate bases for negative and
 * non-negative errors. */
387 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
/* Trellis-search encoder. For every pair of input samples it expands the
 * surviving candidate predictor states of each band (at most
 * frontier = 1 << avctx->trellis per band), scores each candidate by its
 * accumulated squared error (ssd), and stores the survivors through the
 * STORE_NODE macro. Every FREEZE_INTERVAL steps, and once more at the end,
 * the path of the head node of each band is traced back to emit bytes.
 * NOTE(review): a number of source lines (declarations, parts of the
 * STORE_NODE macro, the return) are absent from this view. */
390 static int g722_encode_trellis(AVCodecContext *avctx,
391 uint8_t *dst, int buf_size, void *data)
393 G722Context *c = avctx->priv_data;
394 const int16_t *samples = data;
396 int frontier = 1 << avctx->trellis;
397 struct TrellisNode **nodes[2];
398 struct TrellisNode **nodes_next[2];
399 int pathn[2] = {0, 0}, froze = -1;
400 struct TrellisPath *p[2];
/* Per band: split nodep_buf into a current and a next half, clear it, and
 * seed the first node with the band's current predictor state. */
402 for (i = 0; i < 2; i++) {
403 nodes[i] = c->nodep_buf[i];
404 nodes_next[i] = c->nodep_buf[i] + frontier;
405 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
406 nodes[i][0] = c->node_buf[i] + frontier;
407 nodes[i][0]->ssd = 0;
408 nodes[i][0]->path = 0;
409 nodes[i][0]->state = c->band[i];
/* One iteration per pair of input samples. */
412 for (i = 0; i < buf_size >> 1; i++) {
414 struct TrellisNode *next[2];
415 int heap_pos[2] = {0, 0};
/* Alternate between the two halves of node_buf for the new nodes. */
417 for (j = 0; j < 2; j++) {
418 next[j] = c->node_buf[j] + frontier*(i & 1);
419 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
422 filter_samples(c, &samples[2*i], &xlow, &xhigh);
/* Lower band: expand every surviving node. */
424 for (j = 0; j < frontier && nodes[0][j]; j++) {
425 /* Only k >> 2 affects the future adaptive state, therefore testing
426 * small steps that don't change k >> 2 is useless, the original
427 * value from encode_low is better than them. Since we step k
428 * in steps of 4, make sure range is a multiple of 4, so that
429 * we don't miss the original value from encode_low. */
430 int range = j < frontier/2 ? 4 : 0;
431 struct TrellisNode *cur_node = nodes[0][j];
433 int ilow = encode_low(&cur_node->state, xlow);
435 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
436 int decoded, dec_diff, pos;
438 struct TrellisNode* node;
443 decoded = av_clip((cur_node->state.scale_factor *
444 low_inv_quant6[k] >> 10)
445 + cur_node->state.s_predictor, -16384, 16383);
446 dec_diff = xlow - decoded;
448 #define STORE_NODE(index, UPDATE, VALUE)\
449 ssd = cur_node->ssd + dec_diff*dec_diff;\
450 /* Check for wraparound. Using 64 bit ssd counters would \
451 * be simpler, but is slower on x86 32 bit. */\
452 if (ssd < cur_node->ssd)\
454 if (heap_pos[index] < frontier) {\
455 pos = heap_pos[index]++;\
456 assert(pathn[index] < FREEZE_INTERVAL * frontier);\
457 node = nodes_next[index][pos] = next[index]++;\
458 node->path = pathn[index]++;\
460 /* Try to replace one of the leaf nodes with the new \
461 * one, but not always testing the same leaf position */\
462 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
463 if (ssd >= nodes_next[index][pos]->ssd)\
466 node = nodes_next[index][pos];\
469 node->state = cur_node->state;\
471 c->paths[index][node->path].value = VALUE;\
472 c->paths[index][node->path].prev = cur_node->path;\
473 /* Sift the newly inserted node up in the heap to restore \
474 * the heap property */\
476 int parent = (pos - 1) >> 1;\
477 if (nodes_next[index][parent]->ssd <= ssd)\
479 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
480 nodes_next[index][pos]);\
483 STORE_NODE(0, update_low_predictor(&node->state, k >> 2), k);
/* Higher band: expand every surviving node. */
487 for (j = 0; j < frontier && nodes[1][j]; j++) {
489 struct TrellisNode *cur_node = nodes[1][j];
491 /* We don't try to get any initial guess for ihigh via
492 * encode_high - since there's only 4 possible values, test
493 * them all. Testing all of these gives a much, much larger
494 * gain than testing a larger range around ilow. */
495 for (ihigh = 0; ihigh < 4; ihigh++) {
496 int dhigh, decoded, dec_diff, pos;
498 struct TrellisNode* node;
500 dhigh = cur_node->state.scale_factor *
501 high_inv_quant[ihigh] >> 10;
502 decoded = av_clip(dhigh + cur_node->state.s_predictor,
504 dec_diff = xhigh - decoded;
506 STORE_NODE(1, update_high_predictor(&node->state, dhigh, ihigh), ihigh);
/* Make the freshly built node lists current. When the head node's ssd
 * exceeds 1 << 16, subtract it from all other nodes and reset it to 0 so
 * the counters stay small. */
510 for (j = 0; j < 2; j++) {
511 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
513 if (nodes[j][0]->ssd > (1 << 16)) {
514 for (k = 1; k < frontier && nodes[j][k]; k++)
515 nodes[j][k]->ssd -= nodes[j][0]->ssd;
516 nodes[j][0]->ssd = 0;
/* Freeze point: trace back from the head node of each band, write the
 * output bytes (higher-band value in the top 2 bits), then restart path
 * numbering and drop all nodes except the head. */
520 if (i == froze + FREEZE_INTERVAL) {
521 p[0] = &c->paths[0][nodes[0][0]->path];
522 p[1] = &c->paths[1][nodes[1][0]->path];
523 for (j = i; j > froze; j--) {
524 dst[j] = p[1]->value << 6 | p[0]->value;
525 p[0] = &c->paths[0][p[0]->prev];
526 p[1] = &c->paths[1][p[1]->prev];
529 pathn[0] = pathn[1] = 0;
530 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
531 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
/* Final traceback for the steps since the last freeze, then commit the
 * head nodes' predictor states back to the codec context. */
535 p[0] = &c->paths[0][nodes[0][0]->path];
536 p[1] = &c->paths[1][nodes[1][0]->path];
537 for (j = i; j > froze; j--) {
538 dst[j] = p[1]->value << 6 | p[0]->value;
539 p[0] = &c->paths[0][p[0]->prev];
540 p[1] = &c->paths[1][p[1]->prev];
542 c->band[0] = nodes[0][0]->state;
543 c->band[1] = nodes[1][0]->state;
/* Encodes pairs of input samples into one output byte each: the 2-bit
 * higher-band code in the top bits and the 6-bit lower-band code below it.
 * NOTE(review): the condition guarding the delegation to
 * g722_encode_trellis and the final return are not visible in this view. */
548 static int g722_encode_frame(AVCodecContext *avctx,
549 uint8_t *dst, int buf_size, void *data)
551 G722Context *c = avctx->priv_data;
552 const int16_t *samples = data;
556 return g722_encode_trellis(avctx, dst, buf_size, data);
558 for (i = 0; i < buf_size >> 1; i++) {
559 int xlow, xhigh, ihigh, ilow;
560 filter_samples(c, &samples[2*i], &xlow, &xhigh);
561 ihigh = encode_high(&c->band[1], xhigh);
562 ilow = encode_low(&c->band[0], xlow);
/* Keep the encoder's predictors in step with what a decoder will compute;
 * the lower band is updated with the code reduced to 4 bits. */
563 update_high_predictor(&c->band[1], c->band[1].scale_factor *
564 high_inv_quant[ihigh] >> 10, ihigh);
565 update_low_predictor(&c->band[0], ilow >> 2);
566 *dst++ = ihigh << 6 | ilow;
/* Encoder registration entry; accepts signed 16-bit samples only.
 * NOTE(review): only some of the initialisers are present in this view. */
571 AVCodec adpcm_g722_encoder = {
573 .type = AVMEDIA_TYPE_AUDIO,
574 .id = CODEC_ID_ADPCM_G722,
575 .priv_data_size = sizeof(G722Context),
578 .encode = g722_encode_frame,
579 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
580 .sample_fmts = (enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},