/*
 * G.722 ADPCM audio encoder/decoder
 *
 * Copyright (c) CMU 1993 Computer Science, Speech Group
 *                        Chengxiang Lu and Alex Hauptmann
 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
 * Copyright (c) 2009 Kenan Gillet
 * Copyright (c) 2010 Martin Storsjo
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 *
 * G.722 ADPCM audio codec
 *
 * This G.722 decoder is a bit-exact implementation of the ITU G.722
 * specification for all three specified bitrates - 64000bps, 56000bps
 * and 48000bps. It passes the ITU tests.
 *
 * @note For the 56000bps and 48000bps bitrates, the lowest 1 or 2 bits
 *       respectively of each byte are ignored.
 */
/** Capacity, in samples, of the prev_samples history buffer. */
#define PREV_SAMPLES_BUF_SIZE 1024

/**
 * Number of trellis steps after which the encoder commits the best path.
 * Together with the trellis frontier it also sizes the path buffer.
 */
#define FREEZE_INTERVAL 128
49 int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]; ///< memory of past decoded samples
50 int prev_samples_pos; ///< the number of values in prev_samples
53 * The band[0] and band[1] correspond respectively to the lower band and higher band.
56 int16_t s_predictor; ///< predictor output value
57 int32_t s_zero; ///< previous output signal from zero predictor
58 int8_t part_reconst_mem[2]; ///< signs of previous partially reconstructed signals
59 int16_t prev_qtzd_reconst; ///< previous quantized reconstructed signal (internal value, using low_inv_quant4)
60 int16_t pole_mem[2]; ///< second-order pole section coefficient buffer
61 int32_t diff_mem[6]; ///< quantizer difference signal memory
62 int16_t zero_mem[6]; ///< Seventh-order zero section coefficient buffer
63 int16_t log_factor; ///< delayed 2-logarithmic quantizer factor
64 int16_t scale_factor; ///< delayed quantizer scale factor
68 struct G722Band state;
71 } *node_buf[2], **nodep_buf[2];
/** Maps a boolean (0 or 1) to a sign: 0 -> -1, 1 -> +1. */
static const int8_t sign_lookup[2] = { -1, 1 };
/**
 * Mantissa table for converting a logarithmic factor to a linear one.
 *
 * Indexed with five bits of the log factor by linear_scale_factor();
 * the entries are approximately 2048 * 2^(index / 32).
 */
static const int16_t inv_log2_table[32] = {
    2048, 2093, 2139, 2186, 2233, 2282, 2332, 2383,
    2435, 2489, 2543, 2599, 2656, 2714, 2774, 2834,
    2896, 2960, 3025, 3091, 3158, 3228, 3298, 3371,
    3444, 3520, 3597, 3676, 3756, 3838, 3922, 4008
};
/** Log-factor adaptation step of the higher band, indexed by (ihigh & 1). */
static const int16_t high_log_factor_step[2] = { 798, -214 };
/** Inverse quantizer output of the higher band, indexed by the 2-bit codeword. */
static const int16_t high_inv_quant[4] = { -926, -202, 926, 202 };
/**
 * Log-factor adaptation step of the lower band, indexed by the 4-bit codeword.
 *
 * low_log_factor_step[index] == wl[rl42[index]]
 */
static const int16_t low_log_factor_step[16] = {
     -60, 3042, 1198, 538, 334, 172,  58, -30,
    3042, 1198,  538, 334, 172,  58, -30, -60
};
/** Inverse quantizer output of the lower band for 4-bit codewords. */
static const int16_t low_inv_quant4[16] = {
       0, -2557, -1612, -1121,  -786,  -530,  -323,  -150,
    2557,  1612,  1121,   786,   530,   323,   150,     0
};
/**
 * quadrature mirror filter (QMF) coefficients
 *
 * ITU-T G.722 Table 11
 *
 * Only 12 values are stored; apply_qmf() reads them forwards for one
 * output and backwards for the other, covering 24 samples.
 */
static const int16_t qmf_coeffs[12] = {
    3, -11, 12, 32, -210, 951, 3876, -805, 362, -156, 53, -11,
};
115 * @param cur_diff the dequantized and scaled delta calculated from the
118 static void do_adaptive_prediction(struct G722Band *band, const int cur_diff)
120 int sg[2], limit, i, cur_qtzd_reconst;
122 const int cur_part_reconst = band->s_zero + cur_diff < 0;
124 sg[0] = sign_lookup[cur_part_reconst != band->part_reconst_mem[0]];
125 sg[1] = sign_lookup[cur_part_reconst == band->part_reconst_mem[1]];
126 band->part_reconst_mem[1] = band->part_reconst_mem[0];
127 band->part_reconst_mem[0] = cur_part_reconst;
129 band->pole_mem[1] = av_clip((sg[0] * av_clip(band->pole_mem[0], -8191, 8191) >> 5) +
130 (sg[1] << 7) + (band->pole_mem[1] * 127 >> 7), -12288, 12288);
132 limit = 15360 - band->pole_mem[1];
133 band->pole_mem[0] = av_clip(-192 * sg[0] + (band->pole_mem[0] * 255 >> 8), -limit, limit);
137 for (i = 0; i < 6; i++)
138 band->zero_mem[i] = ((band->zero_mem[i]*255) >> 8) +
139 ((band->diff_mem[i]^cur_diff) < 0 ? -128 : 128);
141 for (i = 0; i < 6; i++)
142 band->zero_mem[i] = (band->zero_mem[i]*255) >> 8;
144 for (i = 5; i > 0; i--)
145 band->diff_mem[i] = band->diff_mem[i-1];
146 band->diff_mem[0] = av_clip_int16(cur_diff << 1);
149 for (i = 5; i >= 0; i--)
150 band->s_zero += (band->zero_mem[i]*band->diff_mem[i]) >> 15;
153 cur_qtzd_reconst = av_clip_int16((band->s_predictor + cur_diff) << 1);
154 band->s_predictor = av_clip_int16(band->s_zero +
155 (band->pole_mem[0] * cur_qtzd_reconst >> 15) +
156 (band->pole_mem[1] * band->prev_qtzd_reconst >> 15));
157 band->prev_qtzd_reconst = cur_qtzd_reconst;
160 static int inline linear_scale_factor(const int log_factor)
162 const int wd1 = inv_log2_table[(log_factor >> 6) & 31];
163 const int shift = log_factor >> 11;
164 return shift < 0 ? wd1 >> -shift : wd1 << shift;
167 static void update_low_predictor(struct G722Band *band, const int ilow)
169 do_adaptive_prediction(band,
170 band->scale_factor * low_inv_quant4[ilow] >> 10);
172 // quantizer adaptation
173 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
174 low_log_factor_step[ilow], 0, 18432);
175 band->scale_factor = linear_scale_factor(band->log_factor - (8 << 11));
178 static void update_high_predictor(struct G722Band *band, const int dhigh,
181 do_adaptive_prediction(band, dhigh);
183 // quantizer adaptation
184 band->log_factor = av_clip((band->log_factor * 127 >> 7) +
185 high_log_factor_step[ihigh&1], 0, 22528);
186 band->scale_factor = linear_scale_factor(band->log_factor - (10 << 11));
189 static void apply_qmf(const int16_t *prev_samples, int *xout1, int *xout2)
195 for (i = 0; i < 12; i++) {
196 MAC16(*xout2, prev_samples[2*i ], qmf_coeffs[i ]);
197 MAC16(*xout1, prev_samples[2*i+1], qmf_coeffs[11-i]);
201 static av_cold int g722_init(AVCodecContext * avctx)
203 G722Context *c = avctx->priv_data;
205 if (avctx->channels != 1) {
206 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
207 return AVERROR_INVALIDDATA;
209 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
211 switch (avctx->bits_per_coded_sample) {
217 av_log(avctx, AV_LOG_WARNING, "Unsupported bits_per_coded_sample [%d], "
219 avctx->bits_per_coded_sample);
221 avctx->bits_per_coded_sample = 8;
225 c->band[0].scale_factor = 8;
226 c->band[1].scale_factor = 2;
227 c->prev_samples_pos = 22;
230 avctx->sample_rate /= 2;
232 if (avctx->trellis) {
233 int frontier = 1 << avctx->trellis;
234 int max_paths = frontier * FREEZE_INTERVAL;
236 for (i = 0; i < 2; i++) {
237 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
238 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
239 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
246 static av_cold int g722_close(AVCodecContext *avctx)
248 G722Context *c = avctx->priv_data;
250 for (i = 0; i < 2; i++) {
251 av_freep(&c->paths[i]);
252 av_freep(&c->node_buf[i]);
253 av_freep(&c->nodep_buf[i]);
#if CONFIG_ADPCM_G722_DECODER
/** Inverse quantizer output of the lower band for 5-bit codewords. */
static const int16_t low_inv_quant5[32] = {
     -35,   -35, -2919, -2195, -1765, -1458, -1219, -1023,
    -858,  -714,  -587,  -473,  -370,  -276,  -190,  -110,
    2919,  2195,  1765,  1458,  1219,  1023,   858,   714,
     587,   473,   370,   276,   190,   110,    35,   -35
};

/** Inverse quantizer output of the lower band for 6-bit codewords. */
static const int16_t low_inv_quant6[64] = {
     -17,   -17,   -17,   -17, -3101, -2738, -2376, -2088,
   -1873, -1689, -1535, -1399, -1279, -1170, -1072,  -982,
    -899,  -822,  -750,  -682,  -618,  -558,  -501,  -447,
    -396,  -347,  -300,  -254,  -211,  -170,  -130,   -91,
    3101,  2738,  2376,  2088,  1873,  1689,  1535,  1399,
    1279,  1170,  1072,   982,   899,   822,   750,   682,
     618,   558,   501,   447,   396,   347,   300,   254,
     211,   170,   130,    91,    54,    17,   -54,   -17
};

/**
 * Lower band inverse quantizer tables, indexed by the number of ignored
 * low bits per byte (8 - bits_per_coded_sample).
 */
static const int16_t *low_inv_quants[3] = { low_inv_quant6, low_inv_quant5,
                                            low_inv_quant4 };

/**
 * Decode one packet; every input byte carries one codeword pair.
 *
 * Each byte is read as a 2-bit higher band codeword followed by a
 * (6 - skip)-bit lower band codeword; the remaining skip bits are dropped.
 * With lowres set only the lower band is reconstructed (one output sample
 * per byte); otherwise both bands go through the QMF and two samples are
 * produced per byte.
 *
 * NOTE(review): the incoming *data_size is not compared against the
 * amount written here - confirm that callers size the output buffer.
 *
 * @param data      output buffer, written as int16_t samples
 * @param data_size set to the number of bytes written to data
 * @return the number of input bytes consumed (the whole packet)
 */
static int g722_decode_frame(AVCodecContext *avctx, void *data,
                             int *data_size, AVPacket *avpkt)
{
    G722Context *c = avctx->priv_data;
    int16_t *out_buf = data;
    int j, out_len = 0;
    const int skip = 8 - avctx->bits_per_coded_sample;
    const int16_t *quantizer_table = low_inv_quants[skip];
    GetBitContext gb;

    init_get_bits(&gb, avpkt->data, avpkt->size * 8);

    for (j = 0; j < avpkt->size; j++) {
        int ilow, ihigh, rlow;

        ihigh = get_bits(&gb, 2);
        ilow = get_bits(&gb, 6 - skip);
        skip_bits(&gb, skip);

        rlow = av_clip((c->band[0].scale_factor * quantizer_table[ilow] >> 10)
                      + c->band[0].s_predictor, -16384, 16383);

        /* the predictor is always driven by the 4 most significant bits */
        update_low_predictor(&c->band[0], ilow >> (2 - skip));

        if (!avctx->lowres) {
            const int dhigh = c->band[1].scale_factor *
                              high_inv_quant[ihigh] >> 10;
            const int rhigh = av_clip(dhigh + c->band[1].s_predictor,
                                      -16384, 16383);
            int xout1, xout2;

            update_high_predictor(&c->band[1], dhigh, ihigh);

            c->prev_samples[c->prev_samples_pos++] = rlow + rhigh;
            c->prev_samples[c->prev_samples_pos++] = rlow - rhigh;
            apply_qmf(c->prev_samples + c->prev_samples_pos - 24,
                      &xout1, &xout2);
            out_buf[out_len++] = av_clip_int16(xout1 >> 12);
            out_buf[out_len++] = av_clip_int16(xout2 >> 12);
            if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
                /* buffer full: keep the newest 22 samples at the front */
                memmove(c->prev_samples,
                        c->prev_samples + c->prev_samples_pos - 22,
                        22 * sizeof(c->prev_samples[0]));
                c->prev_samples_pos = 22;
            }
        } else
            out_buf[out_len++] = rlow;
    }
    *data_size = out_len << 1;
    return avpkt->size;
}

AVCodec adpcm_g722_decoder = {
    .name           = "g722",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = CODEC_ID_ADPCM_G722,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_init,
    .decode         = g722_decode_frame,
    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
};
#endif
#if CONFIG_ADPCM_G722_ENCODER
/**
 * Lower band quantizer decision levels, compared by encode_low() against
 * the scaled difference. Only the first 29 entries are initialized and used.
 */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

/**
 * Push one input sample pair through the QMF and split it into bands.
 *
 * @param samples two consecutive input samples
 * @param xlow    receives the lower band sample
 * @param xhigh   receives the higher band sample
 */
static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout1, xout2;

    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
    *xlow  = xout1 + xout2 >> 13;
    *xhigh = xout1 - xout2 >> 13;
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        /* buffer full: keep the newest 22 samples at the front */
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}

/**
 * Quantize a higher band sample against the current predictor.
 *
 * @return the 2-bit higher band codeword
 */
static inline int encode_high(const struct G722Band *state, int xhigh)
{
    int diff = av_clip_int16(xhigh - state->s_predictor);
    int pred = 141 * state->scale_factor >> 8;
           /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
}

/**
 * Quantize a lower band sample against the current predictor.
 *
 * @return the 6-bit lower band codeword
 */
static inline int encode_low(const struct G722Band* state, int xlow)
{
    int diff  = av_clip_int16(xlow - state->s_predictor);
           /* = diff >= 0 ? diff : -(diff + 1) */
    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
    int i = 0;

    limit = limit + 1 << 10;
    /* skip the first 9 decision levels when the difference is above them */
    if (limit > low_quant[8] * state->scale_factor)
        i = 9;
    while (i < 29 && limit > low_quant[i] * state->scale_factor)
        i++;
    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
}

/**
 * Encode with a trellis search over candidate codewords.
 *
 * For each band up to (1 << avctx->trellis) candidate states are kept in
 * a heap ordered by accumulated squared error. Every FREEZE_INTERVAL
 * steps the best path is written to dst and the search restarts from it.
 *
 * @param dst      output buffer, one byte per input sample pair
 * @param buf_size twice the number of sample pairs to encode
 * @param data     input, read as int16_t samples
 * @return the number of bytes written to dst
 */
static int g722_encode_trellis(AVCodecContext *avctx,
                               uint8_t *dst, int buf_size, void *data)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = data;
    int i, j, k;
    int frontier = 1 << avctx->trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < buf_size >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only k >> 2 affects the future adaptive state, therefore testing
             * small steps that don't change k >> 2 is useless, the original
             * value from encode_low is better than them. Since we step k
             * in steps of 4, make sure range is a multiple of 4, so that
             * we don't miss the original value from encode_low. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip((cur_node->state.scale_factor *
                                  low_inv_quant6[k] >> 10)
                                + cur_node->state.s_predictor, -16384, 16383);
                dec_diff = xlow - decoded;

#define STORE_NODE(index, UPDATE, VALUE)\
                ssd = cur_node->ssd + dec_diff*dec_diff;\
                /* Check for wraparound. Using 64 bit ssd counters would \
                 * be simpler, but is slower on x86 32 bit. */\
                if (ssd < cur_node->ssd)\
                    continue;\
                if (heap_pos[index] < frontier) {\
                    pos = heap_pos[index]++;\
                    assert(pathn[index] < FREEZE_INTERVAL * frontier);\
                    node = nodes_next[index][pos] = next[index]++;\
                    node->path = pathn[index]++;\
                } else {\
                    /* Try to replace one of the leaf nodes with the new \
                     * one, but not always testing the same leaf position */\
                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                    if (ssd >= nodes_next[index][pos]->ssd)\
                        continue;\
                    heap_pos[index]++;\
                    node = nodes_next[index][pos];\
                }\
                node->ssd = ssd;\
                node->state = cur_node->state;\
                UPDATE;\
                c->paths[index][node->path].value = VALUE;\
                c->paths[index][node->path].prev = cur_node->path;\
                /* Sift the newly inserted node up in the heap to restore \
                 * the heap property */\
                while (pos > 0) {\
                    int parent = (pos - 1) >> 1;\
                    if (nodes_next[index][parent]->ssd <= ssd)\
                        break;\
                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                                nodes_next[index][pos]);\
                    pos = parent;\
                }
                STORE_NODE(0, update_low_predictor(&node->state, k >> 2), k);
            }
        }

        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* We don't try to get any initial guess for ihigh via
             * encode_high - since there's only 4 possible values, test
             * them all. Testing all of these gives a much, much larger
             * gain than testing a larger range around ilow. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        high_inv_quant[ihigh] >> 10;
                decoded = av_clip(dhigh + cur_node->state.s_predictor,
                                  -16384, 16383);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            /* rebase the error sums on the best node to delay wraparound */
            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

        if (i == froze + FREEZE_INTERVAL) {
            /* commit the best path and restart the search from its head */
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    /* Flush the codewords that are not frozen yet. After the loop i is one
     * past the last processed sample pair, so the newest path entry belongs
     * to dst[i - 1]; starting at dst[i] would write one byte beyond the
     * returned length and shift every flushed codeword by one position. */
    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i - 1; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;

    return i;
}

/**
 * Encode input samples, one output byte per sample pair.
 *
 * Delegates to g722_encode_trellis() when avctx->trellis is set;
 * otherwise each pair is quantized directly.
 *
 * @param dst      output buffer
 * @param buf_size twice the number of sample pairs to encode
 * @param data     input, read as int16_t samples
 * @return the number of bytes written to dst
 */
static int g722_encode_frame(AVCodecContext *avctx,
                             uint8_t *dst, int buf_size, void *data)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = data;
    int i;

    if (avctx->trellis)
        return g722_encode_trellis(avctx, dst, buf_size, data);

    for (i = 0; i < buf_size >> 1; i++) {
        int xlow, xhigh, ihigh, ilow;
        filter_samples(c, &samples[2*i], &xlow, &xhigh);
        ihigh = encode_high(&c->band[1], xhigh);
        ilow  = encode_low(&c->band[0], xlow);
        update_high_predictor(&c->band[1], c->band[1].scale_factor *
                              high_inv_quant[ihigh] >> 10, ihigh);
        update_low_predictor(&c->band[0], ilow >> 2);
        /* higher band codeword in the top 2 bits, lower band in the low 6 */
        *dst++ = ihigh << 6 | ilow;
    }
    return i;
}

AVCodec adpcm_g722_encoder = {
    .name           = "g722",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = CODEC_ID_ADPCM_G722,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_init,
    .close          = g722_close,
    .encode         = g722_encode_frame,
    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .sample_fmts    = (enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
};
#endif