2 * Copyright (c) CMU 1993 Computer Science, Speech Group
3 * Chengxiang Lu and Alex Hauptmann
4 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5 * Copyright (c) 2009 Kenan Gillet
6 * Copyright (c) 2010 Martin Storsjo
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 * G.722 ADPCM audio encoder
30 #include "libavutil/avassert.h"
34 #include "libavutil/common.h"
/* Number of encoding steps the trellis search runs before the decisions
   made so far are traced back and written to the output. It also scales
   the size of the path buffers allocated in g722_encode_init(). */
#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway. */
/* NOTE(review): MIN_TRELLIS is used by g722_encode_init() but no definition
   is visible in this file; 0 (trellis disabled) is assumed - confirm. The
   guard keeps this harmless if a header already provides it. */
#ifndef MIN_TRELLIS
#define MIN_TRELLIS 0
#endif
#define MAX_TRELLIS 16
47 static av_cold int g722_encode_close(AVCodecContext *avctx)
49 G722Context *c = avctx->priv_data;
51 for (i = 0; i < 2; i++) {
52 av_freep(&c->paths[i]);
53 av_freep(&c->node_buf[i]);
54 av_freep(&c->nodep_buf[i]);
59 static av_cold int g722_encode_init(AVCodecContext * avctx)
61 G722Context *c = avctx->priv_data;
64 if (avctx->channels != 1) {
65 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
66 return AVERROR_INVALIDDATA;
69 c->band[0].scale_factor = 8;
70 c->band[1].scale_factor = 2;
71 c->prev_samples_pos = 22;
74 int frontier = 1 << avctx->trellis;
75 int max_paths = frontier * FREEZE_INTERVAL;
77 for (i = 0; i < 2; i++) {
78 c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
79 c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
80 c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
81 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
82 ret = AVERROR(ENOMEM);
88 if (avctx->frame_size) {
89 /* validate frame size */
90 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
93 if (avctx->frame_size == 1)
95 else if (avctx->frame_size > MAX_FRAME_SIZE)
96 new_frame_size = MAX_FRAME_SIZE;
98 new_frame_size = avctx->frame_size - 1;
100 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
101 "allowed. Using %d instead of %d\n", new_frame_size,
103 avctx->frame_size = new_frame_size;
106 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
107 a common packet size for VoIP applications */
108 avctx->frame_size = 320;
110 avctx->initial_padding = 22;
112 if (avctx->trellis) {
113 /* validate trellis */
114 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
115 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
116 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
117 "allowed. Using %d instead of %d\n", new_trellis,
119 avctx->trellis = new_trellis;
123 ff_g722dsp_init(&c->dsp);
127 g722_encode_close(avctx);
/* Ascending thresholds used by encode_low(): each entry is multiplied by the
 * band's scale factor and compared against the scaled prediction error.
 * Only the first 29 entries are given; the remaining ones are implicitly
 * zero and are never read by encode_low(), which stops at index 28. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};
138 static inline void filter_samples(G722Context *c, const int16_t *samples,
139 int *xlow, int *xhigh)
142 c->prev_samples[c->prev_samples_pos++] = samples[0];
143 c->prev_samples[c->prev_samples_pos++] = samples[1];
144 c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
145 *xlow = xout[0] + xout[1] >> 14;
146 *xhigh = xout[0] - xout[1] >> 14;
147 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
148 memmove(c->prev_samples,
149 c->prev_samples + c->prev_samples_pos - 22,
150 22 * sizeof(c->prev_samples[0]));
151 c->prev_samples_pos = 22;
155 static inline int encode_high(const struct G722Band *state, int xhigh)
157 int diff = av_clip_int16(xhigh - state->s_predictor);
158 int pred = 141 * state->scale_factor >> 8;
159 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
160 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
163 static inline int encode_low(const struct G722Band* state, int xlow)
165 int diff = av_clip_int16(xlow - state->s_predictor);
166 /* = diff >= 0 ? diff : -(diff + 1) */
167 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
169 limit = limit + 1 << 10;
170 if (limit > low_quant[8] * state->scale_factor)
172 while (i < 29 && limit > low_quant[i] * state->scale_factor)
174 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
/*
 * Encode nb_samples >> 1 sample pairs into dst using a trellis search.
 *
 * Both bands (index 0 uses the low band value and predictor update, index 1
 * the high band ones) keep up to `frontier` = 1 << trellis candidate nodes.
 * For every sample pair the candidates of the previous step are expanded
 * with several codes each, the results are collected through STORE_NODE
 * into nodes_next, and the two lists are then swapped. Each stored node
 * records its code and its predecessor in c->paths, so that the chosen
 * codes can be traced back from nodes[..][0] and written to dst, which is
 * done every FREEZE_INTERVAL steps and once more after the loop. Finally
 * the state of nodes[..][0] is copied back into c->band.
 *
 * c          encoder context (band state, path/node buffers, sample history)
 * trellis    log2 of the number of candidates kept per band
 * dst        output buffer, one byte per sample pair
 * nb_samples number of input samples
 * samples    input samples
 *
 * NOTE(review): in this listing the declarations of i, j, k, xlow, xhigh,
 * ssd and ihigh, several statements inside STORE_NODE and the braces that
 * close each scope are not visible - restore them from the original file
 * before building.
 */
177 static void g722_encode_trellis(G722Context *c, int trellis,
178 uint8_t *dst, int nb_samples,
179 const int16_t *samples)
182 int frontier = 1 << trellis;
183 struct TrellisNode **nodes[2];
184 struct TrellisNode **nodes_next[2];
185 int pathn[2] = {0, 0}, froze = -1;
186 struct TrellisPath *p[2];
/* Each band starts with a single root node that carries the current band
 * state and zero accumulated error; nodep_buf is split into the current
 * and the next pointer list. */
188 for (i = 0; i < 2; i++) {
189 nodes[i] = c->nodep_buf[i];
190 nodes_next[i] = c->nodep_buf[i] + frontier;
191 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
192 nodes[i][0] = c->node_buf[i] + frontier;
193 nodes[i][0]->ssd = 0;
194 nodes[i][0]->path = 0;
195 nodes[i][0]->state = c->band[i];
/* One iteration per sample pair, i.e. per output byte. */
198 for (i = 0; i < nb_samples >> 1; i++) {
200 struct TrellisNode *next[2];
201 int heap_pos[2] = {0, 0};
/* New nodes are taken from the half of node_buf selected by the parity of
 * i; the next pointer list is emptied. */
203 for (j = 0; j < 2; j++) {
204 next[j] = c->node_buf[j] + frontier*(i & 1);
205 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
/* Split the next two input samples into a low and a high band value. */
208 filter_samples(c, &samples[2*i], &xlow, &xhigh);
/* Low band: expand every current candidate. */
210 for (j = 0; j < frontier && nodes[0][j]; j++) {
211 /* Only k >> 2 affects the future adaptive state, therefore testing
212 * small steps that don't change k >> 2 is useless, the original
213 * value from encode_low is better than them. Since we step k
214 * in steps of 4, make sure range is a multiple of 4, so that
215 * we don't miss the original value from encode_low. */
216 int range = j < frontier/2 ? 4 : 0;
217 struct TrellisNode *cur_node = nodes[0][j];
219 int ilow = encode_low(&cur_node->state, xlow);
221 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
222 int decoded, dec_diff, pos;
224 struct TrellisNode* node;
/* Reconstruct the value code k would decode to and measure the error
 * against the actual low band value. */
229 decoded = av_clip_intp2((cur_node->state.scale_factor *
230 ff_g722_low_inv_quant6[k] >> 10)
231 + cur_node->state.s_predictor, 14);
232 dec_diff = xlow - decoded;
/* STORE_NODE(index, UPDATE, VALUE): adds the squared error to the
 * candidate's accumulated ssd and places the resulting node into the
 * nodes_next[index] heap - appending while fewer than `frontier` nodes are
 * stored, otherwise competing with one of the leaf positions. The node
 * inherits cur_node's state, VALUE and the predecessor path are recorded in
 * c->paths[index], and the node is sifted up by ssd. */
234 #define STORE_NODE(index, UPDATE, VALUE)\
235 ssd = cur_node->ssd + dec_diff*dec_diff;\
236 /* Check for wraparound. Using 64 bit ssd counters would \
237 * be simpler, but is slower on x86 32 bit. */\
238 if (ssd < cur_node->ssd)\
240 if (heap_pos[index] < frontier) {\
241 pos = heap_pos[index]++;\
242 av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
243 node = nodes_next[index][pos] = next[index]++;\
244 node->path = pathn[index]++;\
246 /* Try to replace one of the leaf nodes with the new \
247 * one, but not always testing the same leaf position */\
248 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
249 if (ssd >= nodes_next[index][pos]->ssd)\
252 node = nodes_next[index][pos];\
255 node->state = cur_node->state;\
257 c->paths[index][node->path].value = VALUE;\
258 c->paths[index][node->path].prev = cur_node->path;\
259 /* Sift the newly inserted node up in the heap to restore \
260 * the heap property */\
262 int parent = (pos - 1) >> 1;\
263 if (nodes_next[index][parent]->ssd <= ssd)\
265 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
266 nodes_next[index][pos]);\
269 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
/* High band: expand every current candidate. */
273 for (j = 0; j < frontier && nodes[1][j]; j++) {
275 struct TrellisNode *cur_node = nodes[1][j];
277 /* We don't try to get any initial guess for ihigh via
278 * encode_high - since there's only 4 possible values, test
279 * them all. Testing all of these gives a much, much larger
280 * gain than testing a larger range around ilow. */
281 for (ihigh = 0; ihigh < 4; ihigh++) {
282 int dhigh, decoded, dec_diff, pos;
284 struct TrellisNode* node;
/* Dequantized difference for this code, the value it would decode to and
 * the resulting error against the actual high band value. */
286 dhigh = cur_node->state.scale_factor *
287 ff_g722_high_inv_quant[ihigh] >> 10;
288 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
289 dec_diff = xhigh - decoded;
291 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
/* The freshly built lists become the current ones for the next step. */
295 for (j = 0; j < 2; j++) {
296 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
/* Rebase all ssd values on the first node once it grows beyond 1 << 16,
 * which keeps the counters small. */
298 if (nodes[j][0]->ssd > (1 << 16)) {
299 for (k = 1; k < frontier && nodes[j][k]; k++)
300 nodes[j][k]->ssd -= nodes[j][0]->ssd;
301 nodes[j][0]->ssd = 0;
/* FREEZE_INTERVAL steps after the last freeze: trace back from the first
 * node of each band, emit the bytes for steps froze+1..i, then restart the
 * path counters and drop all candidates except the first. */
305 if (i == froze + FREEZE_INTERVAL) {
306 p[0] = &c->paths[0][nodes[0][0]->path];
307 p[1] = &c->paths[1][nodes[1][0]->path];
308 for (j = i; j > froze; j--) {
309 dst[j] = p[1]->value << 6 | p[0]->value;
310 p[0] = &c->paths[0][p[0]->prev];
311 p[1] = &c->paths[1][p[1]->prev];
314 pathn[0] = pathn[1] = 0;
315 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
316 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
/* Emit whatever has not been frozen yet. */
/* NOTE(review): as far as this listing shows, i equals nb_samples >> 1
 * here, one more than the last step processed by the loop, so the first
 * iteration below would write dst[nb_samples >> 1] and the trace-back would
 * take one step more than there are unfrozen steps. Lines are missing just
 * above, so confirm against the original file whether i is adjusted first. */
320 p[0] = &c->paths[0][nodes[0][0]->path];
321 p[1] = &c->paths[1][nodes[1][0]->path];
322 for (j = i; j > froze; j--) {
323 dst[j] = p[1]->value << 6 | p[0]->value;
324 p[0] = &c->paths[0][p[0]->prev];
325 p[1] = &c->paths[1][p[1]->prev];
/* Carry the predictor state of the first node of each band over to the
 * next call. */
327 c->band[0] = nodes[0][0]->state;
328 c->band[1] = nodes[1][0]->state;
331 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
332 const int16_t *samples)
334 int xlow, xhigh, ilow, ihigh;
335 filter_samples(c, samples, &xlow, &xhigh);
336 ihigh = encode_high(&c->band[1], xhigh);
337 ilow = encode_low (&c->band[0], xlow);
338 ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
339 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
340 ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
341 *dst = ihigh << 6 | ilow;
344 static void g722_encode_no_trellis(G722Context *c,
345 uint8_t *dst, int nb_samples,
346 const int16_t *samples)
349 for (i = 0; i < nb_samples; i += 2)
350 encode_byte(c, dst++, &samples[i]);
353 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
354 const AVFrame *frame, int *got_packet_ptr)
356 G722Context *c = avctx->priv_data;
357 const int16_t *samples = (const int16_t *)frame->data[0];
358 int nb_samples, out_size, ret;
360 out_size = (frame->nb_samples + 1) / 2;
361 if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
364 nb_samples = frame->nb_samples - (frame->nb_samples & 1);
367 g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
369 g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
371 /* handle last frame with odd frame_size */
372 if (nb_samples < frame->nb_samples) {
373 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
374 encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
377 if (frame->pts != AV_NOPTS_VALUE)
378 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
383 AVCodec ff_adpcm_g722_encoder = {
385 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
386 .type = AVMEDIA_TYPE_AUDIO,
387 .id = AV_CODEC_ID_ADPCM_G722,
388 .priv_data_size = sizeof(G722Context),
389 .init = g722_encode_init,
390 .close = g722_encode_close,
391 .encode2 = g722_encode_frame,
392 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
393 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
394 AV_SAMPLE_FMT_NONE },