/*
 * Copyright (c) CMU 1993 Computer Science, Speech Group
 *                        Chengxiang Lu and Alex Hauptmann
 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
 * Copyright (c) 2009 Kenan Gillet
 * Copyright (c) 2010 Martin Storsjo
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * G.722 ADPCM audio encoder
 */

30 #include "libavutil/avassert.h"
34 #include "libavutil/common.h"
/* Number of encoded bytes after which the trellis search commits ("freezes")
   the best path found so far and restarts its path bookkeeping. */
#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
47 static av_cold int g722_encode_close(AVCodecContext *avctx)
49 G722Context *c = avctx->priv_data;
51 for (i = 0; i < 2; i++) {
52 av_freep(&c->paths[i]);
53 av_freep(&c->node_buf[i]);
54 av_freep(&c->nodep_buf[i]);
59 static av_cold int g722_encode_init(AVCodecContext * avctx)
61 G722Context *c = avctx->priv_data;
63 c->band[0].scale_factor = 8;
64 c->band[1].scale_factor = 2;
65 c->prev_samples_pos = 22;
68 int frontier = 1 << avctx->trellis;
69 int max_paths = frontier * FREEZE_INTERVAL;
71 for (i = 0; i < 2; i++) {
72 c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
73 c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
74 c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
75 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
76 return AVERROR(ENOMEM);
80 if (avctx->frame_size) {
81 /* validate frame size */
82 if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
85 if (avctx->frame_size == 1)
87 else if (avctx->frame_size > MAX_FRAME_SIZE)
88 new_frame_size = MAX_FRAME_SIZE;
90 new_frame_size = avctx->frame_size - 1;
92 av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
93 "allowed. Using %d instead of %d\n", new_frame_size,
95 avctx->frame_size = new_frame_size;
98 /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
99 a common packet size for VoIP applications */
100 avctx->frame_size = 320;
102 avctx->initial_padding = 22;
104 if (avctx->trellis) {
105 /* validate trellis */
106 if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
107 int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
108 av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
109 "allowed. Using %d instead of %d\n", new_trellis,
111 avctx->trellis = new_trellis;
115 ff_g722dsp_init(&c->dsp);
/* Low-band quantizer decision thresholds used by encode_low(), scaled there
 * by the band's scale factor. Only the first 29 entries are initialized
 * explicitly; the remaining entries of the 33-element array are zero. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

127 static inline void filter_samples(G722Context *c, const int16_t *samples,
128 int *xlow, int *xhigh)
131 c->prev_samples[c->prev_samples_pos++] = samples[0];
132 c->prev_samples[c->prev_samples_pos++] = samples[1];
133 c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
134 *xlow = xout[0] + xout[1] >> 14;
135 *xhigh = xout[0] - xout[1] >> 14;
136 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
137 memmove(c->prev_samples,
138 c->prev_samples + c->prev_samples_pos - 22,
139 22 * sizeof(c->prev_samples[0]));
140 c->prev_samples_pos = 22;
144 static inline int encode_high(const struct G722Band *state, int xhigh)
146 int diff = av_clip_int16(xhigh - state->s_predictor);
147 int pred = 141 * state->scale_factor >> 8;
148 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
149 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
152 static inline int encode_low(const struct G722Band* state, int xlow)
154 int diff = av_clip_int16(xlow - state->s_predictor);
155 /* = diff >= 0 ? diff : -(diff + 1) */
156 int limit = diff ^ (diff >> (sizeof(diff)*8-1));
158 limit = limit + 1 << 10;
159 if (limit > low_quant[8] * state->scale_factor)
161 while (i < 29 && limit > low_quant[i] * state->scale_factor)
163 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
166 static void g722_encode_trellis(G722Context *c, int trellis,
167 uint8_t *dst, int nb_samples,
168 const int16_t *samples)
171 int frontier = 1 << trellis;
172 struct TrellisNode **nodes[2];
173 struct TrellisNode **nodes_next[2];
174 int pathn[2] = {0, 0}, froze = -1;
175 struct TrellisPath *p[2];
177 for (i = 0; i < 2; i++) {
178 nodes[i] = c->nodep_buf[i];
179 nodes_next[i] = c->nodep_buf[i] + frontier;
180 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
181 nodes[i][0] = c->node_buf[i] + frontier;
182 nodes[i][0]->ssd = 0;
183 nodes[i][0]->path = 0;
184 nodes[i][0]->state = c->band[i];
187 for (i = 0; i < nb_samples >> 1; i++) {
189 struct TrellisNode *next[2];
190 int heap_pos[2] = {0, 0};
192 for (j = 0; j < 2; j++) {
193 next[j] = c->node_buf[j] + frontier*(i & 1);
194 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
197 filter_samples(c, &samples[2*i], &xlow, &xhigh);
199 for (j = 0; j < frontier && nodes[0][j]; j++) {
200 /* Only k >> 2 affects the future adaptive state, therefore testing
201 * small steps that don't change k >> 2 is useless, the original
202 * value from encode_low is better than them. Since we step k
203 * in steps of 4, make sure range is a multiple of 4, so that
204 * we don't miss the original value from encode_low. */
205 int range = j < frontier/2 ? 4 : 0;
206 struct TrellisNode *cur_node = nodes[0][j];
208 int ilow = encode_low(&cur_node->state, xlow);
210 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
211 int decoded, dec_diff, pos;
213 struct TrellisNode* node;
218 decoded = av_clip_intp2((cur_node->state.scale_factor *
219 ff_g722_low_inv_quant6[k] >> 10)
220 + cur_node->state.s_predictor, 14);
221 dec_diff = xlow - decoded;
223 #define STORE_NODE(index, UPDATE, VALUE)\
224 ssd = cur_node->ssd + dec_diff*dec_diff;\
225 /* Check for wraparound. Using 64 bit ssd counters would \
226 * be simpler, but is slower on x86 32 bit. */\
227 if (ssd < cur_node->ssd)\
229 if (heap_pos[index] < frontier) {\
230 pos = heap_pos[index]++;\
231 av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
232 node = nodes_next[index][pos] = next[index]++;\
233 node->path = pathn[index]++;\
235 /* Try to replace one of the leaf nodes with the new \
236 * one, but not always testing the same leaf position */\
237 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
238 if (ssd >= nodes_next[index][pos]->ssd)\
241 node = nodes_next[index][pos];\
244 node->state = cur_node->state;\
246 c->paths[index][node->path].value = VALUE;\
247 c->paths[index][node->path].prev = cur_node->path;\
248 /* Sift the newly inserted node up in the heap to restore \
249 * the heap property */\
251 int parent = (pos - 1) >> 1;\
252 if (nodes_next[index][parent]->ssd <= ssd)\
254 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
255 nodes_next[index][pos]);\
258 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
262 for (j = 0; j < frontier && nodes[1][j]; j++) {
264 struct TrellisNode *cur_node = nodes[1][j];
266 /* We don't try to get any initial guess for ihigh via
267 * encode_high - since there's only 4 possible values, test
268 * them all. Testing all of these gives a much, much larger
269 * gain than testing a larger range around ilow. */
270 for (ihigh = 0; ihigh < 4; ihigh++) {
271 int dhigh, decoded, dec_diff, pos;
273 struct TrellisNode* node;
275 dhigh = cur_node->state.scale_factor *
276 ff_g722_high_inv_quant[ihigh] >> 10;
277 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
278 dec_diff = xhigh - decoded;
280 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
284 for (j = 0; j < 2; j++) {
285 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
287 if (nodes[j][0]->ssd > (1 << 16)) {
288 for (k = 1; k < frontier && nodes[j][k]; k++)
289 nodes[j][k]->ssd -= nodes[j][0]->ssd;
290 nodes[j][0]->ssd = 0;
294 if (i == froze + FREEZE_INTERVAL) {
295 p[0] = &c->paths[0][nodes[0][0]->path];
296 p[1] = &c->paths[1][nodes[1][0]->path];
297 for (j = i; j > froze; j--) {
298 dst[j] = p[1]->value << 6 | p[0]->value;
299 p[0] = &c->paths[0][p[0]->prev];
300 p[1] = &c->paths[1][p[1]->prev];
303 pathn[0] = pathn[1] = 0;
304 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
305 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
309 p[0] = &c->paths[0][nodes[0][0]->path];
310 p[1] = &c->paths[1][nodes[1][0]->path];
311 for (j = i; j > froze; j--) {
312 dst[j] = p[1]->value << 6 | p[0]->value;
313 p[0] = &c->paths[0][p[0]->prev];
314 p[1] = &c->paths[1][p[1]->prev];
316 c->band[0] = nodes[0][0]->state;
317 c->band[1] = nodes[1][0]->state;
320 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
321 const int16_t *samples)
323 int xlow, xhigh, ilow, ihigh;
324 filter_samples(c, samples, &xlow, &xhigh);
325 ihigh = encode_high(&c->band[1], xhigh);
326 ilow = encode_low (&c->band[0], xlow);
327 ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
328 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
329 ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
330 *dst = ihigh << 6 | ilow;
333 static void g722_encode_no_trellis(G722Context *c,
334 uint8_t *dst, int nb_samples,
335 const int16_t *samples)
338 for (i = 0; i < nb_samples; i += 2)
339 encode_byte(c, dst++, &samples[i]);
342 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
343 const AVFrame *frame, int *got_packet_ptr)
345 G722Context *c = avctx->priv_data;
346 const int16_t *samples = (const int16_t *)frame->data[0];
347 int nb_samples, out_size, ret;
349 out_size = (frame->nb_samples + 1) / 2;
350 if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
353 nb_samples = frame->nb_samples - (frame->nb_samples & 1);
356 g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
358 g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
360 /* handle last frame with odd frame_size */
361 if (nb_samples < frame->nb_samples) {
362 int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
363 encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
366 if (frame->pts != AV_NOPTS_VALUE)
367 avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
372 AVCodec ff_adpcm_g722_encoder = {
374 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
375 .type = AVMEDIA_TYPE_AUDIO,
376 .id = AV_CODEC_ID_ADPCM_G722,
377 .priv_data_size = sizeof(G722Context),
378 .init = g722_encode_init,
379 .close = g722_encode_close,
380 .encode2 = g722_encode_frame,
381 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
382 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
383 .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
384 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,