git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "libavutil/avassert.h"
  31 #include "avcodec.h"
  32 #include "internal.h"
  33 #include "g722.h"
  34 #include "libavutil/common.h"
  35
/* Number of trellis steps after which g722_encode_trellis() writes the best
 * path found so far to the output and restarts path bookkeeping. The path
 * buffers are sized as (1 << trellis) * FREEZE_INTERVAL entries. */
#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway.
   (The frontier is 1 << trellis and is further multiplied by
   FREEZE_INTERVAL in an int when the path buffers are sized.) */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
  46
  47 static av_cold int g722_encode_close(AVCodecContext *avctx)
  48 {
  49     G722Context *c = avctx->priv_data;
  50     int i;
  51     for (i = 0; i < 2; i++) {
  52         av_freep(&c->paths[i]);
  53         av_freep(&c->node_buf[i]);
  54         av_freep(&c->nodep_buf[i]);
  55     }
  56     return 0;
  57 }
  58
  59 static av_cold int g722_encode_init(AVCodecContext * avctx)
  60 {
  61     G722Context *c = avctx->priv_data;
  62
  63     c->band[0].scale_factor = 8;
  64     c->band[1].scale_factor = 2;
  65     c->prev_samples_pos = 22;
  66
  67     if (avctx->frame_size) {
  68         /* validate frame size */
  69         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  70             int new_frame_size;
  71
  72             if (avctx->frame_size == 1)
  73                 new_frame_size = 2;
  74             else if (avctx->frame_size > MAX_FRAME_SIZE)
  75                 new_frame_size = MAX_FRAME_SIZE;
  76             else
  77                 new_frame_size = avctx->frame_size - 1;
  78
  79             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
  80                    "allowed. Using %d instead of %d\n", new_frame_size,
  81                    avctx->frame_size);
  82             avctx->frame_size = new_frame_size;
  83         }
  84     } else {
  85         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
  86            a common packet size for VoIP applications */
  87         avctx->frame_size = 320;
  88     }
  89     avctx->initial_padding = 22;
  90
  91     if (avctx->trellis) {
  92         /* validate trellis */
  93         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
  94             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
  95             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
  96                    "allowed. Using %d instead of %d\n", new_trellis,
  97                    avctx->trellis);
  98             avctx->trellis = new_trellis;
  99         }
 100         if (avctx->trellis) {
 101             int frontier = 1 << avctx->trellis;
 102             int max_paths = frontier * FREEZE_INTERVAL;
 103
 104             for (int i = 0; i < 2; i++) {
 105                 c->paths[i]     = av_calloc(max_paths, sizeof(**c->paths));
 106                 c->node_buf[i]  = av_calloc(frontier, 2 * sizeof(**c->node_buf));
 107                 c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf));
 108                 if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
 109                     return AVERROR(ENOMEM);
 110             }
 111         }
 112     }
 113
 114     ff_g722dsp_init(&c->dsp);
 115
 116     return 0;
 117 }
 118
 119 static const int16_t low_quant[33] = {
 120       35,   72,  110,  150,  190,  233,  276,  323,
 121      370,  422,  473,  530,  587,  650,  714,  786,
 122      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 123     1765, 1980, 2195, 2557, 2919
 124 };
 125
 126 static inline void filter_samples(G722Context *c, const int16_t *samples,
 127                                   int *xlow, int *xhigh)
 128 {
 129     int xout[2];
 130     c->prev_samples[c->prev_samples_pos++] = samples[0];
 131     c->prev_samples[c->prev_samples_pos++] = samples[1];
 132     c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
 133     *xlow  = xout[0] + xout[1] >> 14;
 134     *xhigh = xout[0] - xout[1] >> 14;
 135     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 136         memmove(c->prev_samples,
 137                 c->prev_samples + c->prev_samples_pos - 22,
 138                 22 * sizeof(c->prev_samples[0]));
 139         c->prev_samples_pos = 22;
 140     }
 141 }
 142
 143 static inline int encode_high(const struct G722Band *state, int xhigh)
 144 {
 145     int diff = av_clip_int16(xhigh - state->s_predictor);
 146     int pred = 141 * state->scale_factor >> 8;
 147            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 148     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 149 }
 150
 151 static inline int encode_low(const struct G722Band* state, int xlow)
 152 {
 153     int diff  = av_clip_int16(xlow - state->s_predictor);
 154            /* = diff >= 0 ? diff : -(diff + 1) */
 155     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 156     int i = 0;
 157     limit = limit + 1 << 10;
 158     if (limit > low_quant[8] * state->scale_factor)
 159         i = 9;
 160     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 161         i++;
 162     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 163 }
 164
 165 static void g722_encode_trellis(G722Context *c, int trellis,
 166                                 uint8_t *dst, int nb_samples,
 167                                 const int16_t *samples)
 168 {
 169     int i, j, k;
 170     int frontier = 1 << trellis;
 171     struct TrellisNode **nodes[2];
 172     struct TrellisNode **nodes_next[2];
 173     int pathn[2] = {0, 0}, froze = -1;
 174     struct TrellisPath *p[2];
 175
 176     for (i = 0; i < 2; i++) {
 177         nodes[i] = c->nodep_buf[i];
 178         nodes_next[i] = c->nodep_buf[i] + frontier;
 179         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
 180         nodes[i][0] = c->node_buf[i] + frontier;
 181         nodes[i][0]->ssd = 0;
 182         nodes[i][0]->path = 0;
 183         nodes[i][0]->state = c->band[i];
 184     }
 185
 186     for (i = 0; i < nb_samples >> 1; i++) {
 187         int xlow, xhigh;
 188         struct TrellisNode *next[2];
 189         int heap_pos[2] = {0, 0};
 190
 191         for (j = 0; j < 2; j++) {
 192             next[j] = c->node_buf[j] + frontier*(i & 1);
 193             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 194         }
 195
 196         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 197
 198         for (j = 0; j < frontier && nodes[0][j]; j++) {
 199             /* Only k >> 2 affects the future adaptive state, therefore testing
 200              * small steps that don't change k >> 2 is useless, the original
 201              * value from encode_low is better than them. Since we step k
 202              * in steps of 4, make sure range is a multiple of 4, so that
 203              * we don't miss the original value from encode_low. */
 204             int range = j < frontier/2 ? 4 : 0;
 205             struct TrellisNode *cur_node = nodes[0][j];
 206
 207             int ilow = encode_low(&cur_node->state, xlow);
 208
 209             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 210                 int decoded, dec_diff, pos;
 211                 uint32_t ssd;
 212                 struct TrellisNode* node;
 213
 214                 if (k < 0)
 215                     continue;
 216
 217                 decoded = av_clip_intp2((cur_node->state.scale_factor *
 218                                   ff_g722_low_inv_quant6[k] >> 10)
 219                                 + cur_node->state.s_predictor, 14);
 220                 dec_diff = xlow - decoded;
 221
 222 #define STORE_NODE(index, UPDATE, VALUE)\
 223                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 224                 /* Check for wraparound. Using 64 bit ssd counters would \
 225                  * be simpler, but is slower on x86 32 bit. */\
 226                 if (ssd < cur_node->ssd)\
 227                     continue;\
 228                 if (heap_pos[index] < frontier) {\
 229                     pos = heap_pos[index]++;\
 230                     av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
 231                     node = nodes_next[index][pos] = next[index]++;\
 232                     node->path = pathn[index]++;\
 233                 } else {\
 234                     /* Try to replace one of the leaf nodes with the new \
 235                      * one, but not always testing the same leaf position */\
 236                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 237                     if (ssd >= nodes_next[index][pos]->ssd)\
 238                         continue;\
 239                     heap_pos[index]++;\
 240                     node = nodes_next[index][pos];\
 241                 }\
 242                 node->ssd = ssd;\
 243                 node->state = cur_node->state;\
 244                 UPDATE;\
 245                 c->paths[index][node->path].value = VALUE;\
 246                 c->paths[index][node->path].prev = cur_node->path;\
 247                 /* Sift the newly inserted node up in the heap to restore \
 248                  * the heap property */\
 249                 while (pos > 0) {\
 250                     int parent = (pos - 1) >> 1;\
 251                     if (nodes_next[index][parent]->ssd <= ssd)\
 252                         break;\
 253                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 254                                                 nodes_next[index][pos]);\
 255                     pos = parent;\
 256                 }
 257                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 258             }
 259         }
 260
 261         for (j = 0; j < frontier && nodes[1][j]; j++) {
 262             int ihigh;
 263             struct TrellisNode *cur_node = nodes[1][j];
 264
 265             /* We don't try to get any initial guess for ihigh via
 266              * encode_high - since there's only 4 possible values, test
 267              * them all. Testing all of these gives a much, much larger
 268              * gain than testing a larger range around ilow. */
 269             for (ihigh = 0; ihigh < 4; ihigh++) {
 270                 int dhigh, decoded, dec_diff, pos;
 271                 uint32_t ssd;
 272                 struct TrellisNode* node;
 273
 274                 dhigh = cur_node->state.scale_factor *
 275                         ff_g722_high_inv_quant[ihigh] >> 10;
 276                 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
 277                 dec_diff = xhigh - decoded;
 278
 279                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 280             }
 281         }
 282
 283         for (j = 0; j < 2; j++) {
 284             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 285
 286             if (nodes[j][0]->ssd > (1 << 16)) {
 287                 for (k = 1; k < frontier && nodes[j][k]; k++)
 288                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 289                 nodes[j][0]->ssd = 0;
 290             }
 291         }
 292
 293         if (i == froze + FREEZE_INTERVAL) {
 294             p[0] = &c->paths[0][nodes[0][0]->path];
 295             p[1] = &c->paths[1][nodes[1][0]->path];
 296             for (j = i; j > froze; j--) {
 297                 dst[j] = p[1]->value << 6 | p[0]->value;
 298                 p[0] = &c->paths[0][p[0]->prev];
 299                 p[1] = &c->paths[1][p[1]->prev];
 300             }
 301             froze = i;
 302             pathn[0] = pathn[1] = 0;
 303             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 304             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 305         }
 306     }
 307
 308     p[0] = &c->paths[0][nodes[0][0]->path];
 309     p[1] = &c->paths[1][nodes[1][0]->path];
 310     for (j = i; j > froze; j--) {
 311         dst[j] = p[1]->value << 6 | p[0]->value;
 312         p[0] = &c->paths[0][p[0]->prev];
 313         p[1] = &c->paths[1][p[1]->prev];
 314     }
 315     c->band[0] = nodes[0][0]->state;
 316     c->band[1] = nodes[1][0]->state;
 317 }
 318
 319 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 320                                          const int16_t *samples)
 321 {
 322     int xlow, xhigh, ilow, ihigh;
 323     filter_samples(c, samples, &xlow, &xhigh);
 324     ihigh = encode_high(&c->band[1], xhigh);
 325     ilow  = encode_low (&c->band[0], xlow);
 326     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 327                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 328     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 329     *dst = ihigh << 6 | ilow;
 330 }
 331
 332 static void g722_encode_no_trellis(G722Context *c,
 333                                    uint8_t *dst, int nb_samples,
 334                                    const int16_t *samples)
 335 {
 336     int i;
 337     for (i = 0; i < nb_samples; i += 2)
 338         encode_byte(c, dst++, &samples[i]);
 339 }
 340
 341 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 342                              const AVFrame *frame, int *got_packet_ptr)
 343 {
 344     G722Context *c = avctx->priv_data;
 345     const int16_t *samples = (const int16_t *)frame->data[0];
 346     int nb_samples, out_size, ret;
 347
 348     out_size = (frame->nb_samples + 1) / 2;
 349     if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
 350         return ret;
 351
 352     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 353
 354     if (avctx->trellis)
 355         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
 356     else
 357         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
 358
 359     /* handle last frame with odd frame_size */
 360     if (nb_samples < frame->nb_samples) {
 361         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 362         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
 363     }
 364
 365     if (frame->pts != AV_NOPTS_VALUE)
 366         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 367     *got_packet_ptr = 1;
 368     return 0;
 369 }
 370
/* Codec registration entry for the G.722 ADPCM encoder: mono, signed 16-bit
 * input, wired to the init/encode/close functions above. */
const AVCodec ff_adpcm_g722_encoder = {
    .name            = "g722",
    .long_name       = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .type            = AVMEDIA_TYPE_AUDIO,
    .id              = AV_CODEC_ID_ADPCM_G722,
    .priv_data_size  = sizeof(G722Context),
    .init            = g722_encode_init,
    .close           = g722_encode_close,
    .encode2         = g722_encode_frame,
    /* g722_encode_frame() sizes the packet from frame->nb_samples and copes
     * with an odd sample count, so a short final frame is acceptable */
    .capabilities    = AV_CODEC_CAP_SMALL_LAST_FRAME,
    .sample_fmts     = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
    .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
    /* NOTE(review): presumably makes the framework call .close when .init
     * fails; g722_encode_init() relies on that to free partially allocated
     * trellis buffers - confirm */
    .caps_internal   = FF_CODEC_CAP_INIT_CLEANUP,
};