git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "avcodec.h"
  31 #include "g722.h"
  32
  33 #define FREEZE_INTERVAL 128
  34
  35 /* This is an arbitrary value. Allowing insanely large values leads to strange
  36    problems, so we limit it to a reasonable value */
  37 #define MAX_FRAME_SIZE 32768
  38
  39 /* We clip the value of avctx->trellis to prevent data type overflows and
  40    undefined behavior. Using larger values is insanely slow anyway. */
  41 #define MIN_TRELLIS 0
  42 #define MAX_TRELLIS 16
  43
  44 static av_cold int g722_encode_close(AVCodecContext *avctx)
  45 {
  46     G722Context *c = avctx->priv_data;
  47     int i;
  48     for (i = 0; i < 2; i++) {
  49         av_freep(&c->paths[i]);
  50         av_freep(&c->node_buf[i]);
  51         av_freep(&c->nodep_buf[i]);
  52     }
  53     return 0;
  54 }
  55
  56 static av_cold int g722_encode_init(AVCodecContext * avctx)
  57 {
  58     G722Context *c = avctx->priv_data;
  59     int ret;
  60
  61     if (avctx->channels != 1) {
  62         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  63         return AVERROR_INVALIDDATA;
  64     }
  65
  66     c->band[0].scale_factor = 8;
  67     c->band[1].scale_factor = 2;
  68     c->prev_samples_pos = 22;
  69
  70     if (avctx->trellis) {
  71         int frontier = 1 << avctx->trellis;
  72         int max_paths = frontier * FREEZE_INTERVAL;
  73         int i;
  74         for (i = 0; i < 2; i++) {
  75             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
  76             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
  77             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
  78             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
  79                 ret = AVERROR(ENOMEM);
  80                 goto error;
  81             }
  82         }
  83     }
  84
  85     if (avctx->frame_size) {
  86         /* validate frame size */
  87         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  88             int new_frame_size;
  89
  90             if (avctx->frame_size == 1)
  91                 new_frame_size = 2;
  92             else if (avctx->frame_size > MAX_FRAME_SIZE)
  93                 new_frame_size = MAX_FRAME_SIZE;
  94             else
  95                 new_frame_size = avctx->frame_size - 1;
  96
  97             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
  98                    "allowed. Using %d instead of %d\n", new_frame_size,
  99                    avctx->frame_size);
 100             avctx->frame_size = new_frame_size;
 101         }
 102     } else {
 103         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
 104            a common packet size for VoIP applications */
 105         avctx->frame_size = 320;
 106     }
 107
 108     if (avctx->trellis) {
 109         /* validate trellis */
 110         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
 111             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
 112             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
 113                    "allowed. Using %d instead of %d\n", new_trellis,
 114                    avctx->trellis);
 115             avctx->trellis = new_trellis;
 116         }
 117     }
 118
 119     return 0;
 120 error:
 121     g722_encode_close(avctx);
 122     return ret;
 123 }
 124
 125 static const int16_t low_quant[33] = {
 126       35,   72,  110,  150,  190,  233,  276,  323,
 127      370,  422,  473,  530,  587,  650,  714,  786,
 128      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 129     1765, 1980, 2195, 2557, 2919
 130 };
 131
 132 static inline void filter_samples(G722Context *c, const int16_t *samples,
 133                                   int *xlow, int *xhigh)
 134 {
 135     int xout1, xout2;
 136     c->prev_samples[c->prev_samples_pos++] = samples[0];
 137     c->prev_samples[c->prev_samples_pos++] = samples[1];
 138     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
 139     *xlow  = xout1 + xout2 >> 13;
 140     *xhigh = xout1 - xout2 >> 13;
 141     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 142         memmove(c->prev_samples,
 143                 c->prev_samples + c->prev_samples_pos - 22,
 144                 22 * sizeof(c->prev_samples[0]));
 145         c->prev_samples_pos = 22;
 146     }
 147 }
 148
 149 static inline int encode_high(const struct G722Band *state, int xhigh)
 150 {
 151     int diff = av_clip_int16(xhigh - state->s_predictor);
 152     int pred = 141 * state->scale_factor >> 8;
 153            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 154     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 155 }
 156
 157 static inline int encode_low(const struct G722Band* state, int xlow)
 158 {
 159     int diff  = av_clip_int16(xlow - state->s_predictor);
 160            /* = diff >= 0 ? diff : -(diff + 1) */
 161     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 162     int i = 0;
 163     limit = limit + 1 << 10;
 164     if (limit > low_quant[8] * state->scale_factor)
 165         i = 9;
 166     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 167         i++;
 168     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 169 }
 170
 171 static void g722_encode_trellis(G722Context *c, int trellis,
 172                                 uint8_t *dst, int nb_samples,
 173                                 const int16_t *samples)
 174 {
 175     int i, j, k;
 176     int frontier = 1 << trellis;
 177     struct TrellisNode **nodes[2];
 178     struct TrellisNode **nodes_next[2];
 179     int pathn[2] = {0, 0}, froze = -1;
 180     struct TrellisPath *p[2];
 181
 182     for (i = 0; i < 2; i++) {
 183         nodes[i] = c->nodep_buf[i];
 184         nodes_next[i] = c->nodep_buf[i] + frontier;
 185         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
 186         nodes[i][0] = c->node_buf[i] + frontier;
 187         nodes[i][0]->ssd = 0;
 188         nodes[i][0]->path = 0;
 189         nodes[i][0]->state = c->band[i];
 190     }
 191
 192     for (i = 0; i < nb_samples >> 1; i++) {
 193         int xlow, xhigh;
 194         struct TrellisNode *next[2];
 195         int heap_pos[2] = {0, 0};
 196
 197         for (j = 0; j < 2; j++) {
 198             next[j] = c->node_buf[j] + frontier*(i & 1);
 199             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 200         }
 201
 202         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 203
 204         for (j = 0; j < frontier && nodes[0][j]; j++) {
 205             /* Only k >> 2 affects the future adaptive state, therefore testing
 206              * small steps that don't change k >> 2 is useless, the original
 207              * value from encode_low is better than them. Since we step k
 208              * in steps of 4, make sure range is a multiple of 4, so that
 209              * we don't miss the original value from encode_low. */
 210             int range = j < frontier/2 ? 4 : 0;
 211             struct TrellisNode *cur_node = nodes[0][j];
 212
 213             int ilow = encode_low(&cur_node->state, xlow);
 214
 215             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 216                 int decoded, dec_diff, pos;
 217                 uint32_t ssd;
 218                 struct TrellisNode* node;
 219
 220                 if (k < 0)
 221                     continue;
 222
 223                 decoded = av_clip((cur_node->state.scale_factor *
 224                                   ff_g722_low_inv_quant6[k] >> 10)
 225                                 + cur_node->state.s_predictor, -16384, 16383);
 226                 dec_diff = xlow - decoded;
 227
 228 #define STORE_NODE(index, UPDATE, VALUE)\
 229                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 230                 /* Check for wraparound. Using 64 bit ssd counters would \
 231                  * be simpler, but is slower on x86 32 bit. */\
 232                 if (ssd < cur_node->ssd)\
 233                     continue;\
 234                 if (heap_pos[index] < frontier) {\
 235                     pos = heap_pos[index]++;\
 236                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
 237                     node = nodes_next[index][pos] = next[index]++;\
 238                     node->path = pathn[index]++;\
 239                 } else {\
 240                     /* Try to replace one of the leaf nodes with the new \
 241                      * one, but not always testing the same leaf position */\
 242                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 243                     if (ssd >= nodes_next[index][pos]->ssd)\
 244                         continue;\
 245                     heap_pos[index]++;\
 246                     node = nodes_next[index][pos];\
 247                 }\
 248                 node->ssd = ssd;\
 249                 node->state = cur_node->state;\
 250                 UPDATE;\
 251                 c->paths[index][node->path].value = VALUE;\
 252                 c->paths[index][node->path].prev = cur_node->path;\
 253                 /* Sift the newly inserted node up in the heap to restore \
 254                  * the heap property */\
 255                 while (pos > 0) {\
 256                     int parent = (pos - 1) >> 1;\
 257                     if (nodes_next[index][parent]->ssd <= ssd)\
 258                         break;\
 259                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 260                                                 nodes_next[index][pos]);\
 261                     pos = parent;\
 262                 }
 263                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 264             }
 265         }
 266
 267         for (j = 0; j < frontier && nodes[1][j]; j++) {
 268             int ihigh;
 269             struct TrellisNode *cur_node = nodes[1][j];
 270
 271             /* We don't try to get any initial guess for ihigh via
 272              * encode_high - since there's only 4 possible values, test
 273              * them all. Testing all of these gives a much, much larger
 274              * gain than testing a larger range around ilow. */
 275             for (ihigh = 0; ihigh < 4; ihigh++) {
 276                 int dhigh, decoded, dec_diff, pos;
 277                 uint32_t ssd;
 278                 struct TrellisNode* node;
 279
 280                 dhigh = cur_node->state.scale_factor *
 281                         ff_g722_high_inv_quant[ihigh] >> 10;
 282                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
 283                                   -16384, 16383);
 284                 dec_diff = xhigh - decoded;
 285
 286                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 287             }
 288         }
 289
 290         for (j = 0; j < 2; j++) {
 291             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 292
 293             if (nodes[j][0]->ssd > (1 << 16)) {
 294                 for (k = 1; k < frontier && nodes[j][k]; k++)
 295                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 296                 nodes[j][0]->ssd = 0;
 297             }
 298         }
 299
 300         if (i == froze + FREEZE_INTERVAL) {
 301             p[0] = &c->paths[0][nodes[0][0]->path];
 302             p[1] = &c->paths[1][nodes[1][0]->path];
 303             for (j = i; j > froze; j--) {
 304                 dst[j] = p[1]->value << 6 | p[0]->value;
 305                 p[0] = &c->paths[0][p[0]->prev];
 306                 p[1] = &c->paths[1][p[1]->prev];
 307             }
 308             froze = i;
 309             pathn[0] = pathn[1] = 0;
 310             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 311             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 312         }
 313     }
 314
 315     p[0] = &c->paths[0][nodes[0][0]->path];
 316     p[1] = &c->paths[1][nodes[1][0]->path];
 317     for (j = i; j > froze; j--) {
 318         dst[j] = p[1]->value << 6 | p[0]->value;
 319         p[0] = &c->paths[0][p[0]->prev];
 320         p[1] = &c->paths[1][p[1]->prev];
 321     }
 322     c->band[0] = nodes[0][0]->state;
 323     c->band[1] = nodes[1][0]->state;
 324 }
 325
 326 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 327                                          const int16_t *samples)
 328 {
 329     int xlow, xhigh, ilow, ihigh;
 330     filter_samples(c, samples, &xlow, &xhigh);
 331     ihigh = encode_high(&c->band[1], xhigh);
 332     ilow  = encode_low (&c->band[0], xlow);
 333     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 334                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 335     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 336     *dst = ihigh << 6 | ilow;
 337 }
 338
 339 static void g722_encode_no_trellis(G722Context *c,
 340                                    uint8_t *dst, int nb_samples,
 341                                    const int16_t *samples)
 342 {
 343     int i;
 344     for (i = 0; i < nb_samples; i += 2)
 345         encode_byte(c, dst++, &samples[i]);
 346 }
 347
 348 static int g722_encode_frame(AVCodecContext *avctx,
 349                              uint8_t *dst, int buf_size, void *data)
 350 {
 351     G722Context *c = avctx->priv_data;
 352     const int16_t *samples = data;
 353     int nb_samples;
 354
 355     nb_samples = avctx->frame_size - (avctx->frame_size & 1);
 356
 357     if (avctx->trellis)
 358         g722_encode_trellis(c, avctx->trellis, dst, nb_samples, samples);
 359     else
 360         g722_encode_no_trellis(c, dst, nb_samples, samples);
 361
 362     /* handle last frame with odd frame_size */
 363     if (nb_samples < avctx->frame_size) {
 364         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 365         encode_byte(c, &dst[nb_samples >> 1], last_samples);
 366     }
 367
 368     return (avctx->frame_size + 1) >> 1;
 369 }
 370
 371 AVCodec ff_adpcm_g722_encoder = {
 372     .name           = "g722",
 373     .type           = AVMEDIA_TYPE_AUDIO,
 374     .id             = CODEC_ID_ADPCM_G722,
 375     .priv_data_size = sizeof(G722Context),
 376     .init           = g722_encode_init,
 377     .close          = g722_encode_close,
 378     .encode         = g722_encode_frame,
 379     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
 380     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 381     .sample_fmts    = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
 382 };