git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "avcodec.h"
  31 #include "g722.h"
  32
  33 #define FREEZE_INTERVAL 128
  34
  35 /* This is an arbitrary value. Allowing insanely large values leads to strange
  36    problems, so we limit it to a reasonable value */
  37 #define MAX_FRAME_SIZE 32768
  38
  39 /* We clip the value of avctx->trellis to prevent data type overflows and
  40    undefined behavior. Using larger values is insanely slow anyway. */
  41 #define MIN_TRELLIS 0
  42 #define MAX_TRELLIS 16
  43
  44 static av_cold int g722_encode_init(AVCodecContext * avctx)
  45 {
  46     G722Context *c = avctx->priv_data;
  47
  48     if (avctx->channels != 1) {
  49         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  50         return AVERROR_INVALIDDATA;
  51     }
  52
  53     c->band[0].scale_factor = 8;
  54     c->band[1].scale_factor = 2;
  55     c->prev_samples_pos = 22;
  56
  57     if (avctx->trellis) {
  58         int frontier = 1 << avctx->trellis;
  59         int max_paths = frontier * FREEZE_INTERVAL;
  60         int i;
  61         for (i = 0; i < 2; i++) {
  62             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
  63             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
  64             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
  65         }
  66     }
  67
  68     if (avctx->frame_size) {
  69         /* validate frame size */
  70         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  71             int new_frame_size;
  72
  73             if (avctx->frame_size == 1)
  74                 new_frame_size = 2;
  75             else if (avctx->frame_size > MAX_FRAME_SIZE)
  76                 new_frame_size = MAX_FRAME_SIZE;
  77             else
  78                 new_frame_size = avctx->frame_size - 1;
  79
  80             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
  81                    "allowed. Using %d instead of %d\n", new_frame_size,
  82                    avctx->frame_size);
  83             avctx->frame_size = new_frame_size;
  84         }
  85     } else {
  86         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
  87            a common packet size for VoIP applications */
  88         avctx->frame_size = 320;
  89     }
  90
  91     if (avctx->trellis) {
  92         /* validate trellis */
  93         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
  94             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
  95             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
  96                    "allowed. Using %d instead of %d\n", new_trellis,
  97                    avctx->trellis);
  98             avctx->trellis = new_trellis;
  99         }
 100     }
 101
 102     return 0;
 103 }
 104
 105 static av_cold int g722_encode_close(AVCodecContext *avctx)
 106 {
 107     G722Context *c = avctx->priv_data;
 108     int i;
 109     for (i = 0; i < 2; i++) {
 110         av_freep(&c->paths[i]);
 111         av_freep(&c->node_buf[i]);
 112         av_freep(&c->nodep_buf[i]);
 113     }
 114     return 0;
 115 }
 116
 117 static const int16_t low_quant[33] = {
 118       35,   72,  110,  150,  190,  233,  276,  323,
 119      370,  422,  473,  530,  587,  650,  714,  786,
 120      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 121     1765, 1980, 2195, 2557, 2919
 122 };
 123
 124 static inline void filter_samples(G722Context *c, const int16_t *samples,
 125                                   int *xlow, int *xhigh)
 126 {
 127     int xout1, xout2;
 128     c->prev_samples[c->prev_samples_pos++] = samples[0];
 129     c->prev_samples[c->prev_samples_pos++] = samples[1];
 130     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
 131     *xlow  = xout1 + xout2 >> 13;
 132     *xhigh = xout1 - xout2 >> 13;
 133     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 134         memmove(c->prev_samples,
 135                 c->prev_samples + c->prev_samples_pos - 22,
 136                 22 * sizeof(c->prev_samples[0]));
 137         c->prev_samples_pos = 22;
 138     }
 139 }
 140
 141 static inline int encode_high(const struct G722Band *state, int xhigh)
 142 {
 143     int diff = av_clip_int16(xhigh - state->s_predictor);
 144     int pred = 141 * state->scale_factor >> 8;
 145            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 146     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 147 }
 148
 149 static inline int encode_low(const struct G722Band* state, int xlow)
 150 {
 151     int diff  = av_clip_int16(xlow - state->s_predictor);
 152            /* = diff >= 0 ? diff : -(diff + 1) */
 153     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 154     int i = 0;
 155     limit = limit + 1 << 10;
 156     if (limit > low_quant[8] * state->scale_factor)
 157         i = 9;
 158     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 159         i++;
 160     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 161 }
 162
 163 static void g722_encode_trellis(G722Context *c, int trellis,
 164                                 uint8_t *dst, int nb_samples,
 165                                 const int16_t *samples)
 166 {
 167     int i, j, k;
 168     int frontier = 1 << trellis;
 169     struct TrellisNode **nodes[2];
 170     struct TrellisNode **nodes_next[2];
 171     int pathn[2] = {0, 0}, froze = -1;
 172     struct TrellisPath *p[2];
 173
 174     for (i = 0; i < 2; i++) {
 175         nodes[i] = c->nodep_buf[i];
 176         nodes_next[i] = c->nodep_buf[i] + frontier;
 177         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
 178         nodes[i][0] = c->node_buf[i] + frontier;
 179         nodes[i][0]->ssd = 0;
 180         nodes[i][0]->path = 0;
 181         nodes[i][0]->state = c->band[i];
 182     }
 183
 184     for (i = 0; i < nb_samples >> 1; i++) {
 185         int xlow, xhigh;
 186         struct TrellisNode *next[2];
 187         int heap_pos[2] = {0, 0};
 188
 189         for (j = 0; j < 2; j++) {
 190             next[j] = c->node_buf[j] + frontier*(i & 1);
 191             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 192         }
 193
 194         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 195
 196         for (j = 0; j < frontier && nodes[0][j]; j++) {
 197             /* Only k >> 2 affects the future adaptive state, therefore testing
 198              * small steps that don't change k >> 2 is useless, the original
 199              * value from encode_low is better than them. Since we step k
 200              * in steps of 4, make sure range is a multiple of 4, so that
 201              * we don't miss the original value from encode_low. */
 202             int range = j < frontier/2 ? 4 : 0;
 203             struct TrellisNode *cur_node = nodes[0][j];
 204
 205             int ilow = encode_low(&cur_node->state, xlow);
 206
 207             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 208                 int decoded, dec_diff, pos;
 209                 uint32_t ssd;
 210                 struct TrellisNode* node;
 211
 212                 if (k < 0)
 213                     continue;
 214
 215                 decoded = av_clip((cur_node->state.scale_factor *
 216                                   ff_g722_low_inv_quant6[k] >> 10)
 217                                 + cur_node->state.s_predictor, -16384, 16383);
 218                 dec_diff = xlow - decoded;
 219
 220 #define STORE_NODE(index, UPDATE, VALUE)\
 221                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 222                 /* Check for wraparound. Using 64 bit ssd counters would \
 223                  * be simpler, but is slower on x86 32 bit. */\
 224                 if (ssd < cur_node->ssd)\
 225                     continue;\
 226                 if (heap_pos[index] < frontier) {\
 227                     pos = heap_pos[index]++;\
 228                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
 229                     node = nodes_next[index][pos] = next[index]++;\
 230                     node->path = pathn[index]++;\
 231                 } else {\
 232                     /* Try to replace one of the leaf nodes with the new \
 233                      * one, but not always testing the same leaf position */\
 234                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 235                     if (ssd >= nodes_next[index][pos]->ssd)\
 236                         continue;\
 237                     heap_pos[index]++;\
 238                     node = nodes_next[index][pos];\
 239                 }\
 240                 node->ssd = ssd;\
 241                 node->state = cur_node->state;\
 242                 UPDATE;\
 243                 c->paths[index][node->path].value = VALUE;\
 244                 c->paths[index][node->path].prev = cur_node->path;\
 245                 /* Sift the newly inserted node up in the heap to restore \
 246                  * the heap property */\
 247                 while (pos > 0) {\
 248                     int parent = (pos - 1) >> 1;\
 249                     if (nodes_next[index][parent]->ssd <= ssd)\
 250                         break;\
 251                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 252                                                 nodes_next[index][pos]);\
 253                     pos = parent;\
 254                 }
 255                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 256             }
 257         }
 258
 259         for (j = 0; j < frontier && nodes[1][j]; j++) {
 260             int ihigh;
 261             struct TrellisNode *cur_node = nodes[1][j];
 262
 263             /* We don't try to get any initial guess for ihigh via
 264              * encode_high - since there's only 4 possible values, test
 265              * them all. Testing all of these gives a much, much larger
 266              * gain than testing a larger range around ilow. */
 267             for (ihigh = 0; ihigh < 4; ihigh++) {
 268                 int dhigh, decoded, dec_diff, pos;
 269                 uint32_t ssd;
 270                 struct TrellisNode* node;
 271
 272                 dhigh = cur_node->state.scale_factor *
 273                         ff_g722_high_inv_quant[ihigh] >> 10;
 274                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
 275                                   -16384, 16383);
 276                 dec_diff = xhigh - decoded;
 277
 278                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 279             }
 280         }
 281
 282         for (j = 0; j < 2; j++) {
 283             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 284
 285             if (nodes[j][0]->ssd > (1 << 16)) {
 286                 for (k = 1; k < frontier && nodes[j][k]; k++)
 287                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 288                 nodes[j][0]->ssd = 0;
 289             }
 290         }
 291
 292         if (i == froze + FREEZE_INTERVAL) {
 293             p[0] = &c->paths[0][nodes[0][0]->path];
 294             p[1] = &c->paths[1][nodes[1][0]->path];
 295             for (j = i; j > froze; j--) {
 296                 dst[j] = p[1]->value << 6 | p[0]->value;
 297                 p[0] = &c->paths[0][p[0]->prev];
 298                 p[1] = &c->paths[1][p[1]->prev];
 299             }
 300             froze = i;
 301             pathn[0] = pathn[1] = 0;
 302             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 303             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 304         }
 305     }
 306
 307     p[0] = &c->paths[0][nodes[0][0]->path];
 308     p[1] = &c->paths[1][nodes[1][0]->path];
 309     for (j = i; j > froze; j--) {
 310         dst[j] = p[1]->value << 6 | p[0]->value;
 311         p[0] = &c->paths[0][p[0]->prev];
 312         p[1] = &c->paths[1][p[1]->prev];
 313     }
 314     c->band[0] = nodes[0][0]->state;
 315     c->band[1] = nodes[1][0]->state;
 316 }
 317
 318 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 319                                          const int16_t *samples)
 320 {
 321     int xlow, xhigh, ilow, ihigh;
 322     filter_samples(c, samples, &xlow, &xhigh);
 323     ihigh = encode_high(&c->band[1], xhigh);
 324     ilow  = encode_low (&c->band[0], xlow);
 325     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 326                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 327     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 328     *dst = ihigh << 6 | ilow;
 329 }
 330
 331 static void g722_encode_no_trellis(G722Context *c,
 332                                    uint8_t *dst, int nb_samples,
 333                                    const int16_t *samples)
 334 {
 335     int i;
 336     for (i = 0; i < nb_samples; i += 2)
 337         encode_byte(c, dst++, &samples[i]);
 338 }
 339
 340 static int g722_encode_frame(AVCodecContext *avctx,
 341                              uint8_t *dst, int buf_size, void *data)
 342 {
 343     G722Context *c = avctx->priv_data;
 344     const int16_t *samples = data;
 345     int nb_samples;
 346
 347     nb_samples = avctx->frame_size - (avctx->frame_size & 1);
 348
 349     if (avctx->trellis)
 350         g722_encode_trellis(c, avctx->trellis, dst, nb_samples, samples);
 351     else
 352         g722_encode_no_trellis(c, dst, nb_samples, samples);
 353
 354     /* handle last frame with odd frame_size */
 355     if (nb_samples < avctx->frame_size) {
 356         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 357         encode_byte(c, &dst[nb_samples >> 1], last_samples);
 358     }
 359
 360     return (avctx->frame_size + 1) >> 1;
 361 }
 362
 363 AVCodec ff_adpcm_g722_encoder = {
 364     .name           = "g722",
 365     .type           = AVMEDIA_TYPE_AUDIO,
 366     .id             = CODEC_ID_ADPCM_G722,
 367     .priv_data_size = sizeof(G722Context),
 368     .init           = g722_encode_init,
 369     .close          = g722_encode_close,
 370     .encode         = g722_encode_frame,
 371     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
 372     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 373     .sample_fmts    = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
 374 };