git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "avcodec.h"
  31 #include "internal.h"
  32 #include "g722.h"
  33
/* Number of encoded sample pairs between two trellis flushes: the trellis
   encoder writes out the best path every FREEZE_INTERVAL iterations, and
   the path storage is sized as frontier * FREEZE_INTERVAL entries. */
#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing very large values leads to strange
   problems, so the requested frame size is limited to a reasonable value. */
#define MAX_FRAME_SIZE 32768

/* The value of avctx->trellis is clipped to this range to prevent data type
   overflows and undefined behavior (the frontier is 1 << trellis). Using
   larger values is extremely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
  44
  45 static av_cold int g722_encode_close(AVCodecContext *avctx)
  46 {
  47     G722Context *c = avctx->priv_data;
  48     int i;
  49     for (i = 0; i < 2; i++) {
  50         av_freep(&c->paths[i]);
  51         av_freep(&c->node_buf[i]);
  52         av_freep(&c->nodep_buf[i]);
  53     }
  54 #if FF_API_OLD_ENCODE_AUDIO
  55     av_freep(&avctx->coded_frame);
  56 #endif
  57     return 0;
  58 }
  59
  60 static av_cold int g722_encode_init(AVCodecContext * avctx)
  61 {
  62     G722Context *c = avctx->priv_data;
  63     int ret;
  64
  65     if (avctx->channels != 1) {
  66         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  67         return AVERROR_INVALIDDATA;
  68     }
  69
  70     c->band[0].scale_factor = 8;
  71     c->band[1].scale_factor = 2;
  72     c->prev_samples_pos = 22;
  73
  74     if (avctx->trellis) {
  75         int frontier = 1 << avctx->trellis;
  76         int max_paths = frontier * FREEZE_INTERVAL;
  77         int i;
  78         for (i = 0; i < 2; i++) {
  79             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
  80             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
  81             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
  82             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
  83                 ret = AVERROR(ENOMEM);
  84                 goto error;
  85             }
  86         }
  87     }
  88
  89     if (avctx->frame_size) {
  90         /* validate frame size */
  91         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  92             int new_frame_size;
  93
  94             if (avctx->frame_size == 1)
  95                 new_frame_size = 2;
  96             else if (avctx->frame_size > MAX_FRAME_SIZE)
  97                 new_frame_size = MAX_FRAME_SIZE;
  98             else
  99                 new_frame_size = avctx->frame_size - 1;
 100
 101             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
 102                    "allowed. Using %d instead of %d\n", new_frame_size,
 103                    avctx->frame_size);
 104             avctx->frame_size = new_frame_size;
 105         }
 106     } else {
 107         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
 108            a common packet size for VoIP applications */
 109         avctx->frame_size = 320;
 110     }
 111     avctx->delay = 22;
 112
 113     if (avctx->trellis) {
 114         /* validate trellis */
 115         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
 116             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
 117             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
 118                    "allowed. Using %d instead of %d\n", new_trellis,
 119                    avctx->trellis);
 120             avctx->trellis = new_trellis;
 121         }
 122     }
 123
 124 #if FF_API_OLD_ENCODE_AUDIO
 125     avctx->coded_frame = avcodec_alloc_frame();
 126     if (!avctx->coded_frame) {
 127         ret = AVERROR(ENOMEM);
 128         goto error;
 129     }
 130 #endif
 131
 132     return 0;
 133 error:
 134     g722_encode_close(avctx);
 135     return ret;
 136 }
 137
 138 static const int16_t low_quant[33] = {
 139       35,   72,  110,  150,  190,  233,  276,  323,
 140      370,  422,  473,  530,  587,  650,  714,  786,
 141      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 142     1765, 1980, 2195, 2557, 2919
 143 };
 144
 145 static inline void filter_samples(G722Context *c, const int16_t *samples,
 146                                   int *xlow, int *xhigh)
 147 {
 148     int xout1, xout2;
 149     c->prev_samples[c->prev_samples_pos++] = samples[0];
 150     c->prev_samples[c->prev_samples_pos++] = samples[1];
 151     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
 152     *xlow  = xout1 + xout2 >> 14;
 153     *xhigh = xout1 - xout2 >> 14;
 154     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 155         memmove(c->prev_samples,
 156                 c->prev_samples + c->prev_samples_pos - 22,
 157                 22 * sizeof(c->prev_samples[0]));
 158         c->prev_samples_pos = 22;
 159     }
 160 }
 161
 162 static inline int encode_high(const struct G722Band *state, int xhigh)
 163 {
 164     int diff = av_clip_int16(xhigh - state->s_predictor);
 165     int pred = 141 * state->scale_factor >> 8;
 166            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 167     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 168 }
 169
 170 static inline int encode_low(const struct G722Band* state, int xlow)
 171 {
 172     int diff  = av_clip_int16(xlow - state->s_predictor);
 173            /* = diff >= 0 ? diff : -(diff + 1) */
 174     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 175     int i = 0;
 176     limit = limit + 1 << 10;
 177     if (limit > low_quant[8] * state->scale_factor)
 178         i = 9;
 179     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 180         i++;
 181     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 182 }
 183
 184 static void g722_encode_trellis(G722Context *c, int trellis,
 185                                 uint8_t *dst, int nb_samples,
 186                                 const int16_t *samples)
 187 {
 188     int i, j, k;
 189     int frontier = 1 << trellis;
 190     struct TrellisNode **nodes[2];
 191     struct TrellisNode **nodes_next[2];
 192     int pathn[2] = {0, 0}, froze = -1;
 193     struct TrellisPath *p[2];
 194
 195     for (i = 0; i < 2; i++) {
 196         nodes[i] = c->nodep_buf[i];
 197         nodes_next[i] = c->nodep_buf[i] + frontier;
 198         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
 199         nodes[i][0] = c->node_buf[i] + frontier;
 200         nodes[i][0]->ssd = 0;
 201         nodes[i][0]->path = 0;
 202         nodes[i][0]->state = c->band[i];
 203     }
 204
 205     for (i = 0; i < nb_samples >> 1; i++) {
 206         int xlow, xhigh;
 207         struct TrellisNode *next[2];
 208         int heap_pos[2] = {0, 0};
 209
 210         for (j = 0; j < 2; j++) {
 211             next[j] = c->node_buf[j] + frontier*(i & 1);
 212             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 213         }
 214
 215         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 216
 217         for (j = 0; j < frontier && nodes[0][j]; j++) {
 218             /* Only k >> 2 affects the future adaptive state, therefore testing
 219              * small steps that don't change k >> 2 is useless, the original
 220              * value from encode_low is better than them. Since we step k
 221              * in steps of 4, make sure range is a multiple of 4, so that
 222              * we don't miss the original value from encode_low. */
 223             int range = j < frontier/2 ? 4 : 0;
 224             struct TrellisNode *cur_node = nodes[0][j];
 225
 226             int ilow = encode_low(&cur_node->state, xlow);
 227
 228             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 229                 int decoded, dec_diff, pos;
 230                 uint32_t ssd;
 231                 struct TrellisNode* node;
 232
 233                 if (k < 0)
 234                     continue;
 235
 236                 decoded = av_clip((cur_node->state.scale_factor *
 237                                   ff_g722_low_inv_quant6[k] >> 10)
 238                                 + cur_node->state.s_predictor, -16384, 16383);
 239                 dec_diff = xlow - decoded;
 240
 241 #define STORE_NODE(index, UPDATE, VALUE)\
 242                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 243                 /* Check for wraparound. Using 64 bit ssd counters would \
 244                  * be simpler, but is slower on x86 32 bit. */\
 245                 if (ssd < cur_node->ssd)\
 246                     continue;\
 247                 if (heap_pos[index] < frontier) {\
 248                     pos = heap_pos[index]++;\
 249                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
 250                     node = nodes_next[index][pos] = next[index]++;\
 251                     node->path = pathn[index]++;\
 252                 } else {\
 253                     /* Try to replace one of the leaf nodes with the new \
 254                      * one, but not always testing the same leaf position */\
 255                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 256                     if (ssd >= nodes_next[index][pos]->ssd)\
 257                         continue;\
 258                     heap_pos[index]++;\
 259                     node = nodes_next[index][pos];\
 260                 }\
 261                 node->ssd = ssd;\
 262                 node->state = cur_node->state;\
 263                 UPDATE;\
 264                 c->paths[index][node->path].value = VALUE;\
 265                 c->paths[index][node->path].prev = cur_node->path;\
 266                 /* Sift the newly inserted node up in the heap to restore \
 267                  * the heap property */\
 268                 while (pos > 0) {\
 269                     int parent = (pos - 1) >> 1;\
 270                     if (nodes_next[index][parent]->ssd <= ssd)\
 271                         break;\
 272                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 273                                                 nodes_next[index][pos]);\
 274                     pos = parent;\
 275                 }
 276                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 277             }
 278         }
 279
 280         for (j = 0; j < frontier && nodes[1][j]; j++) {
 281             int ihigh;
 282             struct TrellisNode *cur_node = nodes[1][j];
 283
 284             /* We don't try to get any initial guess for ihigh via
 285              * encode_high - since there's only 4 possible values, test
 286              * them all. Testing all of these gives a much, much larger
 287              * gain than testing a larger range around ilow. */
 288             for (ihigh = 0; ihigh < 4; ihigh++) {
 289                 int dhigh, decoded, dec_diff, pos;
 290                 uint32_t ssd;
 291                 struct TrellisNode* node;
 292
 293                 dhigh = cur_node->state.scale_factor *
 294                         ff_g722_high_inv_quant[ihigh] >> 10;
 295                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
 296                                   -16384, 16383);
 297                 dec_diff = xhigh - decoded;
 298
 299                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 300             }
 301         }
 302
 303         for (j = 0; j < 2; j++) {
 304             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 305
 306             if (nodes[j][0]->ssd > (1 << 16)) {
 307                 for (k = 1; k < frontier && nodes[j][k]; k++)
 308                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 309                 nodes[j][0]->ssd = 0;
 310             }
 311         }
 312
 313         if (i == froze + FREEZE_INTERVAL) {
 314             p[0] = &c->paths[0][nodes[0][0]->path];
 315             p[1] = &c->paths[1][nodes[1][0]->path];
 316             for (j = i; j > froze; j--) {
 317                 dst[j] = p[1]->value << 6 | p[0]->value;
 318                 p[0] = &c->paths[0][p[0]->prev];
 319                 p[1] = &c->paths[1][p[1]->prev];
 320             }
 321             froze = i;
 322             pathn[0] = pathn[1] = 0;
 323             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 324             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 325         }
 326     }
 327
 328     p[0] = &c->paths[0][nodes[0][0]->path];
 329     p[1] = &c->paths[1][nodes[1][0]->path];
 330     for (j = i; j > froze; j--) {
 331         dst[j] = p[1]->value << 6 | p[0]->value;
 332         p[0] = &c->paths[0][p[0]->prev];
 333         p[1] = &c->paths[1][p[1]->prev];
 334     }
 335     c->band[0] = nodes[0][0]->state;
 336     c->band[1] = nodes[1][0]->state;
 337 }
 338
 339 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 340                                          const int16_t *samples)
 341 {
 342     int xlow, xhigh, ilow, ihigh;
 343     filter_samples(c, samples, &xlow, &xhigh);
 344     ihigh = encode_high(&c->band[1], xhigh);
 345     ilow  = encode_low (&c->band[0], xlow);
 346     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 347                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 348     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 349     *dst = ihigh << 6 | ilow;
 350 }
 351
 352 static void g722_encode_no_trellis(G722Context *c,
 353                                    uint8_t *dst, int nb_samples,
 354                                    const int16_t *samples)
 355 {
 356     int i;
 357     for (i = 0; i < nb_samples; i += 2)
 358         encode_byte(c, dst++, &samples[i]);
 359 }
 360
 361 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 362                              const AVFrame *frame, int *got_packet_ptr)
 363 {
 364     G722Context *c = avctx->priv_data;
 365     const int16_t *samples = (const int16_t *)frame->data[0];
 366     int nb_samples, out_size, ret;
 367
 368     out_size = (frame->nb_samples + 1) / 2;
 369     if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)))
 370         return ret;
 371
 372     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 373
 374     if (avctx->trellis)
 375         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
 376     else
 377         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
 378
 379     /* handle last frame with odd frame_size */
 380     if (nb_samples < frame->nb_samples) {
 381         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 382         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
 383     }
 384
 385     if (frame->pts != AV_NOPTS_VALUE)
 386         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
 387     *got_packet_ptr = 1;
 388     return 0;
 389 }
 390
 391 AVCodec ff_adpcm_g722_encoder = {
 392     .name           = "g722",
 393     .type           = AVMEDIA_TYPE_AUDIO,
 394     .id             = CODEC_ID_ADPCM_G722,
 395     .priv_data_size = sizeof(G722Context),
 396     .init           = g722_encode_init,
 397     .close          = g722_encode_close,
 398     .encode2        = g722_encode_frame,
 399     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
 400     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 401     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
 402                                                      AV_SAMPLE_FMT_NONE },
 403 };