git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "libavutil/avassert.h"
  31 #include "avcodec.h"
  32 #include "internal.h"
  33 #include "g722.h"
  34 #include "libavutil/common.h"
  35
  36 #define FREEZE_INTERVAL 128
  37
  38 /* This is an arbitrary value. Allowing insanely large values leads to strange
  39    problems, so we limit it to a reasonable value */
  40 #define MAX_FRAME_SIZE 32768
  41
  42 /* We clip the value of avctx->trellis to prevent data type overflows and
  43    undefined behavior. Using larger values is insanely slow anyway. */
  44 #define MIN_TRELLIS 0
  45 #define MAX_TRELLIS 16
  46
  47 static av_cold int g722_encode_close(AVCodecContext *avctx)
  48 {
  49     G722Context *c = avctx->priv_data;
  50     int i;
  51     for (i = 0; i < 2; i++) {
  52         av_freep(&c->paths[i]);
  53         av_freep(&c->node_buf[i]);
  54         av_freep(&c->nodep_buf[i]);
  55     }
  56     return 0;
  57 }
  58
  59 static av_cold int g722_encode_init(AVCodecContext * avctx)
  60 {
  61     G722Context *c = avctx->priv_data;
  62
  63     c->band[0].scale_factor = 8;
  64     c->band[1].scale_factor = 2;
  65     c->prev_samples_pos = 22;
  66
  67     if (avctx->trellis) {
  68         int frontier = 1 << avctx->trellis;
  69         int max_paths = frontier * FREEZE_INTERVAL;
  70         int i;
  71         for (i = 0; i < 2; i++) {
  72             c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
  73             c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
  74             c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
  75             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
  76                 return AVERROR(ENOMEM);
  77         }
  78     }
  79
  80     if (avctx->frame_size) {
  81         /* validate frame size */
  82         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  83             int new_frame_size;
  84
  85             if (avctx->frame_size == 1)
  86                 new_frame_size = 2;
  87             else if (avctx->frame_size > MAX_FRAME_SIZE)
  88                 new_frame_size = MAX_FRAME_SIZE;
  89             else
  90                 new_frame_size = avctx->frame_size - 1;
  91
  92             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
  93                    "allowed. Using %d instead of %d\n", new_frame_size,
  94                    avctx->frame_size);
  95             avctx->frame_size = new_frame_size;
  96         }
  97     } else {
  98         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
  99            a common packet size for VoIP applications */
 100         avctx->frame_size = 320;
 101     }
 102     avctx->initial_padding = 22;
 103
 104     if (avctx->trellis) {
 105         /* validate trellis */
 106         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
 107             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
 108             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
 109                    "allowed. Using %d instead of %d\n", new_trellis,
 110                    avctx->trellis);
 111             avctx->trellis = new_trellis;
 112         }
 113     }
 114
 115     ff_g722dsp_init(&c->dsp);
 116
 117     return 0;
 118 }
 119
 120 static const int16_t low_quant[33] = {
 121       35,   72,  110,  150,  190,  233,  276,  323,
 122      370,  422,  473,  530,  587,  650,  714,  786,
 123      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 124     1765, 1980, 2195, 2557, 2919
 125 };
 126
 127 static inline void filter_samples(G722Context *c, const int16_t *samples,
 128                                   int *xlow, int *xhigh)
 129 {
 130     int xout[2];
 131     c->prev_samples[c->prev_samples_pos++] = samples[0];
 132     c->prev_samples[c->prev_samples_pos++] = samples[1];
 133     c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
 134     *xlow  = xout[0] + xout[1] >> 14;
 135     *xhigh = xout[0] - xout[1] >> 14;
 136     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 137         memmove(c->prev_samples,
 138                 c->prev_samples + c->prev_samples_pos - 22,
 139                 22 * sizeof(c->prev_samples[0]));
 140         c->prev_samples_pos = 22;
 141     }
 142 }
 143
 144 static inline int encode_high(const struct G722Band *state, int xhigh)
 145 {
 146     int diff = av_clip_int16(xhigh - state->s_predictor);
 147     int pred = 141 * state->scale_factor >> 8;
 148            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 149     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 150 }
 151
 152 static inline int encode_low(const struct G722Band* state, int xlow)
 153 {
 154     int diff  = av_clip_int16(xlow - state->s_predictor);
 155            /* = diff >= 0 ? diff : -(diff + 1) */
 156     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 157     int i = 0;
 158     limit = limit + 1 << 10;
 159     if (limit > low_quant[8] * state->scale_factor)
 160         i = 9;
 161     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 162         i++;
 163     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 164 }
 165
 166 static void g722_encode_trellis(G722Context *c, int trellis,
 167                                 uint8_t *dst, int nb_samples,
 168                                 const int16_t *samples)
 169 {
 170     int i, j, k;
 171     int frontier = 1 << trellis;
 172     struct TrellisNode **nodes[2];
 173     struct TrellisNode **nodes_next[2];
 174     int pathn[2] = {0, 0}, froze = -1;
 175     struct TrellisPath *p[2];
 176
 177     for (i = 0; i < 2; i++) {
 178         nodes[i] = c->nodep_buf[i];
 179         nodes_next[i] = c->nodep_buf[i] + frontier;
 180         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
 181         nodes[i][0] = c->node_buf[i] + frontier;
 182         nodes[i][0]->ssd = 0;
 183         nodes[i][0]->path = 0;
 184         nodes[i][0]->state = c->band[i];
 185     }
 186
 187     for (i = 0; i < nb_samples >> 1; i++) {
 188         int xlow, xhigh;
 189         struct TrellisNode *next[2];
 190         int heap_pos[2] = {0, 0};
 191
 192         for (j = 0; j < 2; j++) {
 193             next[j] = c->node_buf[j] + frontier*(i & 1);
 194             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 195         }
 196
 197         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 198
 199         for (j = 0; j < frontier && nodes[0][j]; j++) {
 200             /* Only k >> 2 affects the future adaptive state, therefore testing
 201              * small steps that don't change k >> 2 is useless, the original
 202              * value from encode_low is better than them. Since we step k
 203              * in steps of 4, make sure range is a multiple of 4, so that
 204              * we don't miss the original value from encode_low. */
 205             int range = j < frontier/2 ? 4 : 0;
 206             struct TrellisNode *cur_node = nodes[0][j];
 207
 208             int ilow = encode_low(&cur_node->state, xlow);
 209
 210             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 211                 int decoded, dec_diff, pos;
 212                 uint32_t ssd;
 213                 struct TrellisNode* node;
 214
 215                 if (k < 0)
 216                     continue;
 217
 218                 decoded = av_clip_intp2((cur_node->state.scale_factor *
 219                                   ff_g722_low_inv_quant6[k] >> 10)
 220                                 + cur_node->state.s_predictor, 14);
 221                 dec_diff = xlow - decoded;
 222
 223 #define STORE_NODE(index, UPDATE, VALUE)\
 224                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 225                 /* Check for wraparound. Using 64 bit ssd counters would \
 226                  * be simpler, but is slower on x86 32 bit. */\
 227                 if (ssd < cur_node->ssd)\
 228                     continue;\
 229                 if (heap_pos[index] < frontier) {\
 230                     pos = heap_pos[index]++;\
 231                     av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
 232                     node = nodes_next[index][pos] = next[index]++;\
 233                     node->path = pathn[index]++;\
 234                 } else {\
 235                     /* Try to replace one of the leaf nodes with the new \
 236                      * one, but not always testing the same leaf position */\
 237                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 238                     if (ssd >= nodes_next[index][pos]->ssd)\
 239                         continue;\
 240                     heap_pos[index]++;\
 241                     node = nodes_next[index][pos];\
 242                 }\
 243                 node->ssd = ssd;\
 244                 node->state = cur_node->state;\
 245                 UPDATE;\
 246                 c->paths[index][node->path].value = VALUE;\
 247                 c->paths[index][node->path].prev = cur_node->path;\
 248                 /* Sift the newly inserted node up in the heap to restore \
 249                  * the heap property */\
 250                 while (pos > 0) {\
 251                     int parent = (pos - 1) >> 1;\
 252                     if (nodes_next[index][parent]->ssd <= ssd)\
 253                         break;\
 254                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 255                                                 nodes_next[index][pos]);\
 256                     pos = parent;\
 257                 }
 258                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 259             }
 260         }
 261
 262         for (j = 0; j < frontier && nodes[1][j]; j++) {
 263             int ihigh;
 264             struct TrellisNode *cur_node = nodes[1][j];
 265
 266             /* We don't try to get any initial guess for ihigh via
 267              * encode_high - since there's only 4 possible values, test
 268              * them all. Testing all of these gives a much, much larger
 269              * gain than testing a larger range around ilow. */
 270             for (ihigh = 0; ihigh < 4; ihigh++) {
 271                 int dhigh, decoded, dec_diff, pos;
 272                 uint32_t ssd;
 273                 struct TrellisNode* node;
 274
 275                 dhigh = cur_node->state.scale_factor *
 276                         ff_g722_high_inv_quant[ihigh] >> 10;
 277                 decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
 278                 dec_diff = xhigh - decoded;
 279
 280                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 281             }
 282         }
 283
 284         for (j = 0; j < 2; j++) {
 285             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 286
 287             if (nodes[j][0]->ssd > (1 << 16)) {
 288                 for (k = 1; k < frontier && nodes[j][k]; k++)
 289                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 290                 nodes[j][0]->ssd = 0;
 291             }
 292         }
 293
 294         if (i == froze + FREEZE_INTERVAL) {
 295             p[0] = &c->paths[0][nodes[0][0]->path];
 296             p[1] = &c->paths[1][nodes[1][0]->path];
 297             for (j = i; j > froze; j--) {
 298                 dst[j] = p[1]->value << 6 | p[0]->value;
 299                 p[0] = &c->paths[0][p[0]->prev];
 300                 p[1] = &c->paths[1][p[1]->prev];
 301             }
 302             froze = i;
 303             pathn[0] = pathn[1] = 0;
 304             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 305             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 306         }
 307     }
 308
 309     p[0] = &c->paths[0][nodes[0][0]->path];
 310     p[1] = &c->paths[1][nodes[1][0]->path];
 311     for (j = i; j > froze; j--) {
 312         dst[j] = p[1]->value << 6 | p[0]->value;
 313         p[0] = &c->paths[0][p[0]->prev];
 314         p[1] = &c->paths[1][p[1]->prev];
 315     }
 316     c->band[0] = nodes[0][0]->state;
 317     c->band[1] = nodes[1][0]->state;
 318 }
 319
 320 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 321                                          const int16_t *samples)
 322 {
 323     int xlow, xhigh, ilow, ihigh;
 324     filter_samples(c, samples, &xlow, &xhigh);
 325     ihigh = encode_high(&c->band[1], xhigh);
 326     ilow  = encode_low (&c->band[0], xlow);
 327     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 328                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 329     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 330     *dst = ihigh << 6 | ilow;
 331 }
 332
 333 static void g722_encode_no_trellis(G722Context *c,
 334                                    uint8_t *dst, int nb_samples,
 335                                    const int16_t *samples)
 336 {
 337     int i;
 338     for (i = 0; i < nb_samples; i += 2)
 339         encode_byte(c, dst++, &samples[i]);
 340 }
 341
 342 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 343                              const AVFrame *frame, int *got_packet_ptr)
 344 {
 345     G722Context *c = avctx->priv_data;
 346     const int16_t *samples = (const int16_t *)frame->data[0];
 347     int nb_samples, out_size, ret;
 348
 349     out_size = (frame->nb_samples + 1) / 2;
 350     if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
 351         return ret;
 352
 353     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 354
 355     if (avctx->trellis)
 356         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
 357     else
 358         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
 359
 360     /* handle last frame with odd frame_size */
 361     if (nb_samples < frame->nb_samples) {
 362         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 363         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
 364     }
 365
 366     if (frame->pts != AV_NOPTS_VALUE)
 367         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 368     *got_packet_ptr = 1;
 369     return 0;
 370 }
 371
 372 AVCodec ff_adpcm_g722_encoder = {
 373     .name            = "g722",
 374     .long_name       = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 375     .type            = AVMEDIA_TYPE_AUDIO,
 376     .id              = AV_CODEC_ID_ADPCM_G722,
 377     .priv_data_size  = sizeof(G722Context),
 378     .init            = g722_encode_init,
 379     .close           = g722_encode_close,
 380     .encode2         = g722_encode_frame,
 381     .capabilities    = AV_CODEC_CAP_SMALL_LAST_FRAME,
 382     .sample_fmts     = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
 383     .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
 384     .caps_internal   = FF_CODEC_CAP_INIT_CLEANUP,
 385 };