git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "libavutil/avassert.h"
  31 #include "avcodec.h"
  32 #include "internal.h"
  33 #include "g722.h"
  34 #include "libavutil/common.h"
  35
  36 #define FREEZE_INTERVAL 128
  37
  38 /* This is an arbitrary value. Allowing insanely large values leads to strange
  39    problems, so we limit it to a reasonable value */
  40 #define MAX_FRAME_SIZE 32768
  41
  42 /* We clip the value of avctx->trellis to prevent data type overflows and
  43    undefined behavior. Using larger values is insanely slow anyway. */
  44 #define MIN_TRELLIS 0
  45 #define MAX_TRELLIS 16
  46
  47 static av_cold int g722_encode_close(AVCodecContext *avctx)
  48 {
  49     G722Context *c = avctx->priv_data;
  50     int i;
  51     for (i = 0; i < 2; i++) {
  52         av_freep(&c->paths[i]);
  53         av_freep(&c->node_buf[i]);
  54         av_freep(&c->nodep_buf[i]);
  55     }
  56     return 0;
  57 }
  58
  59 static av_cold int g722_encode_init(AVCodecContext * avctx)
  60 {
  61     G722Context *c = avctx->priv_data;
  62     int ret;
  63
  64     if (avctx->channels != 1) {
  65         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  66         return AVERROR_INVALIDDATA;
  67     }
  68
  69     c->band[0].scale_factor = 8;
  70     c->band[1].scale_factor = 2;
  71     c->prev_samples_pos = 22;
  72
  73     if (avctx->trellis) {
  74         int frontier = 1 << avctx->trellis;
  75         int max_paths = frontier * FREEZE_INTERVAL;
  76         int i;
  77         for (i = 0; i < 2; i++) {
  78             c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
  79             c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
  80             c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
  81             if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
  82                 ret = AVERROR(ENOMEM);
  83                 goto error;
  84             }
  85         }
  86     }
  87
  88     if (avctx->frame_size) {
  89         /* validate frame size */
  90         if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
  91             int new_frame_size;
  92
  93             if (avctx->frame_size == 1)
  94                 new_frame_size = 2;
  95             else if (avctx->frame_size > MAX_FRAME_SIZE)
  96                 new_frame_size = MAX_FRAME_SIZE;
  97             else
  98                 new_frame_size = avctx->frame_size - 1;
  99
 100             av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
 101                    "allowed. Using %d instead of %d\n", new_frame_size,
 102                    avctx->frame_size);
 103             avctx->frame_size = new_frame_size;
 104         }
 105     } else {
 106         /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
 107            a common packet size for VoIP applications */
 108         avctx->frame_size = 320;
 109     }
 110     avctx->initial_padding = 22;
 111
 112     if (avctx->trellis) {
 113         /* validate trellis */
 114         if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
 115             int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
 116             av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
 117                    "allowed. Using %d instead of %d\n", new_trellis,
 118                    avctx->trellis);
 119             avctx->trellis = new_trellis;
 120         }
 121     }
 122
 123     return 0;
 124 error:
 125     g722_encode_close(avctx);
 126     return ret;
 127 }
 128
 129 static const int16_t low_quant[33] = {
 130       35,   72,  110,  150,  190,  233,  276,  323,
 131      370,  422,  473,  530,  587,  650,  714,  786,
 132      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
 133     1765, 1980, 2195, 2557, 2919
 134 };
 135
 136 static inline void filter_samples(G722Context *c, const int16_t *samples,
 137                                   int *xlow, int *xhigh)
 138 {
 139     int xout1, xout2;
 140     c->prev_samples[c->prev_samples_pos++] = samples[0];
 141     c->prev_samples[c->prev_samples_pos++] = samples[1];
 142     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
 143     *xlow  = xout1 + xout2 >> 14;
 144     *xhigh = xout1 - xout2 >> 14;
 145     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
 146         memmove(c->prev_samples,
 147                 c->prev_samples + c->prev_samples_pos - 22,
 148                 22 * sizeof(c->prev_samples[0]));
 149         c->prev_samples_pos = 22;
 150     }
 151 }
 152
 153 static inline int encode_high(const struct G722Band *state, int xhigh)
 154 {
 155     int diff = av_clip_int16(xhigh - state->s_predictor);
 156     int pred = 141 * state->scale_factor >> 8;
 157            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 158     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 159 }
 160
 161 static inline int encode_low(const struct G722Band* state, int xlow)
 162 {
 163     int diff  = av_clip_int16(xlow - state->s_predictor);
 164            /* = diff >= 0 ? diff : -(diff + 1) */
 165     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 166     int i = 0;
 167     limit = limit + 1 << 10;
 168     if (limit > low_quant[8] * state->scale_factor)
 169         i = 9;
 170     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 171         i++;
 172     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 173 }
 174
 175 static void g722_encode_trellis(G722Context *c, int trellis,
 176                                 uint8_t *dst, int nb_samples,
 177                                 const int16_t *samples)
 178 {
 179     int i, j, k;
 180     int frontier = 1 << trellis;
 181     struct TrellisNode **nodes[2];
 182     struct TrellisNode **nodes_next[2];
 183     int pathn[2] = {0, 0}, froze = -1;
 184     struct TrellisPath *p[2];
 185
 186     for (i = 0; i < 2; i++) {
 187         nodes[i] = c->nodep_buf[i];
 188         nodes_next[i] = c->nodep_buf[i] + frontier;
 189         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
 190         nodes[i][0] = c->node_buf[i] + frontier;
 191         nodes[i][0]->ssd = 0;
 192         nodes[i][0]->path = 0;
 193         nodes[i][0]->state = c->band[i];
 194     }
 195
 196     for (i = 0; i < nb_samples >> 1; i++) {
 197         int xlow, xhigh;
 198         struct TrellisNode *next[2];
 199         int heap_pos[2] = {0, 0};
 200
 201         for (j = 0; j < 2; j++) {
 202             next[j] = c->node_buf[j] + frontier*(i & 1);
 203             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 204         }
 205
 206         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 207
 208         for (j = 0; j < frontier && nodes[0][j]; j++) {
 209             /* Only k >> 2 affects the future adaptive state, therefore testing
 210              * small steps that don't change k >> 2 is useless, the original
 211              * value from encode_low is better than them. Since we step k
 212              * in steps of 4, make sure range is a multiple of 4, so that
 213              * we don't miss the original value from encode_low. */
 214             int range = j < frontier/2 ? 4 : 0;
 215             struct TrellisNode *cur_node = nodes[0][j];
 216
 217             int ilow = encode_low(&cur_node->state, xlow);
 218
 219             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 220                 int decoded, dec_diff, pos;
 221                 uint32_t ssd;
 222                 struct TrellisNode* node;
 223
 224                 if (k < 0)
 225                     continue;
 226
 227                 decoded = av_clip((cur_node->state.scale_factor *
 228                                   ff_g722_low_inv_quant6[k] >> 10)
 229                                 + cur_node->state.s_predictor, -16384, 16383);
 230                 dec_diff = xlow - decoded;
 231
 232 #define STORE_NODE(index, UPDATE, VALUE)\
 233                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 234                 /* Check for wraparound. Using 64 bit ssd counters would \
 235                  * be simpler, but is slower on x86 32 bit. */\
 236                 if (ssd < cur_node->ssd)\
 237                     continue;\
 238                 if (heap_pos[index] < frontier) {\
 239                     pos = heap_pos[index]++;\
 240                     av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
 241                     node = nodes_next[index][pos] = next[index]++;\
 242                     node->path = pathn[index]++;\
 243                 } else {\
 244                     /* Try to replace one of the leaf nodes with the new \
 245                      * one, but not always testing the same leaf position */\
 246                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 247                     if (ssd >= nodes_next[index][pos]->ssd)\
 248                         continue;\
 249                     heap_pos[index]++;\
 250                     node = nodes_next[index][pos];\
 251                 }\
 252                 node->ssd = ssd;\
 253                 node->state = cur_node->state;\
 254                 UPDATE;\
 255                 c->paths[index][node->path].value = VALUE;\
 256                 c->paths[index][node->path].prev = cur_node->path;\
 257                 /* Sift the newly inserted node up in the heap to restore \
 258                  * the heap property */\
 259                 while (pos > 0) {\
 260                     int parent = (pos - 1) >> 1;\
 261                     if (nodes_next[index][parent]->ssd <= ssd)\
 262                         break;\
 263                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 264                                                 nodes_next[index][pos]);\
 265                     pos = parent;\
 266                 }
 267                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 268             }
 269         }
 270
 271         for (j = 0; j < frontier && nodes[1][j]; j++) {
 272             int ihigh;
 273             struct TrellisNode *cur_node = nodes[1][j];
 274
 275             /* We don't try to get any initial guess for ihigh via
 276              * encode_high - since there's only 4 possible values, test
 277              * them all. Testing all of these gives a much, much larger
 278              * gain than testing a larger range around ilow. */
 279             for (ihigh = 0; ihigh < 4; ihigh++) {
 280                 int dhigh, decoded, dec_diff, pos;
 281                 uint32_t ssd;
 282                 struct TrellisNode* node;
 283
 284                 dhigh = cur_node->state.scale_factor *
 285                         ff_g722_high_inv_quant[ihigh] >> 10;
 286                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
 287                                   -16384, 16383);
 288                 dec_diff = xhigh - decoded;
 289
 290                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 291             }
 292         }
 293
 294         for (j = 0; j < 2; j++) {
 295             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 296
 297             if (nodes[j][0]->ssd > (1 << 16)) {
 298                 for (k = 1; k < frontier && nodes[j][k]; k++)
 299                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 300                 nodes[j][0]->ssd = 0;
 301             }
 302         }
 303
 304         if (i == froze + FREEZE_INTERVAL) {
 305             p[0] = &c->paths[0][nodes[0][0]->path];
 306             p[1] = &c->paths[1][nodes[1][0]->path];
 307             for (j = i; j > froze; j--) {
 308                 dst[j] = p[1]->value << 6 | p[0]->value;
 309                 p[0] = &c->paths[0][p[0]->prev];
 310                 p[1] = &c->paths[1][p[1]->prev];
 311             }
 312             froze = i;
 313             pathn[0] = pathn[1] = 0;
 314             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 315             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 316         }
 317     }
 318
 319     p[0] = &c->paths[0][nodes[0][0]->path];
 320     p[1] = &c->paths[1][nodes[1][0]->path];
 321     for (j = i; j > froze; j--) {
 322         dst[j] = p[1]->value << 6 | p[0]->value;
 323         p[0] = &c->paths[0][p[0]->prev];
 324         p[1] = &c->paths[1][p[1]->prev];
 325     }
 326     c->band[0] = nodes[0][0]->state;
 327     c->band[1] = nodes[1][0]->state;
 328 }
 329
 330 static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
 331                                          const int16_t *samples)
 332 {
 333     int xlow, xhigh, ilow, ihigh;
 334     filter_samples(c, samples, &xlow, &xhigh);
 335     ihigh = encode_high(&c->band[1], xhigh);
 336     ilow  = encode_low (&c->band[0], xlow);
 337     ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 338                                 ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 339     ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 340     *dst = ihigh << 6 | ilow;
 341 }
 342
 343 static void g722_encode_no_trellis(G722Context *c,
 344                                    uint8_t *dst, int nb_samples,
 345                                    const int16_t *samples)
 346 {
 347     int i;
 348     for (i = 0; i < nb_samples; i += 2)
 349         encode_byte(c, dst++, &samples[i]);
 350 }
 351
 352 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 353                              const AVFrame *frame, int *got_packet_ptr)
 354 {
 355     G722Context *c = avctx->priv_data;
 356     const int16_t *samples = (const int16_t *)frame->data[0];
 357     int nb_samples, out_size, ret;
 358
 359     out_size = (frame->nb_samples + 1) / 2;
 360     if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
 361         return ret;
 362
 363     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 364
 365     if (avctx->trellis)
 366         g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
 367     else
 368         g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
 369
 370     /* handle last frame with odd frame_size */
 371     if (nb_samples < frame->nb_samples) {
 372         int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
 373         encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
 374     }
 375
 376     if (frame->pts != AV_NOPTS_VALUE)
 377         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 378     *got_packet_ptr = 1;
 379     return 0;
 380 }
 381
 382 AVCodec ff_adpcm_g722_encoder = {
 383     .name           = "g722",
 384     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 385     .type           = AVMEDIA_TYPE_AUDIO,
 386     .id             = AV_CODEC_ID_ADPCM_G722,
 387     .priv_data_size = sizeof(G722Context),
 388     .init           = g722_encode_init,
 389     .close          = g722_encode_close,
 390     .encode2        = g722_encode_frame,
 391     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
 392     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
 393                                                      AV_SAMPLE_FMT_NONE },
 394 };