git.sesse.net Git - ffmpeg/blob - libavcodec/g722enc.c

   1 /*
   2  * Copyright (c) CMU 1993 Computer Science, Speech Group
   3  *                        Chengxiang Lu and Alex Hauptmann
   4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
   5  * Copyright (c) 2009 Kenan Gillet
   6  * Copyright (c) 2010 Martin Storsjo
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * G.722 ADPCM audio encoder
  28  */
  29
  30 #include "avcodec.h"
  31 #include "g722.h"
  32
  33 #define FREEZE_INTERVAL 128
  34
  35 static av_cold int g722_encode_init(AVCodecContext * avctx)
  36 {
  37     G722Context *c = avctx->priv_data;
  38
  39     if (avctx->channels != 1) {
  40         av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
  41         return AVERROR_INVALIDDATA;
  42     }
  43
  44     c->band[0].scale_factor = 8;
  45     c->band[1].scale_factor = 2;
  46     c->prev_samples_pos = 22;
  47
  48     if (avctx->trellis) {
  49         int frontier = 1 << avctx->trellis;
  50         int max_paths = frontier * FREEZE_INTERVAL;
  51         int i;
  52         for (i = 0; i < 2; i++) {
  53             c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
  54             c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
  55             c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
  56         }
  57     }
  58
  59     return 0;
  60 }
  61
  62 static av_cold int g722_encode_close(AVCodecContext *avctx)
  63 {
  64     G722Context *c = avctx->priv_data;
  65     int i;
  66     for (i = 0; i < 2; i++) {
  67         av_freep(&c->paths[i]);
  68         av_freep(&c->node_buf[i]);
  69         av_freep(&c->nodep_buf[i]);
  70     }
  71     return 0;
  72 }
  73
  74 static const int16_t low_quant[33] = {
  75       35,   72,  110,  150,  190,  233,  276,  323,
  76      370,  422,  473,  530,  587,  650,  714,  786,
  77      858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
  78     1765, 1980, 2195, 2557, 2919
  79 };
  80
  81 static inline void filter_samples(G722Context *c, const int16_t *samples,
  82                                   int *xlow, int *xhigh)
  83 {
  84     int xout1, xout2;
  85     c->prev_samples[c->prev_samples_pos++] = samples[0];
  86     c->prev_samples[c->prev_samples_pos++] = samples[1];
  87     ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
  88     *xlow  = xout1 + xout2 >> 13;
  89     *xhigh = xout1 - xout2 >> 13;
  90     if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
  91         memmove(c->prev_samples,
  92                 c->prev_samples + c->prev_samples_pos - 22,
  93                 22 * sizeof(c->prev_samples[0]));
  94         c->prev_samples_pos = 22;
  95     }
  96 }
  97
  98 static inline int encode_high(const struct G722Band *state, int xhigh)
  99 {
 100     int diff = av_clip_int16(xhigh - state->s_predictor);
 101     int pred = 141 * state->scale_factor >> 8;
 102            /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
 103     return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
 104 }
 105
 106 static inline int encode_low(const struct G722Band* state, int xlow)
 107 {
 108     int diff  = av_clip_int16(xlow - state->s_predictor);
 109            /* = diff >= 0 ? diff : -(diff + 1) */
 110     int limit = diff ^ (diff >> (sizeof(diff)*8-1));
 111     int i = 0;
 112     limit = limit + 1 << 10;
 113     if (limit > low_quant[8] * state->scale_factor)
 114         i = 9;
 115     while (i < 29 && limit > low_quant[i] * state->scale_factor)
 116         i++;
 117     return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
 118 }
 119
 120 static int g722_encode_trellis(AVCodecContext *avctx,
 121                                uint8_t *dst, int buf_size, void *data)
 122 {
 123     G722Context *c = avctx->priv_data;
 124     const int16_t *samples = data;
 125     int i, j, k;
 126     int frontier = 1 << avctx->trellis;
 127     struct TrellisNode **nodes[2];
 128     struct TrellisNode **nodes_next[2];
 129     int pathn[2] = {0, 0}, froze = -1;
 130     struct TrellisPath *p[2];
 131
 132     for (i = 0; i < 2; i++) {
 133         nodes[i] = c->nodep_buf[i];
 134         nodes_next[i] = c->nodep_buf[i] + frontier;
 135         memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
 136         nodes[i][0] = c->node_buf[i] + frontier;
 137         nodes[i][0]->ssd = 0;
 138         nodes[i][0]->path = 0;
 139         nodes[i][0]->state = c->band[i];
 140     }
 141
 142     for (i = 0; i < buf_size; i++) {
 143         int xlow, xhigh;
 144         struct TrellisNode *next[2];
 145         int heap_pos[2] = {0, 0};
 146
 147         for (j = 0; j < 2; j++) {
 148             next[j] = c->node_buf[j] + frontier*(i & 1);
 149             memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
 150         }
 151
 152         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 153
 154         for (j = 0; j < frontier && nodes[0][j]; j++) {
 155             /* Only k >> 2 affects the future adaptive state, therefore testing
 156              * small steps that don't change k >> 2 is useless, the original
 157              * value from encode_low is better than them. Since we step k
 158              * in steps of 4, make sure range is a multiple of 4, so that
 159              * we don't miss the original value from encode_low. */
 160             int range = j < frontier/2 ? 4 : 0;
 161             struct TrellisNode *cur_node = nodes[0][j];
 162
 163             int ilow = encode_low(&cur_node->state, xlow);
 164
 165             for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
 166                 int decoded, dec_diff, pos;
 167                 uint32_t ssd;
 168                 struct TrellisNode* node;
 169
 170                 if (k < 0)
 171                     continue;
 172
 173                 decoded = av_clip((cur_node->state.scale_factor *
 174                                   ff_g722_low_inv_quant6[k] >> 10)
 175                                 + cur_node->state.s_predictor, -16384, 16383);
 176                 dec_diff = xlow - decoded;
 177
 178 #define STORE_NODE(index, UPDATE, VALUE)\
 179                 ssd = cur_node->ssd + dec_diff*dec_diff;\
 180                 /* Check for wraparound. Using 64 bit ssd counters would \
 181                  * be simpler, but is slower on x86 32 bit. */\
 182                 if (ssd < cur_node->ssd)\
 183                     continue;\
 184                 if (heap_pos[index] < frontier) {\
 185                     pos = heap_pos[index]++;\
 186                     assert(pathn[index] < FREEZE_INTERVAL * frontier);\
 187                     node = nodes_next[index][pos] = next[index]++;\
 188                     node->path = pathn[index]++;\
 189                 } else {\
 190                     /* Try to replace one of the leaf nodes with the new \
 191                      * one, but not always testing the same leaf position */\
 192                     pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
 193                     if (ssd >= nodes_next[index][pos]->ssd)\
 194                         continue;\
 195                     heap_pos[index]++;\
 196                     node = nodes_next[index][pos];\
 197                 }\
 198                 node->ssd = ssd;\
 199                 node->state = cur_node->state;\
 200                 UPDATE;\
 201                 c->paths[index][node->path].value = VALUE;\
 202                 c->paths[index][node->path].prev = cur_node->path;\
 203                 /* Sift the newly inserted node up in the heap to restore \
 204                  * the heap property */\
 205                 while (pos > 0) {\
 206                     int parent = (pos - 1) >> 1;\
 207                     if (nodes_next[index][parent]->ssd <= ssd)\
 208                         break;\
 209                     FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
 210                                                 nodes_next[index][pos]);\
 211                     pos = parent;\
 212                 }
 213                 STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
 214             }
 215         }
 216
 217         for (j = 0; j < frontier && nodes[1][j]; j++) {
 218             int ihigh;
 219             struct TrellisNode *cur_node = nodes[1][j];
 220
 221             /* We don't try to get any initial guess for ihigh via
 222              * encode_high - since there's only 4 possible values, test
 223              * them all. Testing all of these gives a much, much larger
 224              * gain than testing a larger range around ilow. */
 225             for (ihigh = 0; ihigh < 4; ihigh++) {
 226                 int dhigh, decoded, dec_diff, pos;
 227                 uint32_t ssd;
 228                 struct TrellisNode* node;
 229
 230                 dhigh = cur_node->state.scale_factor *
 231                         ff_g722_high_inv_quant[ihigh] >> 10;
 232                 decoded = av_clip(dhigh + cur_node->state.s_predictor,
 233                                   -16384, 16383);
 234                 dec_diff = xhigh - decoded;
 235
 236                 STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
 237             }
 238         }
 239
 240         for (j = 0; j < 2; j++) {
 241             FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
 242
 243             if (nodes[j][0]->ssd > (1 << 16)) {
 244                 for (k = 1; k < frontier && nodes[j][k]; k++)
 245                     nodes[j][k]->ssd -= nodes[j][0]->ssd;
 246                 nodes[j][0]->ssd = 0;
 247             }
 248         }
 249
 250         if (i == froze + FREEZE_INTERVAL) {
 251             p[0] = &c->paths[0][nodes[0][0]->path];
 252             p[1] = &c->paths[1][nodes[1][0]->path];
 253             for (j = i; j > froze; j--) {
 254                 dst[j] = p[1]->value << 6 | p[0]->value;
 255                 p[0] = &c->paths[0][p[0]->prev];
 256                 p[1] = &c->paths[1][p[1]->prev];
 257             }
 258             froze = i;
 259             pathn[0] = pathn[1] = 0;
 260             memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
 261             memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
 262         }
 263     }
 264
 265     p[0] = &c->paths[0][nodes[0][0]->path];
 266     p[1] = &c->paths[1][nodes[1][0]->path];
 267     for (j = i; j > froze; j--) {
 268         dst[j] = p[1]->value << 6 | p[0]->value;
 269         p[0] = &c->paths[0][p[0]->prev];
 270         p[1] = &c->paths[1][p[1]->prev];
 271     }
 272     c->band[0] = nodes[0][0]->state;
 273     c->band[1] = nodes[1][0]->state;
 274
 275     return i;
 276 }
 277
 278 static int g722_encode_frame(AVCodecContext *avctx,
 279                              uint8_t *dst, int buf_size, void *data)
 280 {
 281     G722Context *c = avctx->priv_data;
 282     const int16_t *samples = data;
 283     int i;
 284
 285     if (avctx->trellis)
 286         return g722_encode_trellis(avctx, dst, buf_size, data);
 287
 288     for (i = 0; i < buf_size; i++) {
 289         int xlow, xhigh, ihigh, ilow;
 290         filter_samples(c, &samples[2*i], &xlow, &xhigh);
 291         ihigh = encode_high(&c->band[1], xhigh);
 292         ilow  = encode_low(&c->band[0], xlow);
 293         ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
 294                                       ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
 295         ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
 296         *dst++ = ihigh << 6 | ilow;
 297     }
 298     return i;
 299 }
 300
 301 AVCodec ff_adpcm_g722_encoder = {
 302     .name           = "g722",
 303     .type           = AVMEDIA_TYPE_AUDIO,
 304     .id             = CODEC_ID_ADPCM_G722,
 305     .priv_data_size = sizeof(G722Context),
 306     .init           = g722_encode_init,
 307     .close          = g722_encode_close,
 308     .encode         = g722_encode_frame,
 309     .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
 310     .sample_fmts    = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
 311 };