git.sesse.net Git - ffmpeg/blob - libavcodec/flacdsp_lpc_template.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 #include <stdint.h>
  20 #include "libavutil/avutil.h"
  21 #include "mathops.h"
  22
  23 #undef FUNC
  24 #undef sum_type
  25 #undef MUL
  26 #undef CLIP
  27 #undef FSUF
  28
  29 #define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)
  30
  31 #if SAMPLE_SIZE == 32
  32 #   define sum_type  int64_t
  33 #   define MUL(a, b) MUL64(a, b)
  34 #   define CLIP(x) av_clipl_int32(x)
  35 #else
  36 #   define sum_type  int32_t
  37 #   define MUL(a, b) ((a) * (b))
  38 #   define CLIP(x) (x)
  39 #endif
  40
  41 #define LPC1(x) {           \
  42     int c = coefs[(x)-1];   \
  43     p0   += MUL(c, s);      \
  44     s     = smp[i-(x)+1];   \
  45     p1   += MUL(c, s);      \
  46 }
  47
  48 static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
  49                                   const int32_t *smp, int len, int order,
  50                                   const int32_t *coefs, int shift, int big)
  51 {
  52     int i;
  53     for (i = order; i < len; i += 2) {
  54         int s  = smp[i-order];
  55         sum_type p0 = 0, p1 = 0;
  56         if (big) {
  57             switch (order) {
  58             case 32: LPC1(32)
  59             case 31: LPC1(31)
  60             case 30: LPC1(30)
  61             case 29: LPC1(29)
  62             case 28: LPC1(28)
  63             case 27: LPC1(27)
  64             case 26: LPC1(26)
  65             case 25: LPC1(25)
  66             case 24: LPC1(24)
  67             case 23: LPC1(23)
  68             case 22: LPC1(22)
  69             case 21: LPC1(21)
  70             case 20: LPC1(20)
  71             case 19: LPC1(19)
  72             case 18: LPC1(18)
  73             case 17: LPC1(17)
  74             case 16: LPC1(16)
  75             case 15: LPC1(15)
  76             case 14: LPC1(14)
  77             case 13: LPC1(13)
  78             case 12: LPC1(12)
  79             case 11: LPC1(11)
  80             case 10: LPC1(10)
  81             case  9: LPC1( 9)
  82                      LPC1( 8)
  83                      LPC1( 7)
  84                      LPC1( 6)
  85                      LPC1( 5)
  86                      LPC1( 4)
  87                      LPC1( 3)
  88                      LPC1( 2)
  89                      LPC1( 1)
  90             }
  91         } else {
  92             switch (order) {
  93             case  8: LPC1( 8)
  94             case  7: LPC1( 7)
  95             case  6: LPC1( 6)
  96             case  5: LPC1( 5)
  97             case  4: LPC1( 4)
  98             case  3: LPC1( 3)
  99             case  2: LPC1( 2)
 100             case  1: LPC1( 1)
 101             }
 102         }
 103         res[i  ] = smp[i  ] - CLIP(p0 >> shift);
 104         res[i+1] = smp[i+1] - CLIP(p1 >> shift);
 105     }
 106 }
 107
 108 static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
 109                                     int order, const int32_t *coefs, int shift)
 110 {
 111     int i;
 112     for (i = 0; i < order; i++)
 113         res[i] = smp[i];
 114 #if CONFIG_SMALL
 115     for (i = order; i < len; i += 2) {
 116         int j;
 117         int s  = smp[i];
 118         sum_type p0 = 0, p1 = 0;
 119         for (j = 0; j < order; j++) {
 120             int c = coefs[j];
 121             p1   += MUL(c, s);
 122             s     = smp[i-j-1];
 123             p0   += MUL(c, s);
 124         }
 125         res[i  ] = smp[i  ] - CLIP(p0 >> shift);
 126         res[i+1] = smp[i+1] - CLIP(p1 >> shift);
 127     }
 128 #else
 129     switch (order) {
 130     case  1: FUNC(lpc_encode_unrolled)(res, smp, len,     1, coefs, shift, 0); break;
 131     case  2: FUNC(lpc_encode_unrolled)(res, smp, len,     2, coefs, shift, 0); break;
 132     case  3: FUNC(lpc_encode_unrolled)(res, smp, len,     3, coefs, shift, 0); break;
 133     case  4: FUNC(lpc_encode_unrolled)(res, smp, len,     4, coefs, shift, 0); break;
 134     case  5: FUNC(lpc_encode_unrolled)(res, smp, len,     5, coefs, shift, 0); break;
 135     case  6: FUNC(lpc_encode_unrolled)(res, smp, len,     6, coefs, shift, 0); break;
 136     case  7: FUNC(lpc_encode_unrolled)(res, smp, len,     7, coefs, shift, 0); break;
 137     case  8: FUNC(lpc_encode_unrolled)(res, smp, len,     8, coefs, shift, 0); break;
 138     default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
 139     }
 140 #endif
 141 }
 142
 143 /* Comment for clarity/de-obfuscation.
 144  *
 145  * for (int i = order; i < len; i++) {
 146  *     int32_t p = 0;
 147  *     for (int j = 0; j < order; j++) {
 148  *         int c = coefs[j];
 149  *         int s = smp[(i-1)-j];
 150  *         p    += c*s;
 151  *     }
 152  *     res[i] = smp[i] - (p >> shift);
 153  * }
 154  *
 155  * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
 156  * not being equal to 32 (at the present time that means for 16-bit audio). The
 157  * code above does 2 samples per iteration.  Commit bfdd5bc (made all the way
 158  * back in 2007) says that way is faster.
 159  */