suggest to use av_get_bytes_per_sample() in av_get_bits_per_sample_format() doxy

[ffmpeg] / libavcodec / mpegaudiodec.c
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c

index 7fd6bd2dc64e94307c8656b57429342e2658e66b..033d76e049e7e76153a6b80b8f0ea72c8a9a8d28 100644 (file)
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -27,8 +27,8 @@
  #include "libavutil/audioconvert.h"
  #include "avcodec.h"
  #include "get_bits.h"
-#include "dsputil.h"
  #include "mathops.h"
+#include "mpegaudiodsp.h"
  
  /*
   * TODO:
@@ -38,25 +38,71 @@
  #include "mpegaudio.h"
  #include "mpegaudiodecheader.h"
  
+#define BACKSTEP_SIZE 512
+#define EXTRABYTES 24
+
+/* layer 3 "granule" */
+typedef struct GranuleDef {
+    uint8_t scfsi;
+    int part2_3_length;
+    int big_values;
+    int global_gain;
+    int scalefac_compress;
+    uint8_t block_type;
+    uint8_t switch_point;
+    int table_select[3];
+    int subblock_gain[3];
+    uint8_t scalefac_scale;
+    uint8_t count1table_select;
+    int region_size[3]; /* number of huffman codes in each region */
+    int preflag;
+    int short_start, long_end; /* long/short band indexes */
+    uint8_t scale_factors[40];
+    INTFLOAT sb_hybrid[SBLIMIT * 18]; /* 576 samples */
+} GranuleDef;
+
+typedef struct MPADecodeContext {
+    MPA_DECODE_HEADER
+    uint8_t last_buf[2*BACKSTEP_SIZE + EXTRABYTES];
+    int last_buf_size;
+    /* next header (used in free format parsing) */
+    uint32_t free_format_next_header;
+    GetBitContext gb;
+    GetBitContext in_gb;
+    DECLARE_ALIGNED(32, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
+    int synth_buf_offset[MPA_MAX_CHANNELS];
+    DECLARE_ALIGNED(32, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
+    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
+    GranuleDef granules[2][2]; /* Used in Layer 3 */
+#ifdef DEBUG
+    int frame_count;
+#endif
+    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
+    int dither_state;
+    int error_recognition;
+    AVCodecContext* avctx;
+    MPADSPContext mpadsp;
+} MPADecodeContext;
+
  #if CONFIG_FLOAT
  #   define SHR(a,b)       ((a)*(1.0f/(1<<(b))))
-#   define compute_antialias compute_antialias_float
  #   define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
  #   define FIXR(x)        ((float)(x))
  #   define FIXHR(x)       ((float)(x))
  #   define MULH3(x, y, s) ((s)*(y)*(x))
  #   define MULLx(x, y, s) ((y)*(x))
  #   define RENAME(a) a ## _float
+#   define OUT_FMT AV_SAMPLE_FMT_FLT
  #else
  #   define SHR(a,b)       ((a)>>(b))
-#   define compute_antialias compute_antialias_integer
  /* WARNING: only correct for posititive numbers */
  #   define FIXR_OLD(a)    ((int)((a) * FRAC_ONE + 0.5))
  #   define FIXR(a)        ((int)((a) * FRAC_ONE + 0.5))
  #   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
  #   define MULH3(x, y, s) MULH((s)*(x), y)
  #   define MULLx(x, y, s) MULL(x,y,s)
-#   define RENAME(a)      a
+#   define RENAME(a)      a ## _fixed
+#   define OUT_FMT AV_SAMPLE_FMT_S16
  #endif
  
  /****************/
@@ -66,16 +112,6 @@
  #include "mpegaudiodata.h"
  #include "mpegaudiodectab.h"
  
-#if CONFIG_FLOAT
-#    include "fft.h"
-#else
-#    include "dct32.c"
-#endif
-
-static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr);
-
  /* vlc structure for decoding layer 3 huffman tables */
  static VLC huff_vlc[16];
  static VLC_TYPE huff_vlc_tables[
@@ -97,8 +133,7 @@ static uint16_t band_index_long[9][23];
  /* intensity stereo coef table */
  static INTFLOAT is_table[2][16];
  static INTFLOAT is_table_lsf[2][2][16];
-static int32_t csa_table[8][4];
-static float csa_table_float[8][4];
+static INTFLOAT csa_table[8][4];
  static INTFLOAT mdct_win[8][36];
  
  static int16_t division_tab3[1<<6 ];
@@ -124,8 +159,6 @@ static const int32_t scale_factor_mult2[3][3] = {
      SCALE_GEN(4.0 / 9.0), /* 9 steps */
  };
  
-DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
-
  /**
   * Convert region offsets to region sizes and truncate
   * size to big_values.
@@ -236,27 +269,6 @@ static inline int l3_unscale(int value, int exponent)
      return m;
  }
  
-/* all integer n^(4/3) computation code */
-#define DEV_ORDER 13
-
-#define POW_FRAC_BITS 24
-#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
-#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
-#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)
-
-static int dev_4_3_coefs[DEV_ORDER];
-
-static av_cold void int_pow_init(void)
-{
-    int i, a;
-
-    a = POW_FIX(1.0);
-    for(i=0;i<DEV_ORDER;i++) {
-        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
-        dev_4_3_coefs[i] = a;
-    }
-}
-
  static av_cold int decode_init(AVCodecContext * avctx)
  {
      MPADecodeContext *s = avctx->priv_data;
@@ -264,14 +276,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
      int i, j, k;
  
      s->avctx = avctx;
-    s->apply_window_mp3 = apply_window_mp3_c;
-#if HAVE_MMX && CONFIG_FLOAT
-    ff_mpegaudiodec_init_mmx(s);
-#endif
-#if CONFIG_FLOAT
-    ff_dct_init(&s->dct, 5, DCT_II);
-#endif
-    if (HAVE_ALTIVEC && CONFIG_FLOAT) ff_mpegaudiodec_init_altivec(s);
+
+    ff_mpadsp_init(&s->mpadsp);
  
      avctx->sample_fmt= OUT_FMT;
      s->error_recognition= avctx->error_recognition;
@@ -358,7 +364,6 @@ static av_cold int decode_init(AVCodecContext * avctx)
  
          /* compute n ^ (4/3) and store it in mantissa/exp format */
  
-        int_pow_init();
          mpegaudio_tableinit();
  
          for (i = 0; i < 4; i++)
@@ -401,8 +406,9 @@ static av_cold int decode_init(AVCodecContext * avctx)
                  k = i & 1;
                  is_table_lsf[j][k ^ 1][i] = FIXR(f);
                  is_table_lsf[j][k][i] = FIXR(1.0);
-                av_dlog(avctx, "is_table_lsf %d %d: %x %x\n",
-                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
+                av_dlog(avctx, "is_table_lsf %d %d: %f %f\n",
+                        i, j, (float) is_table_lsf[j][0][i],
+                        (float) is_table_lsf[j][1][i]);
              }
          }
  
@@ -411,14 +417,17 @@ static av_cold int decode_init(AVCodecContext * avctx)
              ci = ci_table[i];
              cs = 1.0 / sqrt(1.0 + ci * ci);
              ca = cs * ci;
+#if !CONFIG_FLOAT
              csa_table[i][0] = FIXHR(cs/4);
              csa_table[i][1] = FIXHR(ca/4);
              csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
              csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
-            csa_table_float[i][0] = cs;
-            csa_table_float[i][1] = ca;
-            csa_table_float[i][2] = ca + cs;
-            csa_table_float[i][3] = ca - cs;
+#else
+            csa_table[i][0] = cs;
+            csa_table[i][1] = ca;
+            csa_table[i][2] = ca + cs;
+            csa_table[i][3] = ca - cs;
+#endif
          }
  
          /* compute mdct windows */
@@ -466,183 +475,6 @@ static av_cold int decode_init(AVCodecContext * avctx)
      return 0;
  }
  
-
-#if CONFIG_FLOAT
-static inline float round_sample(float *sum)
-{
-    float sum1=*sum;
-    *sum = 0;
-    return sum1;
-}
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MACS(rt, ra, rb) rt+=(ra)*(rb)
-
-/* signed 16x16 -> 32 multiply */
-#define MULS(ra, rb) ((ra)*(rb))
-
-#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
-
-#else
-
-static inline int round_sample(int64_t *sum)
-{
-    int sum1;
-    sum1 = (int)((*sum) >> OUT_SHIFT);
-    *sum &= (1<<OUT_SHIFT)-1;
-    return av_clip(sum1, OUT_MIN, OUT_MAX);
-}
-
-#   define MULS(ra, rb) MUL64(ra, rb)
-#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
-#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
-#endif
-
-#define SUM8(op, sum, w, p)               \
-{                                         \
-    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
-    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
-    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
-    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
-    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
-    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
-    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
-    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
-}
-
-#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
-{                                               \
-    INTFLOAT tmp;\
-    tmp = p[0 * 64];\
-    op1(sum1, (w1)[0 * 64], tmp);\
-    op2(sum2, (w2)[0 * 64], tmp);\
-    tmp = p[1 * 64];\
-    op1(sum1, (w1)[1 * 64], tmp);\
-    op2(sum2, (w2)[1 * 64], tmp);\
-    tmp = p[2 * 64];\
-    op1(sum1, (w1)[2 * 64], tmp);\
-    op2(sum2, (w2)[2 * 64], tmp);\
-    tmp = p[3 * 64];\
-    op1(sum1, (w1)[3 * 64], tmp);\
-    op2(sum2, (w2)[3 * 64], tmp);\
-    tmp = p[4 * 64];\
-    op1(sum1, (w1)[4 * 64], tmp);\
-    op2(sum2, (w2)[4 * 64], tmp);\
-    tmp = p[5 * 64];\
-    op1(sum1, (w1)[5 * 64], tmp);\
-    op2(sum2, (w2)[5 * 64], tmp);\
-    tmp = p[6 * 64];\
-    op1(sum1, (w1)[6 * 64], tmp);\
-    op2(sum2, (w2)[6 * 64], tmp);\
-    tmp = p[7 * 64];\
-    op1(sum1, (w1)[7 * 64], tmp);\
-    op2(sum2, (w2)[7 * 64], tmp);\
-}
-
-void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
-{
-    int i, j;
-
-    /* max = 18760, max sum over all 16 coefs : 44736 */
-    for(i=0;i<257;i++) {
-        INTFLOAT v;
-        v = ff_mpa_enwindow[i];
-#if CONFIG_FLOAT
-        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
-#endif
-        window[i] = v;
-        if ((i & 63) != 0)
-            v = -v;
-        if (i != 0)
-            window[512 - i] = v;
-    }
-
-    // Needed for avoiding shuffles in ASM implementations
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+16*i+j] = window[64*i+32-j];
-
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+128+16*i+j] = window[64*i+48-j];
-}
-
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr)
-{
-    register const MPA_INT *w, *w2, *p;
-    int j;
-    OUT_INT *samples2;
-#if CONFIG_FLOAT
-    float sum, sum2;
-#else
-    int64_t sum, sum2;
-#endif
-
-    /* copy to avoid wrap */
-    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
-
-    samples2 = samples + 31 * incr;
-    w = window;
-    w2 = window + 31;
-
-    sum = *dither_state;
-    p = synth_buf + 16;
-    SUM8(MACS, sum, w, p);
-    p = synth_buf + 48;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    samples += incr;
-    w++;
-
-    /* we calculate two samples at the same time to avoid one memory
-       access per two sample */
-    for(j=1;j<16;j++) {
-        sum2 = 0;
-        p = synth_buf + 16 + j;
-        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
-        p = synth_buf + 48 - j;
-        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
-
-        *samples = round_sample(&sum);
-        samples += incr;
-        sum += sum2;
-        *samples2 = round_sample(&sum);
-        samples2 -= incr;
-        w++;
-        w2--;
-    }
-
-    p = synth_buf + 32;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    *dither_state= sum;
-}
-
-
-/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
-   32 samples. */
-/* XXX: optimize by avoiding ring buffer usage */
-#if !CONFIG_FLOAT
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT])
-{
-    register MPA_INT *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    dct32(synth_buf, sb_samples);
-    apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-#endif
-
  #define C3 FIXHR(0.86602540378443864676/2)
  
  /* 0.5 / cos(pi*(2*i+1)/36) */
@@ -1482,11 +1314,26 @@ static void compute_stereo(MPADecodeContext *s,
      }
  }
  
-#if !CONFIG_FLOAT
-static void compute_antialias_integer(MPADecodeContext *s,
-                              GranuleDef *g)
+#if CONFIG_FLOAT
+#define AA(j) do {                                                      \
+        float tmp0 = ptr[-1-j];                                         \
+        float tmp1 = ptr[   j];                                         \
+        ptr[-1-j] = tmp0 * csa_table[j][0] - tmp1 * csa_table[j][1];    \
+        ptr[   j] = tmp0 * csa_table[j][1] + tmp1 * csa_table[j][0];    \
+    } while (0)
+#else
+#define AA(j) do {                                              \
+        int tmp0 = ptr[-1-j];                                   \
+        int tmp1 = ptr[   j];                                   \
+        int tmp2 = MULH(tmp0 + tmp1, csa_table[j][0]);          \
+        ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa_table[j][2]));     \
+        ptr[   j] = 4*(tmp2 + MULH(tmp0, csa_table[j][3]));     \
+    } while (0)
+#endif
+
+static void compute_antialias(MPADecodeContext *s, GranuleDef *g)
  {
-    int32_t *ptr, *csa;
+    INTFLOAT *ptr;
      int n, i;
  
      /* we antialias only "long" bands */
@@ -1501,28 +1348,18 @@ static void compute_antialias_integer(MPADecodeContext *s,
  
      ptr = g->sb_hybrid + 18;
      for(i = n;i > 0;i--) {
-        int tmp0, tmp1, tmp2;
-        csa = &csa_table[0][0];
-#define INT_AA(j) \
-            tmp0 = ptr[-1-j];\
-            tmp1 = ptr[   j];\
-            tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\
-            ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa[2+4*j]));\
-            ptr[   j] = 4*(tmp2 + MULH(tmp0, csa[3+4*j]));
-
-        INT_AA(0)
-        INT_AA(1)
-        INT_AA(2)
-        INT_AA(3)
-        INT_AA(4)
-        INT_AA(5)
-        INT_AA(6)
-        INT_AA(7)
+        AA(0);
+        AA(1);
+        AA(2);
+        AA(3);
+        AA(4);
+        AA(5);
+        AA(6);
+        AA(7);
  
          ptr += 18;
      }
  }
-#endif
  
  static void compute_imdct(MPADecodeContext *s,
                            GranuleDef *g,
@@ -1618,7 +1455,7 @@ static void compute_imdct(MPADecodeContext *s,
  /* main layer3 decoding function */
  static int mp_decode_layer3(MPADecodeContext *s)
  {
-    int nb_granules, main_data_begin, private_bits;
+    int nb_granules, main_data_begin;
      int gr, ch, blocksplit_flag, i, j, k, n, bits_pos;
      GranuleDef *g;
      int16_t exponents[576]; //FIXME try INTFLOAT
@@ -1626,14 +1463,14 @@ static int mp_decode_layer3(MPADecodeContext *s)
      /* read side info */
      if (s->lsf) {
          main_data_begin = get_bits(&s->gb, 8);
-        private_bits = get_bits(&s->gb, s->nb_channels);
+        skip_bits(&s->gb, s->nb_channels);
          nb_granules = 1;
      } else {
          main_data_begin = get_bits(&s->gb, 9);
          if (s->nb_channels == 2)
-            private_bits = get_bits(&s->gb, 3);
+            skip_bits(&s->gb, 3);
          else
-            private_bits = get_bits(&s->gb, 5);
+            skip_bits(&s->gb, 5);
          nb_granules = 2;
          for(ch=0;ch<s->nb_channels;ch++) {
              s->granules[ch][0].scfsi = 0;/* all scale factors are transmitted */
@@ -1921,9 +1758,7 @@ static int mp_decode_frame(MPADecodeContext *s,
          samples_ptr = samples + ch;
          for(i=0;i<nb_frames;i++) {
              RENAME(ff_mpa_synth_filter)(
-#if CONFIG_FLOAT
-                         s,
-#endif
+                         &s->mpadsp,
                           s->synth_buf[ch], &(s->synth_buf_offset[ch]),
                           RENAME(ff_mpa_synth_window), &s->dither_state,
                           samples_ptr, s->nb_channels,