sgidec: make compiler optimize away memcpy call in inner loop.

[ffmpeg] / libavcodec / nellymoserdec.c
diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c

index 04d966173a9d93d704c1963e58f6ad727ba3c146..2a1ec5b4cd7403e0ebb171bba62057f42d38a965 100644 (file)
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -41,14 +41,15 @@
  #include "fmtconvert.h"
  #include "sinewin.h"
  
-#define ALT_BITSTREAM_READER_LE
+#define BITSTREAM_READER_LE
  #include "get_bits.h"
  
  
  typedef struct NellyMoserDecodeContext {
      AVCodecContext* avctx;
+    AVFrame         frame;
      float          *float_buf;
-    float           state[NELLY_BUF_LEN];
+    DECLARE_ALIGNED(16, float, state)[NELLY_BUF_LEN];
      AVLFG           random_state;
      GetBitContext   gb;
      float           scale_bias;
@@ -58,23 +59,6 @@ typedef struct NellyMoserDecodeContext {
      DECLARE_ALIGNED(32, float, imdct_out)[NELLY_BUF_LEN * 2];
  } NellyMoserDecodeContext;
  
-static void overlap_and_window(NellyMoserDecodeContext *s, float *state, float *audio, float *a_in)
-{
-    int bot, top;
-
-    bot = 0;
-    top = NELLY_BUF_LEN-1;
-
-    while (bot < NELLY_BUF_LEN) {
-        audio[bot] = a_in [bot]*ff_sine_128[bot]
-                    +state[bot]*ff_sine_128[top];
-
-        bot++;
-        top--;
-    }
-    memcpy(state, a_in + NELLY_BUF_LEN, sizeof(float)*NELLY_BUF_LEN);
-}
-
  static void nelly_decode_block(NellyMoserDecodeContext *s,
                                 const unsigned char block[NELLY_BLOCK_LEN],
                                 float audio[NELLY_SAMPLES])
@@ -125,7 +109,9 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
          s->imdct_ctx.imdct_calc(&s->imdct_ctx, s->imdct_out, aptr);
          /* XXX: overlapping and windowing should be part of a more
             generic imdct function */
-        overlap_and_window(s, s->state, aptr, s->imdct_out);
+        s->dsp.vector_fmul_reverse(s->state, s->state, ff_sine_128, NELLY_BUF_LEN);
+        s->dsp.vector_fmul_add(aptr, s->imdct_out, ff_sine_128, s->state, NELLY_BUF_LEN);
+        memcpy(s->state, s->imdct_out + NELLY_BUF_LEN, sizeof(float)*NELLY_BUF_LEN);
      }
  }
  
@@ -157,34 +143,34 @@ static av_cold int decode_init(AVCodecContext * avctx) {
          ff_init_ff_sine_windows(7);
  
      avctx->channel_layout = AV_CH_LAYOUT_MONO;
+
+    avcodec_get_frame_defaults(&s->frame);
+    avctx->coded_frame = &s->frame;
+
      return 0;
  }
  
-static int decode_tag(AVCodecContext * avctx,
-                      void *data, int *data_size,
-                      AVPacket *avpkt) {
+static int decode_tag(AVCodecContext *avctx, void *data,
+                      int *got_frame_ptr, AVPacket *avpkt)
+{
      const uint8_t *buf = avpkt->data;
+    const uint8_t *side=av_packet_get_side_data(avpkt, 'F', NULL);
      int buf_size = avpkt->size;
      NellyMoserDecodeContext *s = avctx->priv_data;
-    int data_max = *data_size;
-    int blocks, i, block_size;
-    int16_t *samples_s16 = data;
-    float   *samples_flt = data;
-    *data_size = 0;
-
-    if (buf_size < avctx->block_align) {
-        return buf_size;
+    int blocks, i, ret;
+    int16_t *samples_s16;
+    float   *samples_flt;
+
+    blocks     = buf_size / NELLY_BLOCK_LEN;
+
+    if (blocks <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
+        return AVERROR_INVALIDDATA;
      }
  
      if (buf_size % NELLY_BLOCK_LEN) {
-        av_log(avctx, AV_LOG_ERROR, "Tag size %d.\n", buf_size);
-        return buf_size;
-    }
-    block_size = NELLY_SAMPLES * av_get_bytes_per_sample(avctx->sample_fmt);
-    blocks     = FFMIN(buf_size / NELLY_BLOCK_LEN, data_max / block_size);
-    if (blocks <= 0) {
-        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
-        return AVERROR(EINVAL);
+        av_log(avctx, AV_LOG_WARNING, "Leftover bytes: %d.\n",
+               buf_size % NELLY_BLOCK_LEN);
      }
      /* Normal numbers of blocks for sample rates:
       *  8000 Hz - 1
@@ -193,9 +179,20 @@ static int decode_tag(AVCodecContext * avctx,
       * 22050 Hz - 4
       * 44100 Hz - 8
       */
+    if(side && blocks>1 && avctx->sample_rate%11025==0 && (1<<((side[0]>>2)&3)) == blocks)
+        avctx->sample_rate= 11025*(blocks/2);
+
+    /* get output buffer */
+    s->frame.nb_samples = NELLY_SAMPLES * blocks;
+    if ((ret = avctx->get_buffer(avctx, &s->frame)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        return ret;
+    }
+    samples_s16 = (int16_t *)s->frame.data[0];
+    samples_flt = (float   *)s->frame.data[0];
  
      for (i=0 ; i<blocks ; i++) {
-        if (avctx->sample_fmt == SAMPLE_FMT_FLT) {
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
              nelly_decode_block(s, buf, samples_flt);
              samples_flt += NELLY_SAMPLES;
          } else {
@@ -205,7 +202,9 @@ static int decode_tag(AVCodecContext * avctx,
          }
          buf += NELLY_BLOCK_LEN;
      }
-    *data_size = blocks * block_size;
+
+    *got_frame_ptr   = 1;
+    *(AVFrame *)data = s->frame;
  
      return buf_size;
  }
@@ -215,6 +214,7 @@ static av_cold int decode_end(AVCodecContext * avctx) {
  
      av_freep(&s->float_buf);
      ff_mdct_end(&s->imdct_ctx);
+
      return 0;
  }
  
@@ -226,6 +226,7 @@ AVCodec ff_nellymoser_decoder = {
      .init           = decode_init,
      .close          = decode_end,
      .decode         = decode_tag,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_PARAM_CHANGE,
      .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
      .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
                                                        AV_SAMPLE_FMT_S16,