Fix the fixed-point MDCT and FFT tests so that they actually compile and work.

[ffmpeg] / libavcodec / adpcm.c
diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c

index eb044ba4b9698e0d5838cf64ac0b257d4a159d98..ee2ce54ff1d9e79e8f7cb7885fb8e6c797794d2d 100644 (file)
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -163,6 +163,7 @@ typedef struct ADPCMContext {
      TrellisPath *paths;
      TrellisNode *node_buf;
      TrellisNode **nodep_buf;
+    uint8_t *trellis_hash;
  } ADPCMContext;
  
  #define FREEZE_INTERVAL 128
@@ -189,6 +190,7 @@ static av_cold int adpcm_encode_init(AVCodecContext *avctx)
          FF_ALLOC_OR_GOTO(avctx, s->paths,     max_paths * sizeof(*s->paths), error);
          FF_ALLOC_OR_GOTO(avctx, s->node_buf,  2 * frontier * sizeof(*s->node_buf), error);
          FF_ALLOC_OR_GOTO(avctx, s->nodep_buf, 2 * frontier * sizeof(*s->nodep_buf), error);
+        FF_ALLOC_OR_GOTO(avctx, s->trellis_hash, 65536 * sizeof(*s->trellis_hash), error);
      }
  
      switch(avctx->codec->id) {
@@ -242,6 +244,7 @@ error:
      av_freep(&s->paths);
      av_freep(&s->node_buf);
      av_freep(&s->nodep_buf);
+    av_freep(&s->trellis_hash);
      return -1;
  }
  
@@ -252,6 +255,7 @@ static av_cold int adpcm_encode_close(AVCodecContext *avctx)
      av_freep(&s->paths);
      av_freep(&s->node_buf);
      av_freep(&s->nodep_buf);
+    av_freep(&s->trellis_hash);
  
      return 0;
  }
@@ -325,7 +329,9 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
      TrellisNode **nodep_buf = s->nodep_buf;
      TrellisNode **nodes = nodep_buf; // nodes[] is always sorted by .ssd
      TrellisNode **nodes_next = nodep_buf + frontier;
-    int pathn = 0, froze = -1, i, j, k;
+    int pathn = 0, froze = -1, i, j, k, generation = 0;
+    uint8_t *hash = s->trellis_hash;
+    memset(hash, 0xff, 65536 * sizeof(*hash));
  
      memset(nodep_buf, 0, 2 * frontier * sizeof(*nodep_buf));
      nodes[0] = node_buf + frontier;
@@ -352,9 +358,10 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
          TrellisNode *t = node_buf + frontier*(i&1);
          TrellisNode **u;
          int sample = samples[i*stride];
+        int heap_pos = 0;
          memset(nodes_next, 0, frontier*sizeof(TrellisNode*));
          for(j=0; j<frontier && nodes[j]; j++) {
-            // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too
+            // higher j have higher ssd already, so they're likely to yield a suboptimal next sample too
              const int range = (j < frontier/2) ? 1 : 0;
              const int step = nodes[j]->step;
              int nidx;
@@ -369,38 +376,64 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
  #define STORE_NODE(NAME, STEP_INDEX)\
                      int d;\
                      uint32_t ssd;\
+                    int pos;\
+                    TrellisNode *u;\
+                    uint8_t *h;\
                      dec_sample = av_clip_int16(dec_sample);\
                      d = sample - dec_sample;\
                      ssd = nodes[j]->ssd + d*d;\
-                    if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\
-                        continue;\
+                    /* Check for wraparound, skip such samples completely. \
+                     * Note, changing ssd to a 64 bit variable would be \
+                     * simpler, avoiding this check, but it's slower on \
+                     * x86 32 bit at the moment. */\
+                    if (ssd < nodes[j]->ssd)\
+                        goto next_##NAME;\
                      /* Collapse any two states with the same previous sample value. \
                       * One could also distinguish states by step and by 2nd to last
-                     * sample, but the effects of that are negligible. */\
-                    for(k=0; k<frontier && nodes_next[k]; k++) {\
-                        if(dec_sample == nodes_next[k]->sample1) {\
-                            assert(ssd >= nodes_next[k]->ssd);\
+                     * sample, but the effects of that are negligible.
+                     * Since nodes in the previous generation are iterated
+                     * through a heap, they're roughly ordered from better to
+                     * worse, but not strictly ordered. Therefore, an earlier
+                     * node with the same sample value is better in most cases
+                     * (and thus the current is skipped), but not strictly
+                     * in all cases. Only skipping samples where ssd >=
+                     * ssd of the earlier node with the same sample gives
+                     * slightly worse quality, though, for some reason. */ \
+                    h = &hash[(uint16_t) dec_sample];\
+                    if (*h == generation)\
+                        goto next_##NAME;\
+                    if (heap_pos < frontier) {\
+                        pos = heap_pos++;\
+                    } else {\
+                        /* Try to replace one of the leaf nodes with the new \
+                         * one, but try a different slot each time. */\
+                        pos = (frontier >> 1) + (heap_pos & ((frontier >> 1) - 1));\
+                        if (ssd > nodes_next[pos]->ssd)\
                              goto next_##NAME;\
-                        }\
+                        heap_pos++;\
                      }\
-                    for(k=0; k<frontier; k++) {\
-                        if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\
-                            TrellisNode *u = nodes_next[frontier-1];\
-                            if(!u) {\
-                                assert(pathn < FREEZE_INTERVAL<<avctx->trellis);\
-                                u = t++;\
-                                u->path = pathn++;\
-                            }\
-                            u->ssd = ssd;\
-                            u->step = STEP_INDEX;\
-                            u->sample2 = nodes[j]->sample1;\
-                            u->sample1 = dec_sample;\
-                            paths[u->path].nibble = nibble;\
-                            paths[u->path].prev = nodes[j]->path;\
-                            memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\
-                            nodes_next[k] = u;\
+                    *h = generation;\
+                    u = nodes_next[pos];\
+                    if(!u) {\
+                        assert(pathn < FREEZE_INTERVAL<<avctx->trellis);\
+                        u = t++;\
+                        nodes_next[pos] = u;\
+                        u->path = pathn++;\
+                    }\
+                    u->ssd = ssd;\
+                    u->step = STEP_INDEX;\
+                    u->sample2 = nodes[j]->sample1;\
+                    u->sample1 = dec_sample;\
+                    paths[u->path].nibble = nibble;\
+                    paths[u->path].prev = nodes[j]->path;\
+                    /* Sift the newly inserted node up in the heap to \
+                     * restore the heap property. */\
+                    while (pos > 0) {\
+                        int parent = (pos - 1) >> 1;\
+                        if (nodes_next[parent]->ssd <= ssd)\
                              break;\
-                        }\
+                        FFSWAP(TrellisNode*, nodes_next[parent], nodes_next[pos]);\
+                        pos = parent;\
                      }\
                      next_##NAME:;
                      STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8));
@@ -430,6 +463,12 @@ static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
          nodes = nodes_next;
          nodes_next = u;
  
+        generation++;
+        if (generation == 255) {
+            memset(hash, 0xff, 65536 * sizeof(*hash));
+            generation = 0;
+        }
+
          // prevent overflow
          if(nodes[0]->ssd > (1<<28)) {
              for(j=1; j<frontier && nodes[j]; j++)
@@ -578,6 +617,7 @@ static int adpcm_encode_frame(AVCodecContext *avctx,
              }
          }
  
+        flush_put_bits(&pb);
          dst += put_bits_count(&pb)>>3;
          break;
      }
@@ -721,6 +761,12 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
      case CODEC_ID_ADPCM_CT:
          c->status[0].step = c->status[1].step = 511;
          break;
+    case CODEC_ID_ADPCM_IMA_WAV:
+        if (avctx->bits_per_coded_sample != 4) {
+            av_log(avctx, AV_LOG_ERROR, "Only 4-bit ADPCM IMA WAV files are supported\n");
+            return -1;
+        }
+        break;
      case CODEC_ID_ADPCM_IMA_WS:
          if (avctx->extradata && avctx->extradata_size == 2 * 4) {
              c->status[0].predictor = AV_RL32(avctx->extradata);
@@ -730,7 +776,7 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
      default:
          break;
      }
-    avctx->sample_fmt = SAMPLE_FMT_S16;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
      return 0;
  }
  
@@ -1671,7 +1717,7 @@ AVCodec name ## _encoder = {                    \
      adpcm_encode_frame,                         \
      adpcm_encode_close,                         \
      NULL,                                       \
-    .sample_fmts = (const enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, \
+    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, \
      .long_name = NULL_IF_CONFIG_SMALL(long_name_), \
  };
  #else