TrellisPath *paths;
TrellisNode *node_buf;
TrellisNode **nodep_buf;
+ uint8_t *trellis_hash;
} ADPCMContext;
#define FREEZE_INTERVAL 128
FF_ALLOC_OR_GOTO(avctx, s->paths, max_paths * sizeof(*s->paths), error);
FF_ALLOC_OR_GOTO(avctx, s->node_buf, 2 * frontier * sizeof(*s->node_buf), error);
FF_ALLOC_OR_GOTO(avctx, s->nodep_buf, 2 * frontier * sizeof(*s->nodep_buf), error);
+ FF_ALLOC_OR_GOTO(avctx, s->trellis_hash, 65536 * sizeof(*s->trellis_hash), error);
}
switch(avctx->codec->id) {
av_freep(&s->paths);
av_freep(&s->node_buf);
av_freep(&s->nodep_buf);
+ av_freep(&s->trellis_hash);
return -1;
}
av_freep(&s->paths);
av_freep(&s->node_buf);
av_freep(&s->nodep_buf);
+ av_freep(&s->trellis_hash);
return 0;
}
TrellisNode **nodep_buf = s->nodep_buf;
TrellisNode **nodes = nodep_buf; // nodes[] is always sorted by .ssd
TrellisNode **nodes_next = nodep_buf + frontier;
- int pathn = 0, froze = -1, i, j, k;
+ int pathn = 0, froze = -1, i, j, k, generation = 0;
+ uint8_t *hash = s->trellis_hash;
+ memset(hash, 0xff, 65536 * sizeof(*hash));
memset(nodep_buf, 0, 2 * frontier * sizeof(*nodep_buf));
nodes[0] = node_buf + frontier;
TrellisNode *t = node_buf + frontier*(i&1);
TrellisNode **u;
int sample = samples[i*stride];
+ int heap_pos = 0;
memset(nodes_next, 0, frontier*sizeof(TrellisNode*));
for(j=0; j<frontier && nodes[j]; j++) {
- // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too
+ // higher j have higher ssd already, so they're likely to yield a suboptimal next sample too
const int range = (j < frontier/2) ? 1 : 0;
const int step = nodes[j]->step;
int nidx;
#define STORE_NODE(NAME, STEP_INDEX)\
int d;\
uint32_t ssd;\
+ int pos;\
+ TrellisNode *u;\
+ uint8_t *h;\
dec_sample = av_clip_int16(dec_sample);\
d = sample - dec_sample;\
ssd = nodes[j]->ssd + d*d;\
- if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\
- continue;\
+ /* Check for wraparound, skip such samples completely. \
+ * Note, changing ssd to a 64 bit variable would be \
+ * simpler, avoiding this check, but it's slower on \
+ * x86 32 bit at the moment. */\
+ if (ssd < nodes[j]->ssd)\
+ goto next_##NAME;\
/* Collapse any two states with the same previous sample value. \
* One could also distinguish states by step and by 2nd to last
- * sample, but the effects of that are negligible. */\
- for(k=0; k<frontier && nodes_next[k]; k++) {\
- if(dec_sample == nodes_next[k]->sample1) {\
- assert(ssd >= nodes_next[k]->ssd);\
+ * sample, but the effects of that are negligible.
+ * Since nodes in the previous generation are iterated
+ * through a heap, they're roughly ordered from better to
+ * worse, but not strictly ordered. Therefore, an earlier
+ * node with the same sample value is better in most cases
+ * (and thus the current is skipped), but not strictly
+ * in all cases. Only skipping samples where ssd >=
+ * ssd of the earlier node with the same sample gives
+ * slightly worse quality, though, for some reason. */ \
+ h = &hash[(uint16_t) dec_sample];\
+ if (*h == generation)\
+ goto next_##NAME;\
+ if (heap_pos < frontier) {\
+ pos = heap_pos++;\
+ } else {\
+ /* Try to replace one of the leaf nodes with the new \
+ * one, but try a different slot each time. */\
+ pos = (frontier >> 1) + (heap_pos & ((frontier >> 1) - 1));\
+ if (ssd > nodes_next[pos]->ssd)\
goto next_##NAME;\
- }\
+ heap_pos++;\
}\
- for(k=0; k<frontier; k++) {\
- if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\
- TrellisNode *u = nodes_next[frontier-1];\
- if(!u) {\
- assert(pathn < FREEZE_INTERVAL<<avctx->trellis);\
- u = t++;\
- u->path = pathn++;\
- }\
- u->ssd = ssd;\
- u->step = STEP_INDEX;\
- u->sample2 = nodes[j]->sample1;\
- u->sample1 = dec_sample;\
- paths[u->path].nibble = nibble;\
- paths[u->path].prev = nodes[j]->path;\
- memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\
- nodes_next[k] = u;\
+ *h = generation;\
+ u = nodes_next[pos];\
+ if(!u) {\
+ assert(pathn < FREEZE_INTERVAL<<avctx->trellis);\
+ u = t++;\
+ nodes_next[pos] = u;\
+ u->path = pathn++;\
+ }\
+ u->ssd = ssd;\
+ u->step = STEP_INDEX;\
+ u->sample2 = nodes[j]->sample1;\
+ u->sample1 = dec_sample;\
+ paths[u->path].nibble = nibble;\
+ paths[u->path].prev = nodes[j]->path;\
+ /* Sift the newly inserted node up in the heap to \
+ * restore the heap property. */\
+ while (pos > 0) {\
+ int parent = (pos - 1) >> 1;\
+ if (nodes_next[parent]->ssd <= ssd)\
break;\
- }\
+ FFSWAP(TrellisNode*, nodes_next[parent], nodes_next[pos]);\
+ pos = parent;\
}\
next_##NAME:;
STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8));
nodes = nodes_next;
nodes_next = u;
+ generation++;
+ if (generation == 255) {
+ memset(hash, 0xff, 65536 * sizeof(*hash));
+ generation = 0;
+ }
+
// prevent overflow
if(nodes[0]->ssd > (1<<28)) {
for(j=1; j<frontier && nodes[j]; j++)
}
}
+ flush_put_bits(&pb);
dst += put_bits_count(&pb)>>3;
break;
}
case CODEC_ID_ADPCM_CT:
c->status[0].step = c->status[1].step = 511;
break;
+ case CODEC_ID_ADPCM_IMA_WAV:
+ if (avctx->bits_per_coded_sample != 4) {
+ av_log(avctx, AV_LOG_ERROR, "Only 4-bit ADPCM IMA WAV files are supported\n");
+ return -1;
+ }
+ break;
case CODEC_ID_ADPCM_IMA_WS:
if (avctx->extradata && avctx->extradata_size == 2 * 4) {
c->status[0].predictor = AV_RL32(avctx->extradata);
default:
break;
}
- avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->sample_fmt = AV_SAMPLE_FMT_S16;
return 0;
}
adpcm_encode_frame, \
adpcm_encode_close, \
NULL, \
- .sample_fmts = (const enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, \
+ .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, \
.long_name = NULL_IF_CONFIG_SMALL(long_name_), \
};
#else