+/**
+ * Single-coefficient adaptive predictor step.
+ *
+ * The previous p->lastA[filter] is first stored at p->buf[delayA]. While
+ * p->sample_pos is below 3 the input is passed through unchanged and only
+ * seeds lastA/filterA. Afterwards a prediction is extrapolated from the two
+ * newest history entries, scaled by coeffsA[filter][0], and the result is
+ * accumulated into filterA[filter].
+ *
+ * @param p       predictor state (buf, lastA, filterA and coeffsA are updated)
+ * @param decoded input value for this step
+ * @param filter  index selecting which set of predictor state to use
+ * @param delayA  offset into p->buf of the history slot for this filter
+ * @return the updated p->filterA[filter], or @p decoded during warm-up
+ */
+static av_always_inline int filter_fast_3320(APEPredictor *p,
+                                             const int decoded, const int filter,
+                                             const int delayA)
+{
+    int32_t extrapolated;
+
+    p->buf[delayA] = p->lastA[filter];
+
+    /* Warm-up: not enough history yet, just seed the state. */
+    if (p->sample_pos < 3) {
+        p->lastA[filter]   = decoded;
+        p->filterA[filter] = decoded;
+        return decoded;
+    }
+
+    /* Linear extrapolation from the two most recent history values. */
+    extrapolated = 2 * p->buf[delayA] - p->buf[delayA - 1];
+    p->lastA[filter] = decoded + ((extrapolated * p->coeffsA[filter][0]) >> 9);
+
+    /* Step the coefficient up when input XOR prediction is positive, else down. */
+    p->coeffsA[filter][0] += ((decoded ^ extrapolated) > 0) ? 1 : -1;
+
+    p->filterA[filter] += p->lastA[filter];
+
+    return p->filterA[filter];
+}
+
+/**
+ * Two-stage adaptive predictor step (stage A with three coefficients,
+ * stage B with two).
+ *
+ * The previous lastA[filter] and filterB[filter] are first stored at
+ * p->buf[delayA] and p->buf[delayB]. While p->sample_pos is below @p start
+ * the function only seeds the state and returns decoded + filterA[filter].
+ * Afterwards both predictions are formed from the history in p->buf, the
+ * coefficients are adapted by the sign of the inputs, and the result is
+ * accumulated into filterA[filter].
+ *
+ * @param p       predictor state (buf, lastA, filterA, filterB, coeffsA and
+ *                coeffsB are updated)
+ * @param decoded input value for this step
+ * @param filter  index selecting which set of predictor state to use
+ * @param delayA  offset into p->buf of the stage A history slot
+ * @param delayB  offset into p->buf of the stage B history slot
+ * @param start   number of initial samples (by p->sample_pos) handled by the
+ *                warm-up path
+ * @param shift   right shift applied to the stage B prediction
+ * @return the updated p->filterA[filter]
+ */
+static av_always_inline int filter_3800(APEPredictor *p,
+                                        const int decoded, const int filter,
+                                        const int delayA,  const int delayB,
+                                        const int start,   const int shift)
+{
+    int32_t predictionA, predictionB, sign;
+    int32_t d0, d1, d2, d3, d4;
+
+    p->buf[delayA] = p->lastA[filter];
+    p->buf[delayB] = p->filterB[filter];
+    if (p->sample_pos < start) {
+        /* Warm-up: seed the state, no prediction or adaptation yet. */
+        predictionA = decoded + p->filterA[filter];
+        p->lastA[filter]   = decoded;
+        p->filterB[filter] = decoded;
+        p->filterA[filter] = predictionA;
+        return predictionA;
+    }
+    d2 =  p->buf[delayA];
+    /*
+     * The differences below can be negative; scale them by multiplication
+     * because left-shifting a negative value is undefined behaviour in C.
+     * The result is the same wherever the shift was defined.
+     */
+    d1 = (p->buf[delayA] - p->buf[delayA - 1]) * 2;
+    d0 =  p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) * 8);
+    d3 =  p->buf[delayB] * 2 - p->buf[delayB - 1];
+    d4 =  p->buf[delayB];
+
+    predictionA = d0 * p->coeffsA[filter][0] +
+                  d1 * p->coeffsA[filter][1] +
+                  d2 * p->coeffsA[filter][2];
+
+    /*
+     * Each (((d >> k) & m) - m / 2) term moves the sign bit of the 32-bit
+     * value d onto bit m, giving +m/2 for negative d and -m/2 otherwise.
+     */
+    sign = APESIGN(decoded);
+    p->coeffsA[filter][0] += (((d0 >> 30) & 2) - 1) * sign;
+    p->coeffsA[filter][1] += (((d1 >> 28) & 8) - 4) * sign;
+    p->coeffsA[filter][2] += (((d2 >> 28) & 8) - 4) * sign;
+
+    predictionB = d3 * p->coeffsB[filter][0] -
+                  d4 * p->coeffsB[filter][1];
+    p->lastA[filter] = decoded + (predictionA >> 11);
+    /* Stage B adapts on the sign of the stage A output, not of the input. */
+    sign = APESIGN(p->lastA[filter]);
+    p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
+    p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;
+
+    p->filterB[filter] = p->lastA[filter] + (predictionB >> shift);
+    p->filterA[filter] = p->filterB[filter] + ((p->filterA[filter] * 31) >> 5);
+
+    return p->filterA[filter];
+}
+
+/**
+ * Apply an adaptive FIR filter of the given order to a block, in place.
+ *
+ * The first @p order samples are left untouched and used to prime the delay
+ * line; every later sample has (dot product of delay line and coefficients)
+ * >> @p shift subtracted from it. Coefficients start at zero and are adapted
+ * after each tap by the sign of the current input sample.
+ *
+ * @param buffer samples to filter in place
+ * @param order  number of taps; @p coeffs and @p delay must each hold at
+ *               least this many elements
+ * @param shift  right shift applied to the dot product
+ * @param coeffs scratch array for the coefficients (overwritten)
+ * @param delay  scratch array for the delay line (overwritten)
+ * @param length number of samples in @p buffer
+ */
+static void long_filter_high_3800(int32_t *buffer, int order, int shift,
+                                  int32_t *coeffs, int32_t *delay, int length)
+{
+    int i, j;
+    int32_t dotprod, sign;
+
+    /*
+     * With no more than `order` samples there is nothing to filter. Return
+     * before priming the delay line so that samples beyond `length` are never
+     * read, and so that a non-positive order cannot index before `delay`.
+     */
+    if (order <= 0 || order >= length)
+        return;
+
+    memset(coeffs, 0, order * sizeof(*coeffs));
+    for (i = 0; i < order; i++)
+        delay[i] = buffer[i];
+    for (i = order; i < length; i++) {
+        dotprod = 0;
+        sign    = APESIGN(buffer[i]);
+        for (j = 0; j < order; j++) {
+            dotprod   += delay[j] * coeffs[j];
+            /* (((x >> 30) & 2) - 1) is +1 for negative x and -1 otherwise. */
+            coeffs[j] -= (((delay[j] >> 30) & 2) - 1) * sign;
+        }
+        buffer[i] -= dotprod >> shift;
+        /* Slide the delay line by one and append the filtered sample. */
+        for (j = 0; j < order - 1; j++)
+            delay[j] = delay[j + 1];
+        delay[order - 1] = buffer[i];
+    }
+}
+
+/**
+ * Apply a fixed 8-tap adaptive filter to a block, in place.
+ *
+ * Coefficients and history both start at zero. For each sample the dot
+ * product of history and coefficients, shifted right by 9, is subtracted
+ * from it; the history is fed with the unfiltered input sample.
+ *
+ * @param buffer samples to filter in place
+ * @param length number of samples in @p buffer (nothing is done if <= 0)
+ */
+static void long_filter_ehigh_3830(int32_t *buffer, int length)
+{
+    int32_t weights[8] = { 0 };
+    int32_t history[8] = { 0 };
+    int n, k;
+
+    for (n = 0; n < length; n++) {
+        const int32_t sample = buffer[n];
+        const int32_t sign   = APESIGN(sample);
+        int32_t acc = 0;
+
+        for (k = 7; k >= 0; k--) {
+            acc        += history[k] * weights[k];
+            /* Step is +1 for a negative history value, -1 otherwise. */
+            weights[k] -= ((history[k] < 0) * 2 - 1) * sign;
+        }
+
+        /* Age the history by one slot; the newest entry is the raw input. */
+        memmove(history + 1, history, 7 * sizeof(*history));
+        history[0] = sample;
+
+        buffer[n] = sample - (acc >> 9);
+    }
+}
+
+/**
+ * Run the 3800-series prediction stage over both channels, in place.
+ *
+ * For COMPRESSION_LEVEL_HIGH and COMPRESSION_LEVEL_EXTRA_HIGH the long
+ * filters are first applied to each whole channel; the per-sample predictor
+ * (filter_fast_3320 for COMPRESSION_LEVEL_FAST, filter_3800 otherwise) then
+ * runs with each channel's predictor fed from the other channel's input.
+ *
+ * @param ctx   decoder context; ctx->decoded[0..1] are rewritten and
+ *              ctx->predictor is updated
+ * @param count number of samples per channel
+ */
+static void predictor_decode_stereo_3800(APEContext *ctx, int count)
+{
+    APEPredictor *p = &ctx->predictor;
+    int32_t *ch0 = ctx->decoded[0];
+    int32_t *ch1 = ctx->decoded[1];
+    int32_t coeffs[256], delay[256];
+    int start = 4, shift = 10;
+    const int use_fast = ctx->compression_level == COMPRESSION_LEVEL_FAST;
+
+    if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) {
+        start = 16;
+        long_filter_high_3800(ch0, 16, 9, coeffs, delay, count);
+        long_filter_high_3800(ch1, 16, 9, coeffs, delay, count);
+    } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) {
+        int order = 128, shift2 = 11;
+
+        if (ctx->fileversion >= 3830) {
+            /* From file version 3830: doubled order, one more bit of shift. */
+            order  *= 2;
+            shift  += 1;
+            shift2 += 1;
+            long_filter_ehigh_3830(ch0 + order, count - order);
+            long_filter_ehigh_3830(ch1 + order, count - order);
+        }
+        start = order;
+        long_filter_high_3800(ch0, order, shift2, coeffs, delay, count);
+        long_filter_high_3800(ch1, order, shift2, coeffs, delay, count);
+    }
+
+    while (count--) {
+        /* Read both inputs before either is overwritten. */
+        const int in0 = *ch0, in1 = *ch1;
+
+        if (use_fast) {
+            *ch0++ = filter_fast_3320(p, in1, 0, YDELAYA);
+            *ch1++ = filter_fast_3320(p, in0, 1, XDELAYA);
+        } else {
+            *ch0++ = filter_3800(p, in1, 0, YDELAYA, YDELAYB, start, shift);
+            *ch1++ = filter_3800(p, in0, 1, XDELAYA, XDELAYB, start, shift);
+        }
+
+        /* Advance the shared history window and the sample counter. */
+        p->sample_pos++;
+        p->buf++;
+
+        /* Window hit the end: move the live tail back to the start. */
+        if (p->buf == p->historybuffer + HISTORY_SIZE) {
+            memmove(p->historybuffer, p->buf,
+                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
+            p->buf = p->historybuffer;
+        }
+    }
+}
+
+/**
+ * Run the 3800-series prediction stage over a single channel, in place.
+ *
+ * For COMPRESSION_LEVEL_HIGH and COMPRESSION_LEVEL_EXTRA_HIGH the long
+ * filters are first applied to the whole block; the per-sample predictor
+ * (filter_fast_3320 for COMPRESSION_LEVEL_FAST, filter_3800 otherwise) then
+ * runs over every sample.
+ *
+ * @param ctx   decoder context; ctx->decoded[0] is rewritten and
+ *              ctx->predictor is updated
+ * @param count number of samples
+ */
+static void predictor_decode_mono_3800(APEContext *ctx, int count)
+{
+    APEPredictor *p = &ctx->predictor;
+    int32_t *out = ctx->decoded[0];
+    int32_t coeffs[256], delay[256];
+    int start = 4, shift = 10;
+    const int use_fast = ctx->compression_level == COMPRESSION_LEVEL_FAST;
+
+    if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) {
+        start = 16;
+        long_filter_high_3800(out, 16, 9, coeffs, delay, count);
+    } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) {
+        int order = 128, shift2 = 11;
+
+        if (ctx->fileversion >= 3830) {
+            /* From file version 3830: doubled order, one more bit of shift. */
+            order  *= 2;
+            shift  += 1;
+            shift2 += 1;
+            long_filter_ehigh_3830(out + order, count - order);
+        }
+        start = order;
+        long_filter_high_3800(out, order, shift2, coeffs, delay, count);
+    }
+
+    while (count--) {
+        if (use_fast)
+            *out = filter_fast_3320(p, *out, 0, YDELAYA);
+        else
+            *out = filter_3800(p, *out, 0, YDELAYA, YDELAYB, start, shift);
+        out++;
+
+        /* Advance the history window and the sample counter. */
+        p->sample_pos++;
+        p->buf++;
+
+        /* Window hit the end: move the live tail back to the start. */
+        if (p->buf == p->historybuffer + HISTORY_SIZE) {
+            memmove(p->historybuffer, p->buf,
+                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
+            p->buf = p->historybuffer;
+        }
+    }
+}
+
+/**
+ * Four-coefficient adaptive predictor step.
+ *
+ * The previous lastA[filter] is stored at p->buf[delayA]; the prediction is
+ * built from that value and the three successive differences of the older
+ * history entries. Each coefficient is then adapted by the sign of the input
+ * and of its own tap.
+ *
+ * @param p       predictor state (buf, lastA, filterA and coeffsA are updated)
+ * @param decoded input value for this step
+ * @param filter  index selecting which set of predictor state to use
+ * @param delayA  offset into p->buf of the history slot for this filter
+ * @return the updated p->filterA[filter]
+ */
+static av_always_inline int predictor_update_3930(APEPredictor *p,
+                                                  const int decoded, const int filter,
+                                                  const int delayA)
+{
+    int32_t tap[4];
+    int32_t prediction = 0, sign;
+    int k;
+
+    p->buf[delayA] = p->lastA[filter];
+
+    tap[0] = p->buf[delayA];
+    tap[1] = p->buf[delayA]     - p->buf[delayA - 1];
+    tap[2] = p->buf[delayA - 1] - p->buf[delayA - 2];
+    tap[3] = p->buf[delayA - 2] - p->buf[delayA - 3];
+
+    for (k = 0; k < 4; k++)
+        prediction += tap[k] * p->coeffsA[filter][k];
+
+    p->lastA[filter]   = decoded + (prediction >> 9);
+    p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);
+
+    /* Step is +sign for a negative tap and -sign otherwise. */
+    sign = APESIGN(decoded);
+    for (k = 0; k < 4; k++)
+        p->coeffsA[filter][k] += (tap[k] < 0 ? 1 : -1) * sign;
+
+    return p->filterA[filter];
+}
+
+/**
+ * Run the 3930 prediction stage over both channels, in place.
+ *
+ * ape_apply_filters() is applied to the whole block first; then each sample
+ * pair goes through predictor_update_3930, with each channel's predictor fed
+ * from the other channel's input.
+ *
+ * @param ctx   decoder context; ctx->decoded[0..1] are rewritten and
+ *              ctx->predictor is updated
+ * @param count number of samples per channel
+ */
+static void predictor_decode_stereo_3930(APEContext *ctx, int count)
+{
+    APEPredictor *p = &ctx->predictor;
+    int32_t *ch0 = ctx->decoded[0];
+    int32_t *ch1 = ctx->decoded[1];
+
+    ape_apply_filters(ctx, ch0, ch1, count);
+
+    while (count--) {
+        /* Read both inputs before either is overwritten. */
+        const int in0 = *ch0, in1 = *ch1;
+
+        *ch0++ = predictor_update_3930(p, in1, 0, YDELAYA);
+        *ch1++ = predictor_update_3930(p, in0, 1, XDELAYA);
+
+        /* Advance the shared history window. */
+        p->buf++;
+
+        /* Window hit the end: move the live tail back to the start. */
+        if (p->buf == p->historybuffer + HISTORY_SIZE) {
+            memmove(p->historybuffer, p->buf,
+                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
+            p->buf = p->historybuffer;
+        }
+    }
+}
+
+/**
+ * Run the 3930 prediction stage over a single channel, in place.
+ *
+ * ape_apply_filters() is applied to the whole block first (with no second
+ * channel); then every sample goes through predictor_update_3930.
+ *
+ * @param ctx   decoder context; ctx->decoded[0] is rewritten and
+ *              ctx->predictor is updated
+ * @param count number of samples
+ */
+static void predictor_decode_mono_3930(APEContext *ctx, int count)
+{
+    APEPredictor *p = &ctx->predictor;
+    int32_t *out = ctx->decoded[0];
+
+    ape_apply_filters(ctx, out, NULL, count);
+
+    while (count--) {
+        *out = predictor_update_3930(p, *out, 0, YDELAYA);
+        out++;
+
+        /* Advance the history window. */
+        p->buf++;
+
+        /* Window hit the end: move the live tail back to the start. */
+        if (p->buf == p->historybuffer + HISTORY_SIZE) {
+            memmove(p->historybuffer, p->buf,
+                    PREDICTOR_SIZE * sizeof(*p->historybuffer));
+            p->buf = p->historybuffer;
+        }
+    }
+}
+
+static av_always_inline int predictor_update_filter(APEPredictor *p,
+ const int decoded, const int filter,
+ const int delayA, const int delayB,
+ const int adaptA, const int adaptB)