]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/aaccoder.c
Optimize top non_zero_count_cache init.
[ffmpeg] / libavcodec / aaccoder.c
index e5a7a8e7f84bb6ee8775acdfe4409aac7d159058..0a51aa7759a91ceab97e8367f7ee40cb235b4689 100644 (file)
@@ -184,7 +184,7 @@ static float quantize_band_cost(struct AACEncContext *s, const float *in,
                             curbits += 21;
                         } else {
                             int c = av_clip(quant(t, Q), 0, 8191);
-                            di = t - c*cbrt(c)*IQ;
+                            di = t - c*cbrtf(c)*IQ;
                             curbits += av_log2(c)*2 - 4 + 1;
                         }
                     } else {
@@ -299,7 +299,7 @@ static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
                             curbits += 21;
                         } else {
                             int c = av_clip(quant(t, Q), 0, 8191);
-                            di = t - c*cbrt(c)*IQ;
+                            di = t - c*cbrtf(c)*IQ;
                             curbits += av_log2(c)*2 - 4 + 1;
                         }
                     } else {
@@ -463,31 +463,36 @@ typedef struct TrellisPath {
     int max_val;
 } TrellisPath;
 
+#define TRELLIS_STAGES 121
+#define TRELLIS_STATES 256
+
 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                        SingleChannelElement *sce,
                                        const float lambda)
 {
     int q, w, w2, g, start = 0;
-    int i;
+    int i, j;
     int idx;
-    TrellisPath paths[256*121];
-    int bandaddr[121];
+    TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
+    int bandaddr[TRELLIS_STAGES];
     int minq;
     float mincost;
 
-    for (i = 0; i < 256; i++) {
-        paths[i].cost    = 0.0f;
-        paths[i].prev    = -1;
-        paths[i].min_val = i;
-        paths[i].max_val = i;
+    for (i = 0; i < TRELLIS_STATES; i++) {
+        paths[0][i].cost    = 0.0f;
+        paths[0][i].prev    = -1;
+        paths[0][i].min_val = i;
+        paths[0][i].max_val = i;
     }
-    for (i = 256; i < 256*121; i++) {
-        paths[i].cost    = INFINITY;
-        paths[i].prev    = -2;
-        paths[i].min_val = INT_MAX;
-        paths[i].max_val = 0;
+    for (j = 1; j < TRELLIS_STAGES; j++) {
+        for (i = 0; i < TRELLIS_STATES; i++) {
+            paths[j][i].cost    = INFINITY;
+            paths[j][i].prev    = -2;
+            paths[j][i].min_val = INT_MAX;
+            paths[j][i].max_val = 0;
+        }
     }
-    idx = 256;
+    idx = 1;
     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
         start = w*128;
@@ -496,7 +501,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
             float qmin, qmax;
             int nz = 0;
 
-            bandaddr[idx >> 8] = w * 16 + g;
+            bandaddr[idx] = w * 16 + g;
             qmin = INT_MAX;
             qmax = 0.0f;
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
@@ -536,66 +541,67 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                         dist = FFMIN(dist, dists[i]);
                     minrd = FFMIN(minrd, dist);
 
-                    for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, 256); i++) {
+                    for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
                         float cost;
                         int minv, maxv;
-                        if (isinf(paths[idx - 256 + i].cost))
+                        if (isinf(paths[idx - 1][i].cost))
                             continue;
-                        cost = paths[idx - 256 + i].cost + dist
+                        cost = paths[idx - 1][i].cost + dist
                                + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
-                        minv = FFMIN(paths[idx - 256 + i].min_val, q);
-                        maxv = FFMAX(paths[idx - 256 + i].max_val, q);
-                        if (cost < paths[idx + q].cost && maxv-minv < SCALE_MAX_DIFF) {
-                            paths[idx + q].cost    = cost;
-                            paths[idx + q].prev    = idx - 256 + i;
-                            paths[idx + q].min_val = minv;
-                            paths[idx + q].max_val = maxv;
+                        minv = FFMIN(paths[idx - 1][i].min_val, q);
+                        maxv = FFMAX(paths[idx - 1][i].max_val, q);
+                        if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
+                            paths[idx][q].cost    = cost;
+                            paths[idx][q].prev    = i;
+                            paths[idx][q].min_val = minv;
+                            paths[idx][q].max_val = maxv;
                         }
                     }
                 }
             } else {
-                for (q = 0; q < 256; q++) {
-                    if (!isinf(paths[idx - 256 + q].cost)) {
-                        paths[idx + q].cost = paths[idx - 256 + q].cost + 1;
-                        paths[idx + q].prev = idx - 256 + q;
-                        paths[idx + q].min_val = FFMIN(paths[idx - 256 + q].min_val, q);
-                        paths[idx + q].max_val = FFMAX(paths[idx - 256 + q].max_val, q);
+                for (q = 0; q < TRELLIS_STATES; q++) {
+                    if (!isinf(paths[idx - 1][q].cost)) {
+                        paths[idx][q].cost = paths[idx - 1][q].cost + 1;
+                        paths[idx][q].prev = q;
+                        paths[idx][q].min_val = FFMIN(paths[idx - 1][q].min_val, q);
+                        paths[idx][q].max_val = FFMAX(paths[idx - 1][q].max_val, q);
                         continue;
                     }
-                    for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, 256); i++) {
+                    for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
                         float cost;
                         int minv, maxv;
-                        if (isinf(paths[idx - 256 + i].cost))
+                        if (isinf(paths[idx - 1][i].cost))
                             continue;
-                        cost = paths[idx - 256 + i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
-                        minv = FFMIN(paths[idx - 256 + i].min_val, q);
-                        maxv = FFMAX(paths[idx - 256 + i].max_val, q);
-                        if (cost < paths[idx + q].cost && maxv-minv < SCALE_MAX_DIFF) {
-                            paths[idx + q].cost    = cost;
-                            paths[idx + q].prev    = idx - 256 + i;
-                            paths[idx + q].min_val = minv;
-                            paths[idx + q].max_val = maxv;
+                        cost = paths[idx - 1][i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
+                        minv = FFMIN(paths[idx - 1][i].min_val, q);
+                        maxv = FFMAX(paths[idx - 1][i].max_val, q);
+                        if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
+                            paths[idx][q].cost    = cost;
+                            paths[idx][q].prev    = i;
+                            paths[idx][q].min_val = minv;
+                            paths[idx][q].max_val = maxv;
                         }
                     }
                 }
             }
             sce->zeroes[w*16+g] = !nz;
             start += sce->ics.swb_sizes[g];
-            idx   += 256;
+            idx++;
         }
     }
-    idx -= 256;
-    mincost = paths[idx].cost;
-    minq    = idx;
-    for (i = 1; i < 256; i++) {
-        if (paths[idx + i].cost < mincost) {
-            mincost = paths[idx + i].cost;
-            minq = idx + i;
+    idx--;
+    mincost = paths[idx][0].cost;
+    minq    = 0;
+    for (i = 1; i < TRELLIS_STATES; i++) {
+        if (paths[idx][i].cost < mincost) {
+            mincost = paths[idx][i].cost;
+            minq = i;
         }
     }
-    while (minq >= 256) {
-        sce->sf_idx[bandaddr[minq>>8]] = minq & 0xFF;
-        minq = paths[minq].prev;
+    while (idx) {
+        sce->sf_idx[bandaddr[idx]] = minq;
+        minq = paths[idx][minq].prev;
+        idx--;
     }
     //set the same quantizers inside window groups
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])