git.sesse.net Git - ffmpeg/blobdiff - libavcodec/flacdsp.c
v210: Add avx2 version of the 10-bit line encoder
[ffmpeg] / libavcodec / flacdsp.c
index fcee8e44c7775c1b8457770a82bf6889e1d68846..b9168693218aca5eecfdfe4d9813aa6777a32206 100644 (file)
 #include "libavutil/attributes.h"
 #include "libavutil/samplefmt.h"
 #include "flacdsp.h"
+#include "config.h"
 
 #define SAMPLE_SIZE 16
+#define PLANAR 0
+#include "flacdsp_template.c"
+#include "flacdsp_lpc_template.c"
+
+#undef  PLANAR
+#define PLANAR 1
 #include "flacdsp_template.c"
 
 #undef  SAMPLE_SIZE
+#undef  PLANAR
 #define SAMPLE_SIZE 32
+#define PLANAR 0
+#include "flacdsp_template.c"
+#include "flacdsp_lpc_template.c"
+
+#undef  PLANAR
+#define PLANAR 1
 #include "flacdsp_template.c"
 
 static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32],
@@ -34,27 +48,26 @@ static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32],
 {
     int i, j;
 
-    for (i = pred_order; i < len - 1; i += 2) {
-        int c;
-        int d = decoded[i-pred_order];
+    for (i = pred_order; i < len - 1; i += 2, decoded += 2) {
+        int c = coeffs[0];
+        int d = decoded[0];
         int s0 = 0, s1 = 0;
-        for (j = pred_order-1; j > 0; j--) {
-            c = coeffs[j];
+        for (j = 1; j < pred_order; j++) {
             s0 += c*d;
-            d = decoded[i-j];
+            d = decoded[j];
             s1 += c*d;
+            c = coeffs[j];
         }
-        c = coeffs[0];
         s0 += c*d;
-        d = decoded[i] += s0 >> qlevel;
+        d = decoded[j] += s0 >> qlevel;
         s1 += c*d;
-        decoded[i+1] += s1 >> qlevel;
+        decoded[j + 1] += s1 >> qlevel;
     }
     if (i < len) {
         int sum = 0;
         for (j = 0; j < pred_order; j++)
-            sum += coeffs[j] * decoded[i-j-1];
-        decoded[i] += sum >> qlevel;
+            sum += coeffs[j] * decoded[j];
+        decoded[j] += sum >> qlevel;
     }
 }
 
@@ -63,24 +76,39 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
 {
     int i, j;
 
-    for (i = pred_order; i < len; i++) {
+    for (i = pred_order; i < len; i++, decoded++) {
         int64_t sum = 0;
         for (j = 0; j < pred_order; j++)
-            sum += (int64_t)coeffs[j] * decoded[i-j-1];
-        decoded[i] += sum >> qlevel;
+            sum += (int64_t)coeffs[j] * decoded[j];
+        decoded[j] += sum >> qlevel;
     }
 
 }
 
-av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt)
+av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
+                             int bps)
 {
+    if (bps > 16) {
+        c->lpc            = flac_lpc_32_c;
+        c->lpc_encode     = flac_lpc_encode_c_32;
+    } else {
+        c->lpc            = flac_lpc_16_c;
+        c->lpc_encode     = flac_lpc_encode_c_16;
+    }
+
     switch (fmt) {
     case AV_SAMPLE_FMT_S32:
         c->decorrelate[0] = flac_decorrelate_indep_c_32;
         c->decorrelate[1] = flac_decorrelate_ls_c_32;
         c->decorrelate[2] = flac_decorrelate_rs_c_32;
         c->decorrelate[3] = flac_decorrelate_ms_c_32;
-        c->lpc            = flac_lpc_32_c;
+        break;
+
+    case AV_SAMPLE_FMT_S32P:
+        c->decorrelate[0] = flac_decorrelate_indep_c_32p;
+        c->decorrelate[1] = flac_decorrelate_ls_c_32p;
+        c->decorrelate[2] = flac_decorrelate_rs_c_32p;
+        c->decorrelate[3] = flac_decorrelate_ms_c_32p;
         break;
 
     case AV_SAMPLE_FMT_S16:
@@ -88,7 +116,16 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt)
         c->decorrelate[1] = flac_decorrelate_ls_c_16;
         c->decorrelate[2] = flac_decorrelate_rs_c_16;
         c->decorrelate[3] = flac_decorrelate_ms_c_16;
-        c->lpc            = flac_lpc_16_c;
+        break;
+
+    case AV_SAMPLE_FMT_S16P:
+        c->decorrelate[0] = flac_decorrelate_indep_c_16p;
+        c->decorrelate[1] = flac_decorrelate_ls_c_16p;
+        c->decorrelate[2] = flac_decorrelate_rs_c_16p;
+        c->decorrelate[3] = flac_decorrelate_ms_c_16p;
         break;
     }
+
+    if (ARCH_ARM)
+        ff_flacdsp_init_arm(c, fmt, bps);
 }