]> git.sesse.net Git - kdenlive/commitdiff
Working on FFT based correlation
authorSimon A. Eugster <simon.eu@gmail.com>
Mon, 20 Feb 2012 12:53:38 +0000 (13:53 +0100)
committerSimon A. Eugster <simon.eu@gmail.com>
Mon, 20 Feb 2012 12:53:38 +0000 (13:53 +0100)
Correlating two samples of 3 min length takes 2 ms with the FFT,
compared to 16 ms with the O(n²) correlation.
The results of the two methods do not match yet, however ...

src/audioscopes/ffttools.cpp
src/lib/audio/CMakeLists.txt
src/lib/audio/audioEnvelope.cpp
src/lib/audio/audioEnvelope.h
src/lib/audio/fftCorrelation.cpp [new file with mode: 0644]
src/lib/audio/fftCorrelation.h [new file with mode: 0644]
src/lib/external/kiss_fft/CMakeLists.txt
testingArea/CMakeLists.txt
testingArea/correlation.cpp [new file with mode: 0644]

index 3d1ae83fb624dba7639e56deca026735dbedffc7..4c8ff8518dfbe4159262897912ee487e900e5d0a 100644 (file)
@@ -139,7 +139,7 @@ void FFTTools::fftNormalized(const QVector<int16_t> audioFrame, const uint chann
 #ifdef DEBUG_FFTTOOLS
         qDebug() << "Creating FFT configuration with size " << windowSize;
 #endif
-        myCfg = kiss_fftr_alloc(windowSize, 0,0,0);
+        myCfg = kiss_fftr_alloc(windowSize, false,NULL,NULL);
         m_fftCfgs.insert(cfgSig, myCfg);
     }
 
index bf5cd2b33fddadf3c1577cfbf4fc5d13ec8386d6..d9e0d67881eaeef72a6e654af0de96a50e42f683 100644 (file)
@@ -6,5 +6,6 @@ set(kdenlive_SRCS
     lib/audio/audioEnvelope.cpp
     lib/audio/audioInfo.cpp
     lib/audio/audioStreamInfo.cpp
+    lib/audio/fftCorrelation.cpp
     PARENT_SCOPE
 )
index aaad918cd8d51a22c3140c46d4a71d99d9b38613..403b9c6b98d76079da5ab3b034faf424fd6b1e85 100644 (file)
@@ -55,6 +55,10 @@ int AudioEnvelope::envelopeSize() const
 {
     return m_envelopeSize;
 }
+/// Returns the stored envelope maximum (m_envelopeMax).
+/// NOTE(review): presumably only meaningful after the envelope has been loaded -- confirm.
+int64_t AudioEnvelope::maxValue() const
+{
+    return m_envelopeMax;
+}
 
 
 
index 7e12d72422174ca1b1d82ccbb9c4c9d40e83c510..058acec2cf525bec17f3e8733a7a5e82c7f09836 100644 (file)
@@ -32,6 +32,7 @@ public:
     /// Returns the envelope, calculates it if necessary.
     int64_t const* envelope();
     int envelopeSize() const;
+    int64_t maxValue() const;
 
     void loadEnvelope();
     int64_t loadStdDev();
diff --git a/src/lib/audio/fftCorrelation.cpp b/src/lib/audio/fftCorrelation.cpp
new file mode 100644 (file)
index 0000000..229d4ec
--- /dev/null
@@ -0,0 +1,57 @@
+#include "fftCorrelation.h"
+
+extern "C"
+{
+#include "../external/kiss_fft/tools/kiss_fftr.h"
+}
+
+#include <QTime>
+#include <QDebug>
+#include <algorithm>
+#include <vector>
+
+/**
+ * Computes the cross-correlation of two real-valued signals with the help of the FFT.
+ *
+ * Both inputs are zero-padded to a common power-of-two length N with N >= 64 and
+ * N >= 2*max(leftSize, rightSize), so that the circular correlation delivered by
+ * the FFT does not wrap one signal around onto the other. The result is
+ *
+ *     out[k] = sum_n left[n+k] * right[n]      (indices taken modulo N)
+ *
+ * i.e. lags k >= 0 are stored at index k and lags k < 0 at index N+k.
+ *
+ * @param left                 Samples of the first signal (only read).
+ * @param leftSize             Number of samples in \c left.
+ * @param right                Samples of the second signal (only read).
+ * @param rightSize            Number of samples in \c right.
+ * @param out_correlationData  Receives a buffer allocated with new[] holding
+ *                             \c out_size floats; the caller has to release it
+ *                             with delete[]. Set to NULL if nothing was computed.
+ * @param out_size             Receives N, or 0 if nothing was computed.
+ */
+void FFTCorrelation::correlate(float *left, int leftSize, float *right, int rightSize,
+                          float **out_correlationData, int &out_size)
+{
+    out_size = 0;
+    if (out_correlationData == NULL) {
+        return;
+    }
+    *out_correlationData = NULL;
+    if (leftSize < 0 || rightSize < 0
+            || (leftSize > 0 && left == NULL)
+            || (rightSize > 0 && right == NULL)) {
+        return;
+    }
+
+    QTime time;
+    time.start();
+
+    const int largestSize = std::max(leftSize, rightSize);
+
+    // Power-of-two FFT length of at least twice the longer input (zero padding).
+    int size = 64;
+    while (size/2 < largestSize) {
+        size = size << 1;
+    }
+
+    // A real FFT of length N yields N/2+1 complex bins (DC ... Nyquist);
+    // kiss_fftr() writes and kiss_fftri() reads exactly that many.
+    const int bins = size/2 + 1;
+
+    // Heap storage instead of variable-length arrays: the buffers scale with the
+    // input length and std::vector releases them on every exit path.
+    std::vector<kiss_fft_cpx> leftFFT(bins);
+    std::vector<kiss_fft_cpx> rightFFT(bins);
+    std::vector<kiss_fft_cpx> correlatedFFT(bins);
+    std::vector<float> leftData(size, 0.0f);
+    std::vector<float> rightData(size, 0.0f);
+
+    std::copy(left, left+leftSize, leftData.begin());
+    std::copy(right, right+rightSize, rightData.begin());
+
+    float *result = new float[size];
+
+    kiss_fftr_cfg fftConfig = kiss_fftr_alloc(size, false, NULL, NULL);
+    kiss_fftr_cfg ifftConfig = kiss_fftr_alloc(size, true, NULL, NULL);
+    if (fftConfig == NULL || ifftConfig == NULL) {
+        kiss_fftr_free(fftConfig);
+        kiss_fftr_free(ifftConfig);
+        delete[] result;
+        return;
+    }
+
+    kiss_fftr(fftConfig, &leftData[0], &leftFFT[0]);
+    kiss_fftr(fftConfig, &rightData[0], &rightFFT[0]);
+
+    // Correlation theorem: multiply one spectrum with the complex conjugate of
+    // the other (a plain product would yield the convolution instead).
+    // kiss_fftri() does not scale its output, so 1/N is folded in here.
+    const float scale = 1.0f / size;
+    for (int i = 0; i < bins; i++) {
+        const kiss_fft_cpx &l = leftFFT[i];
+        const kiss_fft_cpx &r = rightFFT[i];
+        correlatedFFT[i].r = (l.r*r.r + l.i*r.i) * scale;
+        correlatedFFT[i].i = (l.i*r.r - l.r*r.i) * scale;
+    }
+
+    kiss_fftri(ifftConfig, &correlatedFFT[0], result);
+
+    kiss_fftr_free(fftConfig);
+    kiss_fftr_free(ifftConfig);
+
+    *out_correlationData = result;
+    out_size = size;
+
+    qDebug() << "FFT correlation computed. Time taken: " << time.elapsed() << " ms";
+}
diff --git a/src/lib/audio/fftCorrelation.h b/src/lib/audio/fftCorrelation.h
new file mode 100644 (file)
index 0000000..483faf3
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef FFTCORRELATION_H
+#define FFTCORRELATION_H
+
+/// FFT-based correlation helper. Only exposes a static function.
+class FFTCorrelation
+{
+public:
+    /// Correlates the two float signals \c left and \c right using FFTs.
+    /// \param left       samples of the first signal
+    /// \param leftSize   number of samples in \c left
+    /// \param right      samples of the second signal
+    /// \param rightSize  number of samples in \c right
+    /// \param out        receives a buffer allocated with new[]; the caller must delete[] it
+    /// \param out_size   receives the number of floats stored in \c *out
+    static void correlate(float *left, int leftSize, float *right, int rightSize, float **out, int &out_size);
+};
+
+#endif // FFTCORRELATION_H
index 9b9b06439659ed58bb5be6d4d1dfaaf81d73eee7..eff1f9b76cefa8124a62537f68db69d4b0fedca4 100644 (file)
@@ -5,3 +5,16 @@ set(kdenlive_SRCS
   lib/external/kiss_fft/tools/kiss_fftr.c
   PARENT_SCOPE
 )
+
+
+# Create a static library from kiss_fft
+
+# Append the C99 switch to the C compiler flags used from here on.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --std=c99")
+# Add the top-level build directory to the include search path.
+include_directories(
+  ${CMAKE_BINARY_DIR}
+)
+# The library consists of the two kiss_fft C sources; the internal header is
+# listed alongside them.
+add_library(kiss_fft STATIC
+  _kiss_fft_guts.h
+  kiss_fft.c
+  tools/kiss_fftr.c
+)
index 3e561dc871dca46686957b73d1e65f46f82135b3..d523fbcf8dda8c0e568e990d63d3faf31eb662a9 100644 (file)
@@ -2,8 +2,11 @@
 message(STATUS "Building experimental executables")
 
 include_directories(
+  ${CMAKE_BINARY_DIR}
   ${LIBMLT_INCLUDE_DIR}
   ${LIBMLTPLUS_INCLUDE_DIR}
+  ${PROJECT_SOURCE_DIR}/src/lib/extern/kiss_fft
+  ${PROJECT_SOURCE_DIR}/src/lib/extern/kiss_fft/tools
 )
 include(${QT_USE_FILE})
 
@@ -20,3 +23,17 @@ target_link_libraries(audioOffset
   ${LIBMLT_LIBRARY}
   ${LIBMLTPLUS_LIBRARY}
 )
+
+# Experimental executable exercising the FFT correlation; the audio sources
+# are compiled directly from the main source tree.
+add_executable(correlation
+    correlation.cpp
+    ../src/lib/audio/audioInfo.cpp
+    ../src/lib/audio/audioStreamInfo.cpp
+    ../src/lib/audio/audioEnvelope.cpp
+    ../src/lib/audio/fftCorrelation.cpp
+)
+# Links against Qt, MLT/MLT++ and the kiss_fft library target.
+target_link_libraries(correlation
+  ${QT_LIBRARIES}
+  ${LIBMLT_LIBRARY}
+  ${LIBMLTPLUS_LIBRARY}
+  kiss_fft
+)
diff --git a/testingArea/correlation.cpp b/testingArea/correlation.cpp
new file mode 100644 (file)
index 0000000..a000f6b
--- /dev/null
@@ -0,0 +1,153 @@
+
+#include "../src/lib/audio/audioEnvelope.h"
+#include "../src/lib/audio/fftCorrelation.h"
+
+#include <QCoreApplication>
+#include <QStringList>
+#include <QImage>
+#include <QDebug>
+#include <iostream>
+
+/// Writes a short description of the tool and its command line options to stdout.
+/// \param path  name of the executable as invoked; shown in the usage line
+void printUsage(const char *path)
+{
+    std::ostream &out = std::cout;
+
+    // What the tool does
+    out << "This executable takes two audio/video files A and B and determines " << '\n';
+    out << "how much B needs to be shifted in order to be synchronized with A." << '\n' << '\n';
+
+    // Invocation and options
+    out << path << " <main audio file> <second audio file>" << '\n';
+    out << "\t-h, --help\n\t\tDisplay this help" << '\n';
+    out << "\t--profile=<profile>\n\t\tUse the given profile for calculation (run: melt -query profiles)" << '\n';
+    out << "\t--no-images\n\t\tDo not save envelope and correlation images" << '\n';
+
+    out << std::flush;
+}
+
+/**
+ * Test driver for the FFT correlation.
+ *
+ * Loads the two given audio/video files through MLT, builds their audio
+ * envelopes, normalises them by their maximum and correlates them with
+ * FFTCorrelation::correlate(). Unless --no-images is given, the correlation is
+ * plotted into "correlated-fft.png".
+ *
+ * Exit codes: 0 success or help shown, 1 too few file arguments, 2 a file could
+ * not be opened by MLT, 3 an envelope is empty, 4 no correlation was produced.
+ */
+int main(int argc, char *argv[])
+{
+    QCoreApplication app(argc, argv);
+    QStringList args = app.arguments();
+    args.removeAt(0);
+
+    std::string profile = "atsc_1080p_24";
+    bool saveImages = true;
+
+    // Separate the recognised options from the remaining (positional) arguments.
+    QStringList positional;
+    foreach (const QString &str, args) {
+
+        if (str.startsWith("--profile=")) {
+            profile = str.mid(QString("--profile=").length()).toStdString();
+
+        } else if (str == "-h" || str == "--help") {
+            printUsage(argv[0]);
+            return 0;
+
+        } else if (str == "--no-images") {
+            saveImages = false;
+
+        } else {
+            positional << str;
+        }
+
+    }
+
+    if (positional.length() < 2) {
+        printUsage(argv[0]);
+        return 1;
+    }
+
+    // The file names are the first two positional arguments. They must not be
+    // read from argv[1]/argv[2] directly, since an option may precede them.
+    const std::string fileMain = positional.takeFirst().toStdString();
+    const std::string fileSub = positional.takeFirst().toStdString();
+
+    qDebug() << "Unused arguments: " << positional;
+
+    std::cout << "Trying to align (2)\n\t" << fileSub << "\nto fit on (1)\n\t" << fileMain
+              << "\n, result will indicate by how much (2) has to be moved." << std::endl
+              << "Profile used: " << profile << std::endl
+                 ;
+
+
+    // Initialize MLT
+    Mlt::Factory::init(NULL);
+
+    // Load an arbitrary profile
+    Mlt::Profile prof(profile.c_str());
+
+    // Load the MLT producers
+    Mlt::Producer prodMain(prof, fileMain.c_str());
+    if (!prodMain.is_valid()) {
+        std::cout << fileMain << " is invalid." << std::endl;
+        return 2;
+    }
+    Mlt::Producer prodSub(prof, fileSub.c_str());
+    if (!prodSub.is_valid()) {
+        std::cout << fileSub << " is invalid." << std::endl;
+        return 2;
+    }
+
+
+    // Build the audio envelopes for the correlation. They live on the stack so
+    // that they are released on every return path.
+    AudioEnvelope envelopeMain(&prodMain);
+    envelopeMain.loadEnvelope();
+    envelopeMain.loadStdDev();
+    envelopeMain.dumpInfo();
+
+    AudioEnvelope envelopeSub(&prodSub);
+    envelopeSub.loadEnvelope();
+    envelopeSub.loadStdDev();
+    envelopeSub.dumpInfo();
+
+    const int leftSize = envelopeMain.envelopeSize();
+    const int rightSize = envelopeSub.envelopeSize();
+    if (leftSize <= 0 || rightSize <= 0) {
+        std::cout << "At least one of the audio envelopes is empty." << std::endl;
+        return 3;
+    }
+
+    // Variable-length arrays (compiler extension); the sizes are positive here.
+    float left[leftSize];
+    float right[rightSize];
+    const int64_t *leftEnv = envelopeMain.envelope();
+    const int64_t *rightEnv = envelopeSub.envelope();
+
+    // Normalise the envelopes by their maximum. A maximum of 0 (silence)
+    // yields an all-zero signal instead of a division by zero.
+    const double leftMax = static_cast<double>(envelopeMain.maxValue());
+    const double rightMax = static_cast<double>(envelopeSub.maxValue());
+
+    for (int i = 0; i < leftSize; i++) {
+        left[i] = (leftMax != 0) ? static_cast<double>(leftEnv[i])/leftMax : 0.0;
+        if (i < 20) std::cout << left[i] << " ";
+    }
+    std::cout << " (max: " << envelopeMain.maxValue() << ")" << std::endl;
+    for (int i = 0; i < rightSize; i++) {
+        right[i] = (rightMax != 0) ? static_cast<double>(rightEnv[i])/rightMax : 0.0;
+    }
+
+    float *correlated = NULL;
+    int corrSize = 0;
+    FFTCorrelation::correlate(left, leftSize, right, rightSize, &correlated, corrSize);
+
+    qDebug() << "Correlated: Size " << corrSize;
+
+    if (correlated == NULL || corrSize <= 0) {
+        std::cout << "No correlation data has been computed." << std::endl;
+        delete[] correlated;
+        return 4;
+    }
+
+    float max = 0;
+    for (int i = 0; i < corrSize; i++) {
+        if (correlated[i] > max) {
+            max = correlated[i];
+        }
+    }
+    qDebug() << "Max correlation value is " << max;
+
+    if (saveImages) {
+        const int height = 400;
+        QImage img(corrSize, height, QImage::Format_ARGB32);
+        img.fill(qRgb(255,255,255));
+        // Without a positive maximum there is nothing to scale the bars to.
+        if (max > 0) {
+            for (int x = 0; x < corrSize; x++) {
+                const float barHeight = height * (correlated[x]/max);
+                for (int y = 0; y < barHeight; y++) {
+                    img.setPixel(x, height-1-y, qRgb(50,50,50));
+                }
+            }
+        }
+        img.save("correlated-fft.png");
+    }
+
+    // The buffer was allocated with new[] by FFTCorrelation::correlate().
+    delete[] correlated;
+
+    return 0;
+}