From fc658bb3f49618e4f83a88b48ea1bc06151b99b6 Mon Sep 17 00:00:00 2001 From: "Simon A. Eugster" Date: Mon, 20 Feb 2012 13:53:38 +0100 Subject: [PATCH] Working on FFT based correlation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Correlation for two samples of 3 min length is calculated in 2 ms, compared to 16 ms with O(n²) correlation. The result is not the same yet, however ... --- src/audioscopes/ffttools.cpp | 2 +- src/lib/audio/CMakeLists.txt | 1 + src/lib/audio/audioEnvelope.cpp | 4 + src/lib/audio/audioEnvelope.h | 1 + src/lib/audio/fftCorrelation.cpp | 57 +++++++++ src/lib/audio/fftCorrelation.h | 10 ++ src/lib/external/kiss_fft/CMakeLists.txt | 13 ++ testingArea/CMakeLists.txt | 17 +++ testingArea/correlation.cpp | 153 +++++++++++++++++++++++ 9 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 src/lib/audio/fftCorrelation.cpp create mode 100644 src/lib/audio/fftCorrelation.h create mode 100644 testingArea/correlation.cpp diff --git a/src/audioscopes/ffttools.cpp b/src/audioscopes/ffttools.cpp index 3d1ae83f..4c8ff851 100644 --- a/src/audioscopes/ffttools.cpp +++ b/src/audioscopes/ffttools.cpp @@ -139,7 +139,7 @@ void FFTTools::fftNormalized(const QVector audioFrame, const uint chann #ifdef DEBUG_FFTTOOLS qDebug() << "Creating FFT configuration with size " << windowSize; #endif - myCfg = kiss_fftr_alloc(windowSize, 0,0,0); + myCfg = kiss_fftr_alloc(windowSize, false,NULL,NULL); m_fftCfgs.insert(cfgSig, myCfg); } diff --git a/src/lib/audio/CMakeLists.txt b/src/lib/audio/CMakeLists.txt index bf5cd2b3..d9e0d678 100644 --- a/src/lib/audio/CMakeLists.txt +++ b/src/lib/audio/CMakeLists.txt @@ -6,5 +6,6 @@ set(kdenlive_SRCS lib/audio/audioEnvelope.cpp lib/audio/audioInfo.cpp lib/audio/audioStreamInfo.cpp + lib/audio/fftCorrelation.cpp PARENT_SCOPE ) diff --git a/src/lib/audio/audioEnvelope.cpp b/src/lib/audio/audioEnvelope.cpp index aaad918c..403b9c6b 100644 --- a/src/lib/audio/audioEnvelope.cpp +++
b/src/lib/audio/audioEnvelope.cpp @@ -55,6 +55,10 @@ int AudioEnvelope::envelopeSize() const { return m_envelopeSize; } +int64_t AudioEnvelope::maxValue() const +{ + return m_envelopeMax; +} diff --git a/src/lib/audio/audioEnvelope.h b/src/lib/audio/audioEnvelope.h index 7e12d724..058acec2 100644 --- a/src/lib/audio/audioEnvelope.h +++ b/src/lib/audio/audioEnvelope.h @@ -32,6 +32,7 @@ public: /// Returns the envelope, calculates it if necessary. int64_t const* envelope(); int envelopeSize() const; + int64_t maxValue() const; void loadEnvelope(); int64_t loadStdDev(); diff --git a/src/lib/audio/fftCorrelation.cpp b/src/lib/audio/fftCorrelation.cpp new file mode 100644 index 00000000..229d4ec4 --- /dev/null +++ b/src/lib/audio/fftCorrelation.cpp @@ -0,0 +1,57 @@ +#include "fftCorrelation.h" + +extern "C" +{ +#include "../external/kiss_fft/tools/kiss_fftr.h" +} + +#include +#include +#include + +void FFTCorrelation::correlate(float *left, int leftSize, float *right, int rightSize, + float **out_correlationData, int &out_size) +{ + QTime time; + time.start(); + + int largestSize = leftSize; + if (rightSize > largestSize) { + largestSize = rightSize; + } + + int size = 64; + while (size/2 < largestSize) { + size = size << 1; + } + + kiss_fftr_cfg fftConfig = kiss_fftr_alloc(size, false,NULL,NULL); + kiss_fftr_cfg ifftConfig = kiss_fftr_alloc(size, true, NULL,NULL); + kiss_fft_cpx leftFFT[size/2]; + kiss_fft_cpx rightFFT[size/2]; + kiss_fft_cpx correlatedFFT[size/2]; + + + float leftData[size]; + float rightData[size]; + *out_correlationData = new float[size]; + + std::fill(leftData, leftData+size, 0); + std::fill(rightData, rightData+size, 0); + + std::copy(left, left+leftSize, leftData); + std::copy(right, right+rightSize, rightData); + + kiss_fftr(fftConfig, leftData, leftFFT); + kiss_fftr(fftConfig, rightData, rightFFT); + + for (int i = 0; i < size/2; i++) { + correlatedFFT[i].r = leftFFT[i].r*rightFFT[i].r - leftFFT[i].i*rightFFT[i].i; + correlatedFFT[i].i = 
leftFFT[i].r*rightFFT[i].i + leftFFT[i].i*rightFFT[i].r; + } + + kiss_fftri(ifftConfig, correlatedFFT, *out_correlationData); + out_size = size; + + qDebug() << "FFT correlation computed. Time taken: " << time.elapsed() << " ms"; +} diff --git a/src/lib/audio/fftCorrelation.h b/src/lib/audio/fftCorrelation.h new file mode 100644 index 00000000..483faf35 --- /dev/null +++ b/src/lib/audio/fftCorrelation.h @@ -0,0 +1,10 @@ +#ifndef FFTCORRELATION_H +#define FFTCORRELATION_H + +class FFTCorrelation +{ +public: + static void correlate(float *left, int leftSize, float *right, int rightSize, float **out, int &out_size); +}; + +#endif // FFTCORRELATION_H diff --git a/src/lib/external/kiss_fft/CMakeLists.txt b/src/lib/external/kiss_fft/CMakeLists.txt index 9b9b0643..eff1f9b7 100644 --- a/src/lib/external/kiss_fft/CMakeLists.txt +++ b/src/lib/external/kiss_fft/CMakeLists.txt @@ -5,3 +5,16 @@ set(kdenlive_SRCS lib/external/kiss_fft/tools/kiss_fftr.c PARENT_SCOPE ) + + +# Create a static library from kiss_fft + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --std=c99") +include_directories( + ${CMAKE_BINARY_DIR} +) +add_library(kiss_fft STATIC + _kiss_fft_guts.h + kiss_fft.c + tools/kiss_fftr.c +) diff --git a/testingArea/CMakeLists.txt b/testingArea/CMakeLists.txt index 3e561dc8..d523fbcf 100644 --- a/testingArea/CMakeLists.txt +++ b/testingArea/CMakeLists.txt @@ -2,8 +2,11 @@ message(STATUS "Building experimental executables") include_directories( + ${CMAKE_BINARY_DIR} ${LIBMLT_INCLUDE_DIR} ${LIBMLTPLUS_INCLUDE_DIR} + ${PROJECT_SOURCE_DIR}/src/lib/extern/kiss_fft + ${PROJECT_SOURCE_DIR}/src/lib/extern/kiss_fft/tools ) include(${QT_USE_FILE}) @@ -20,3 +23,17 @@ target_link_libraries(audioOffset ${LIBMLT_LIBRARY} ${LIBMLTPLUS_LIBRARY} ) + +add_executable(correlation + correlation.cpp + ../src/lib/audio/audioInfo.cpp + ../src/lib/audio/audioStreamInfo.cpp + ../src/lib/audio/audioEnvelope.cpp + ../src/lib/audio/fftCorrelation.cpp +) +target_link_libraries(correlation + ${QT_LIBRARIES} + 
${LIBMLT_LIBRARY} + ${LIBMLTPLUS_LIBRARY} + kiss_fft +) diff --git a/testingArea/correlation.cpp b/testingArea/correlation.cpp new file mode 100644 index 00000000..a000f6bc --- /dev/null +++ b/testingArea/correlation.cpp @@ -0,0 +1,153 @@ + +#include "../src/lib/audio/audioEnvelope.h" +#include "../src/lib/audio/fftCorrelation.h" + +#include +#include +#include +#include +#include + +void printUsage(const char *path) +{ + std::cout << "This executable takes two audio/video files A and B and determines " << std::endl + << "how much B needs to be shifted in order to be synchronized with A." << std::endl << std::endl + << path << "
" << std::endl + << "\t-h, --help\n\t\tDisplay this help" << std::endl + << "\t--profile=\n\t\tUse the given profile for calculation (run: melt -query profiles)" << std::endl + << "\t--no-images\n\t\tDo not save envelope and correlation images" << std::endl + ; +} + +int main(int argc, char *argv[]) +{ + QCoreApplication app(argc, argv); + QStringList args = app.arguments(); + args.removeAt(0); + + std::string profile = "atsc_1080p_24"; + bool saveImages = true; + + // Load arguments + foreach (QString str, args) { + + if (str.startsWith("--profile=")) { + QString s = str; + s.remove(0, QString("--profile=").length()); + profile = s.toStdString(); + args.removeOne(str); + + } else if (str == "-h" || str == "--help") { + printUsage(argv[0]); + return 0; + + } else if (str == "--no-images") { + saveImages = false; + args.removeOne(str); + } + + } + + if (args.length() < 2) { + printUsage(argv[0]); + return 1; + } + + + + std::string fileMain(args.at(0).toStdString()); + args.removeFirst(); + std::string fileSub = args.at(0).toStdString(); + args.removeFirst(); + + + qDebug() << "Unused arguments: " << args; + + + if (argc > 2) { + fileMain = argv[1]; + fileSub = argv[2]; + } else { + std::cout << "Usage: " << argv[0] << "
" << std::endl; + return 0; + } + std::cout << "Trying to align (2)\n\t" << fileSub << "\nto fit on (1)\n\t" << fileMain + << "\n, result will indicate by how much (2) has to be moved." << std::endl + << "Profile used: " << profile << std::endl + ; + + + // Initialize MLT + Mlt::Factory::init(NULL); + + // Load an arbitrary profile + Mlt::Profile prof(profile.c_str()); + + // Load the MLT producers + Mlt::Producer prodMain(prof, fileMain.c_str()); + if (!prodMain.is_valid()) { + std::cout << fileMain << " is invalid." << std::endl; + return 2; + } + Mlt::Producer prodSub(prof, fileSub.c_str()); + if (!prodSub.is_valid()) { + std::cout << fileSub << " is invalid." << std::endl; + return 2; + } + + + // Build the audio envelopes for the correlation + AudioEnvelope *envelopeMain = new AudioEnvelope(&prodMain); + envelopeMain->loadEnvelope(); + envelopeMain->loadStdDev(); + envelopeMain->dumpInfo(); + + AudioEnvelope *envelopeSub = new AudioEnvelope(&prodSub); + envelopeSub->loadEnvelope(); + envelopeSub->loadStdDev(); + envelopeSub->dumpInfo(); + + int leftSize = envelopeMain->envelopeSize(); + int rightSize = envelopeSub->envelopeSize(); + float left[leftSize]; + float right[rightSize]; + const int64_t *leftEnv = envelopeMain->envelope(); + const int64_t *rightEnv = envelopeSub->envelope(); + + for (int i = 0; i < leftSize; i++) { + left[i] = double(leftEnv[i])/envelopeMain->maxValue(); + if (i < 20) std::cout << left[i] << " "; + } + std::cout << " (max: " << envelopeMain->maxValue() << ")" << std::endl; + for (int i = 0; i < rightSize; i++) { + right[i] = double(rightEnv[i])/envelopeSub->maxValue(); + } + + float *correlated; + int corrSize = 0; + FFTCorrelation::correlate(left, leftSize, right, rightSize, &correlated, corrSize); + + qDebug() << "Correlated: Size " << corrSize; + + float max = 0; + for (int i = 0; i < corrSize; i++) { + if (correlated[i] > max) { + max = correlated[i]; + } + } + qDebug() << "Max correlation value is " << max; + + QImage 
img(corrSize, 400, QImage::Format_ARGB32); + img.fill(qRgb(255,255,255)); + for (int x = 0; x < corrSize; x++) { + float val = correlated[x]/max; + for (int y = 0; y < 400*val; y++) { + img.setPixel(x, 400-1-y, qRgb(50,50,50)); + } + } + img.save("correlated-fft.png"); + + + delete correlated; + +} -- 2.39.2