From 69774d0fad6b1e7366ec0790849ad70397a56ee9 Mon Sep 17 00:00:00 2001 From: "Simon A. Eugster" Date: Mon, 20 Feb 2012 23:42:58 +0100 Subject: [PATCH] FFT based correlation works. Previous commit used convolution instead of correlation. --- src/lib/audio/audioEnvelope.cpp | 4 +- src/lib/audio/audioInfo.h | 18 ++-- src/lib/audio/audioStreamInfo.cpp | 18 ++-- src/lib/audio/audioStreamInfo.h | 18 ++-- src/lib/audio/fftCorrelation.cpp | 82 +++++++++++++++++-- src/lib/audio/fftCorrelation.h | 20 ++++- testingArea/CMakeLists.txt | 6 +- .../{correlation.cpp => audioOffsetFFT.cpp} | 68 +++++++++------ 8 files changed, 167 insertions(+), 67 deletions(-) rename testingArea/{correlation.cpp => audioOffsetFFT.cpp} (63%) diff --git a/src/lib/audio/audioEnvelope.cpp b/src/lib/audio/audioEnvelope.cpp index 403b9c6b..a7c02a2f 100644 --- a/src/lib/audio/audioEnvelope.cpp +++ b/src/lib/audio/audioEnvelope.cpp @@ -105,8 +105,8 @@ void AudioEnvelope::loadEnvelope() m_envelopeMax = sum; } - std::cout << position << "|" << m_producer->get_playtime() - << "-" << m_producer->get_in() << "+" << m_producer->get_out() << " "; +// std::cout << position << "|" << m_producer->get_playtime() +// << "-" << m_producer->get_in() << "+" << m_producer->get_out() << " "; delete frame; diff --git a/src/lib/audio/audioInfo.h b/src/lib/audio/audioInfo.h index 5a30fcdd..b1213d80 100644 --- a/src/lib/audio/audioInfo.h +++ b/src/lib/audio/audioInfo.h @@ -1,12 +1,12 @@ -/*************************************************************************** - * Copyright (C) 2012 by Simon Andreas Eugster (simon.eu@gmail.com) * - * This file is part of kdenlive. See www.kdenlive.org. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - ***************************************************************************/ +/* +Copyright (C) 2012 Simon A. Eugster (Granjow) +This file is part of kdenlive. See www.kdenlive.org. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +*/ #ifndef AUDIOINFO_H #define AUDIOINFO_H diff --git a/src/lib/audio/audioStreamInfo.cpp b/src/lib/audio/audioStreamInfo.cpp index c4706527..8fe64cf0 100644 --- a/src/lib/audio/audioStreamInfo.cpp +++ b/src/lib/audio/audioStreamInfo.cpp @@ -1,12 +1,12 @@ -/*************************************************************************** - * Copyright (C) 2012 by Simon Andreas Eugster (simon.eu@gmail.com) * - * This file is part of kdenlive. See www.kdenlive.org. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - ***************************************************************************/ +/* +Copyright (C) 2012 Simon A. Eugster (Granjow) +This file is part of kdenlive. See www.kdenlive.org. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +*/ #include "audioStreamInfo.h" diff --git a/src/lib/audio/audioStreamInfo.h b/src/lib/audio/audioStreamInfo.h index 868bb6cf..46c62bd2 100644 --- a/src/lib/audio/audioStreamInfo.h +++ b/src/lib/audio/audioStreamInfo.h @@ -1,12 +1,12 @@ -/*************************************************************************** - * Copyright (C) 2012 by Simon Andreas Eugster (simon.eu@gmail.com) * - * This file is part of kdenlive. See www.kdenlive.org. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License as published by * - * the Free Software Foundation; either version 2 of the License, or * - * (at your option) any later version. * - ***************************************************************************/ +/* +Copyright (C) 2012 Simon A. Eugster (Granjow) +This file is part of kdenlive. See www.kdenlive.org. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +*/ #ifndef AUDIOSTREAMINFO_H #define AUDIOSTREAMINFO_H diff --git a/src/lib/audio/fftCorrelation.cpp b/src/lib/audio/fftCorrelation.cpp index 229d4ec4..fa314096 100644 --- a/src/lib/audio/fftCorrelation.cpp +++ b/src/lib/audio/fftCorrelation.cpp @@ -1,3 +1,13 @@ +/* +Copyright (C) 2012 Simon A. Eugster (Granjow) +This file is part of kdenlive. See www.kdenlive.org. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +*/ + #include "fftCorrelation.h" extern "C" @@ -6,35 +16,86 @@ extern "C" } #include -#include +#include #include -void FFTCorrelation::correlate(float *left, int leftSize, float *right, int rightSize, - float **out_correlationData, int &out_size) +void FFTCorrelation::correlate(const int64_t *left, const int leftSize, + const int64_t *right, const int rightSize, + float **out_correlated, int &out_size) +{ + QTime t; + t.start(); + + float leftF[leftSize]; + float rightF[rightSize]; + + // First the int64_t values need to be normalized to floats + // Dividing by the max value is maybe not the best solution, but the + // maximum value after correlation should not be larger than the longest + // vector since each value should be at most 1 + int64_t maxLeft = 0; + int64_t maxRight = 0; + for (int i = 0; i < leftSize; i++) { + if (labs(left[i]) > maxLeft) { + maxLeft = labs(left[i]); + } + } + for (int i = 0; i < rightSize; i++) { + if (labs(right[i]) > maxRight) { + maxRight = labs(right[i]); + } + } + + + // One side needs to be reverted, since multiplication in frequency domain (fourier space) + // calculates the convolution: \sum l[x]r[N-x] and not the correlation: \sum l[x]r[x] + for (int i = 0; i < leftSize; i++) { + leftF[leftSize-1 - i] = double(left[i])/maxLeft; + } + for (int i = 0; i < rightSize; i++) { + rightF[i] = double(right[i])/maxRight; + } + + // Now we can convolve to get the correlation + convolute(leftF, leftSize, rightF, rightSize, out_correlated, out_size); + + std::cout << "Correlation (FFT based) computed in " << t.elapsed() << " ms." << std::endl; +} + +void FFTCorrelation::convolute(const float *left, const int leftSize, + const float *right, const int rightSize, + float **out_convolved, int &out_size) { QTime time; time.start(); + + // To avoid issues with repetition (we are dealing with cosine waves + // in the fourier domain) we need to pad the vectors to at least twice their size, + // otherwise convolution would convolve with the repeated pattern as well int largestSize = leftSize; if (rightSize > largestSize) { largestSize = rightSize; } + // The vectors must have the same size (same frequency resolution!) and should + // be a power of 2 (for FFT). int size = 64; while (size/2 < largestSize) { size = size << 1; } - kiss_fftr_cfg fftConfig = kiss_fftr_alloc(size, false,NULL,NULL); + kiss_fftr_cfg fftConfig = kiss_fftr_alloc(size, false, NULL,NULL); kiss_fftr_cfg ifftConfig = kiss_fftr_alloc(size, true, NULL,NULL); kiss_fft_cpx leftFFT[size/2]; kiss_fft_cpx rightFFT[size/2]; kiss_fft_cpx correlatedFFT[size/2]; + // Fill in the data into our new vectors with padding float leftData[size]; float rightData[size]; - *out_correlationData = new float[size]; + *out_convolved = new float[size]; std::fill(leftData, leftData+size, 0); std::fill(rightData, rightData+size, 0); @@ -42,16 +103,23 @@ void FFTCorrelation::correlate(float *left, int leftSize, float *right, int righ std::copy(left, left+leftSize, leftData); std::copy(right, right+rightSize, rightData); + // Fourier transformation of the vectors kiss_fftr(fftConfig, leftData, leftFFT); kiss_fftr(fftConfig, rightData, rightFFT); + // Convolution in spacial domain is a multiplication in fourier domain. O(n). for (int i = 0; i < size/2; i++) { correlatedFFT[i].r = leftFFT[i].r*rightFFT[i].r - leftFFT[i].i*rightFFT[i].i; correlatedFFT[i].i = leftFFT[i].r*rightFFT[i].i + leftFFT[i].i*rightFFT[i].r; } - kiss_fftri(ifftConfig, correlatedFFT, *out_correlationData); + // Inverse fourier tranformation to get the convolved data + kiss_fftri(ifftConfig, correlatedFFT, *out_convolved); out_size = size; - qDebug() << "FFT correlation computed. Time taken: " << time.elapsed() << " ms"; + // Finally some cleanup. + kiss_fftr_free(fftConfig); + kiss_fftr_free(ifftConfig); + + std::cout << "FFT convolution computed. Time taken: " << time.elapsed() << " ms" << std::endl; } diff --git a/src/lib/audio/fftCorrelation.h b/src/lib/audio/fftCorrelation.h index 483faf35..502b5067 100644 --- a/src/lib/audio/fftCorrelation.h +++ b/src/lib/audio/fftCorrelation.h @@ -1,10 +1,28 @@ +/* +Copyright (C) 2012 Simon A. Eugster (Granjow) +This file is part of kdenlive. See www.kdenlive.org. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +*/ + #ifndef FFTCORRELATION_H #define FFTCORRELATION_H +#include + class FFTCorrelation { public: - static void correlate(float *left, int leftSize, float *right, int rightSize, float **out, int &out_size); + static void convolute(const float *left, const int leftSize, + const float *right, const int rightSize, + float **out_convolved, int &out_size); + + static void correlate(const int64_t *left, const int leftSize, + const int64_t *right, const int rightSize, + float **out_correlated, int &out_size); }; #endif // FFTCORRELATION_H diff --git a/testingArea/CMakeLists.txt b/testingArea/CMakeLists.txt index d523fbcf..2efdb49f 100644 --- a/testingArea/CMakeLists.txt +++ b/testingArea/CMakeLists.txt @@ -24,14 +24,14 @@ target_link_libraries(audioOffset ${LIBMLTPLUS_LIBRARY} ) -add_executable(correlation - correlation.cpp +add_executable(audioOffsetFFT + audioOffsetFFT.cpp ../src/lib/audio/audioInfo.cpp ../src/lib/audio/audioStreamInfo.cpp ../src/lib/audio/audioEnvelope.cpp ../src/lib/audio/fftCorrelation.cpp ) -target_link_libraries(correlation +target_link_libraries(audioOffsetFFT ${QT_LIBRARIES} ${LIBMLT_LIBRARY} ${LIBMLTPLUS_LIBRARY} diff --git a/testingArea/correlation.cpp b/testingArea/audioOffsetFFT.cpp similarity index 63% rename from testingArea/correlation.cpp rename to testingArea/audioOffsetFFT.cpp index a000f6bc..98bd3f62 100644 --- a/testingArea/correlation.cpp +++ b/testingArea/audioOffsetFFT.cpp @@ -1,17 +1,30 @@ +/* +Copyright (C) 2012 Simon A. Eugster (Granjow) +This file is part of kdenlive. See www.kdenlive.org. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. +*/ #include "../src/lib/audio/audioEnvelope.h" #include "../src/lib/audio/fftCorrelation.h" #include #include +#include #include #include #include +#include void printUsage(const char *path) { std::cout << "This executable takes two audio/video files A and B and determines " << std::endl - << "how much B needs to be shifted in order to be synchronized with A." << std::endl << std::endl + << "how much B needs to be shifted in order to be synchronized with A." << std::endl + << "Other than audioOffset this executable will use Fast Fourier Tranform " << std::endl + << "which should be faster especially for large files." << std::endl << std::endl << path << "
" << std::endl << "\t-h, --help\n\t\tDisplay this help" << std::endl << "\t--profile=\n\t\tUse the given profile for calculation (run: melt -query profiles)" << std::endl @@ -107,45 +120,46 @@ int main(int argc, char *argv[]) envelopeSub->loadStdDev(); envelopeSub->dumpInfo(); - int leftSize = envelopeMain->envelopeSize(); - int rightSize = envelopeSub->envelopeSize(); - float left[leftSize]; - float right[rightSize]; - const int64_t *leftEnv = envelopeMain->envelope(); - const int64_t *rightEnv = envelopeSub->envelope(); - - for (int i = 0; i < leftSize; i++) { - left[i] = double(leftEnv[i])/envelopeMain->maxValue(); - if (i < 20) std::cout << left[i] << " "; - } - std::cout << " (max: " << envelopeMain->maxValue() << ")" << std::endl; - for (int i = 0; i < rightSize; i++) { - right[i] = double(rightEnv[i])/envelopeSub->maxValue(); - } float *correlated; int corrSize = 0; - FFTCorrelation::correlate(left, leftSize, right, rightSize, &correlated, corrSize); - qDebug() << "Correlated: Size " << corrSize; + FFTCorrelation::correlate(envelopeMain->envelope(), envelopeMain->envelopeSize(), + envelopeSub->envelope(), envelopeSub->envelopeSize(), + &correlated, corrSize); + + int maxIndex = 0; float max = 0; for (int i = 0; i < corrSize; i++) { if (correlated[i] > max) { max = correlated[i]; + maxIndex = i; } } - qDebug() << "Max correlation value is " << max; - - QImage img(corrSize, 400, QImage::Format_ARGB32); - img.fill(qRgb(255,255,255)); - for (int x = 0; x < corrSize; x++) { - float val = correlated[x]/max; - for (int y = 0; y < 400*val; y++) { - img.setPixel(x, 400-1-y, qRgb(50,50,50)); + int shift = envelopeMain->envelopeSize() - maxIndex-1; + qDebug() << "Max correlation value is " << max << " at " << maxIndex; + qDebug() << "Will have to move by " << shift << " frames"; + + std::cout << fileSub << " should be shifted by " << shift << " frames" << std::endl + << "\trelative to " << fileMain << std::endl + << "\tin a " << prodMain.get_fps() << " fps profile (" << profile << ")." << std::endl + ; + + if (saveImages) { + QString filename = QString("correlation-fft-%1.png") + .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); + QImage img(corrSize/2, 400, QImage::Format_ARGB32); + img.fill(qRgb(255,255,255)); + for (int x = 0; x < img.width(); x++) { + float val = fabs(correlated[x]/max); + for (int y = 0; y < img.height()*val; y++) { + img.setPixel(x, img.height()-1-y, qRgb(50,50,50)); + } } + img.save(filename); + qDebug() << "Saved image to " << filename; } - img.save("correlated-fft.png"); delete correlated; -- 2.39.2