From 753c45e28c73b1366b08ff846d27cba897881420 Mon Sep 17 00:00:00 2001 From: "Simon A. Eugster" Date: Tue, 14 Feb 2012 16:05:01 +0100 Subject: [PATCH] Audio alignment works. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is at the moment still a brute-force approach (correlation currently requires O(n²) time) but /works/ and is reasonably fast for clips with reasonable length (not too many minutes). Later the correlation can be replaced with an FFT version which would only require O(n log n) time. Example usage of the executable: $ ./audioOffset mainVideo.avi someSound.wav This will e.g. tell you that someSound.wav needs to be shifted by 42 frames in order to be aligned. The MLT profile can be specified with --profile=<profile>. --- testingArea/CMakeLists.txt | 2 +- testingArea/audioCorrelation.cpp | 109 +++++++++++++++++ testingArea/audioCorrelation.h | 29 +++++ testingArea/audioCorrelationInfo.cpp | 83 +++++++++++++ testingArea/audioCorrelationInfo.h | 32 +++++ testingArea/audioOffset.cpp | 171 ++++++++++++++++----------- 6 files changed, 353 insertions(+), 73 deletions(-) create mode 100644 testingArea/audioCorrelation.cpp create mode 100644 testingArea/audioCorrelation.h create mode 100644 testingArea/audioCorrelationInfo.cpp create mode 100644 testingArea/audioCorrelationInfo.h diff --git a/testingArea/CMakeLists.txt b/testingArea/CMakeLists.txt index da4c533e..340acf32 100644 --- a/testingArea/CMakeLists.txt +++ b/testingArea/CMakeLists.txt @@ -7,7 +7,7 @@ include_directories( ) include(${QT_USE_FILE}) -add_executable(audioOffset audioOffset.cpp audioInfo.cpp audioStreamInfo.cpp audioEnvelope.cpp) +add_executable(audioOffset audioOffset.cpp audioInfo.cpp audioStreamInfo.cpp audioEnvelope.cpp audioCorrelation.cpp audioCorrelationInfo.cpp) target_link_libraries(audioOffset ${QT_LIBRARIES} ${LIBMLT_LIBRARY} diff --git a/testingArea/audioCorrelation.cpp b/testingArea/audioCorrelation.cpp new file mode 100644 index 
00000000..96adab6c --- /dev/null +++ b/testingArea/audioCorrelation.cpp @@ -0,0 +1,109 @@ +#include "audioCorrelation.h" + +#include +#include +#include + +AudioCorrelation::AudioCorrelation(AudioEnvelope *mainTrackEnvelope) : + m_mainTrackEnvelope(mainTrackEnvelope) +{ + m_mainTrackEnvelope->normalizeEnvelope(); +} + +AudioCorrelation::~AudioCorrelation() +{ +} + +int AudioCorrelation::addChild(AudioEnvelope *envelope) +{ + envelope->normalizeEnvelope(); + + const int sizeMain = m_mainTrackEnvelope->envelopeSize(); + const int sizeSub = envelope->envelopeSize(); + + + AudioCorrelationInfo *info = new AudioCorrelationInfo(sizeMain, sizeSub); + int64_t *correlation = info->correlationVector(); + + const int64_t *envMain = m_mainTrackEnvelope->envelope(); + const int64_t *envSub = envelope->envelope(); + int64_t const* left; + int64_t const* right; + int size; + int64_t sum; + int64_t max = 0; + + + /* + Correlation: + + SHIFT \in [-sS..sM] + + <--sS---- + [ sub ]----sM--->[ sub ] + [ main ] + + ^ correlation vector index = SHIFT + sS + + main is fixed, sub is shifted along main. + + */ + + + QTime t; + t.start(); + for (int shift = -sizeSub; shift <= sizeMain; shift++) { + + if (shift <= 0) { + left = envSub-shift; + right = envMain; + size = std::min(sizeSub+shift, sizeMain); + } else { + left = envSub; + right = envMain+shift; + size = std::min(sizeSub, sizeMain-shift); + } + + sum = 0; + for (int i = 0; i < size; i++) { + sum += (*left) * (*right); + left++; + right++; + } + correlation[sizeSub+shift] = std::abs(sum); + + if (sum > max) { + max = sum; + } + + } + info->setMax(max); + std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." 
<< std::endl; + + + m_children.append(envelope); + m_correlations.append(info); + + Q_ASSERT(m_correlations.size() == m_children.size()); + + return m_children.indexOf(envelope); +} + +int AudioCorrelation::getShift(int childIndex) const +{ + Q_ASSERT(childIndex >= 0); + Q_ASSERT(childIndex < m_correlations.size()); + + int indexOffset = m_correlations.at(childIndex)->maxIndex(); + indexOffset -= m_children.at(childIndex)->envelopeSize(); + + return indexOffset; +} + +AudioCorrelationInfo const* AudioCorrelation::info(int childIndex) const +{ + Q_ASSERT(childIndex >= 0); + Q_ASSERT(childIndex < m_correlations.size()); + + return m_correlations.at(childIndex); +} diff --git a/testingArea/audioCorrelation.h b/testingArea/audioCorrelation.h new file mode 100644 index 00000000..89c53de3 --- /dev/null +++ b/testingArea/audioCorrelation.h @@ -0,0 +1,29 @@ +#ifndef AUDIOCORRELATION_H +#define AUDIOCORRELATION_H + +#include "audioCorrelationInfo.h" +#include "audioEnvelope.h" +#include + +class AudioCorrelationInfo; +class AudioCorrelation +{ +public: + AudioCorrelation(AudioEnvelope *mainTrackEnvelope); + ~AudioCorrelation(); + + int addChild(AudioEnvelope *envelope); +// int childIndex(AudioEnvelope *envelope) const; + + const AudioCorrelationInfo *info(int childIndex) const; + int getShift(int childIndex) const; + + +private: + AudioEnvelope *m_mainTrackEnvelope; + + QList m_children; + QList m_correlations; +}; + +#endif // AUDIOCORRELATION_H diff --git a/testingArea/audioCorrelationInfo.cpp b/testingArea/audioCorrelationInfo.cpp new file mode 100644 index 00000000..1d33aee1 --- /dev/null +++ b/testingArea/audioCorrelationInfo.cpp @@ -0,0 +1,83 @@ +#include "audioCorrelationInfo.h" +#include + +AudioCorrelationInfo::AudioCorrelationInfo(int mainSize, int subSize) : + m_mainSize(mainSize), + m_subSize(subSize), + m_max(-1) +{ + m_correlationVector = new int64_t[m_mainSize+m_subSize+1]; +} + +AudioCorrelationInfo::~AudioCorrelationInfo() +{ + delete m_correlationVector; 
+} + +int AudioCorrelationInfo::size() const +{ + return m_mainSize+m_subSize+1; +} + +void AudioCorrelationInfo::setMax(int64_t max) +{ + m_max = max; +} + +int64_t AudioCorrelationInfo::max() const +{ + Q_ASSERT(m_max > 0); + if (m_max <= 0) { + int width = size(); + int64_t max = 0; + for (int i = 0; i < width; i++) { + if (m_correlationVector[i] > max) { + max = m_correlationVector[i]; + } + } + Q_ASSERT(max > 0); + return max; + } + return m_max; +} + +int AudioCorrelationInfo::maxIndex() const +{ + int64_t max = 0; + int index = 0; + int width = size(); + + for (int i = 0; i < width; i++) { + if (m_correlationVector[i] > max) { + max = m_correlationVector[i]; + index = i; + } + } + + return index; +} + +int64_t* AudioCorrelationInfo::correlationVector() +{ + return m_correlationVector; +} + +QImage AudioCorrelationInfo::toImage(int height) const +{ + int width = size(); + int64_t maxVal = max(); + + QImage img(width, height, QImage::Format_ARGB32); + img.fill(qRgb(255,255,255)); + + int val; + + for (int x = 0; x < width; x++) { + val = m_correlationVector[x]/double(maxVal)*img.height(); + for (int y = img.height()-1; y > img.height() - val - 1; y--) { + img.setPixel(x, y, qRgb(50, 50, 50)); + } + } + + return img; +} diff --git a/testingArea/audioCorrelationInfo.h b/testingArea/audioCorrelationInfo.h new file mode 100644 index 00000000..ee2ea82d --- /dev/null +++ b/testingArea/audioCorrelationInfo.h @@ -0,0 +1,32 @@ +#ifndef AUDIOCORRELATIONINFO_H +#define AUDIOCORRELATIONINFO_H + +#include + +class AudioCorrelationInfo +{ +public: + AudioCorrelationInfo(int mainSize, int subSize); + ~AudioCorrelationInfo(); + + int size() const; + int64_t* correlationVector(); + int64_t const* correlationVector() const; + + int64_t max() const; + void setMax(int64_t max); ///< Can be set to avoid calculating the max again in this function + + int maxIndex() const; + + QImage toImage(int height = 400) const; + +private: + int m_mainSize; + int m_subSize; + + int64_t 
*m_correlationVector; + int64_t m_max; + +}; + +#endif // AUDIOCORRELATIONINFO_H diff --git a/testingArea/audioOffset.cpp b/testingArea/audioOffset.cpp index 30bb936d..842096aa 100644 --- a/testingArea/audioOffset.cpp +++ b/testingArea/audioOffset.cpp @@ -13,8 +13,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -23,11 +26,62 @@ #include "audioInfo.h" #include "audioStreamInfo.h" #include "audioEnvelope.h" +#include "audioCorrelation.h" + +void printUsage(const char *path) +{ + std::cout << "Usage: " << path << "
" << std::endl + << "\t-h, --help\tDisplay this help" << std::endl + << "\t--profile=\tUse the given profile for calculation (run: melt -query profiles)" << std::endl + << "\t--no-images\tDo not save envelope and correlation images" << std::endl + ; +} int main(int argc, char *argv[]) { - char *fileMain; - char *fileSub; + QCoreApplication app(argc, argv); + QStringList args = app.arguments(); + args.removeAt(0); + + std::string profile = "atsc_1080p_24"; + bool saveImages = true; + + // Load arguments + foreach (QString str, args) { + + if (str.startsWith("--profile=")) { + QString s = str; + s.remove(0, QString("--profile=").length()); + profile = s.toStdString(); + args.removeOne(str); + + } else if (str == "-h" || str == "--help") { + printUsage(argv[0]); + return 0; + + } else if (str == "--no-images") { + saveImages = false; + args.removeOne(str); + } + + } + + if (args.length() < 2) { + printUsage(argv[0]); + return 1; + } + + + + std::string fileMain(args.at(0).toStdString()); + args.removeFirst(); + std::string fileSub = args.at(0).toStdString(); + args.removeFirst(); + + + qDebug() << "Unused arguments: " << args; + + if (argc > 2) { fileMain = argv[1]; fileSub = argv[2]; @@ -35,106 +89,79 @@ int main(int argc, char *argv[]) std::cout << "Usage: " << argv[0] << "
" << std::endl; return 0; } - std::cout << "Trying to align (1)\n\t" << fileSub << "\nto fit on (2)\n\t" << fileMain - << "\n, result will indicate by how much (1) has to be moved." << std::endl; + std::cout << "Trying to align (2)\n\t" << fileSub << "\nto fit on (1)\n\t" << fileMain + << "\n, result will indicate by how much (2) has to be moved." << std::endl + << "Profile used: " << profile << std::endl + ; + // Initialize MLT Mlt::Factory::init(NULL); // Load an arbitrary profile - Mlt::Profile prof("hdv_1080_25p"); + Mlt::Profile prof(profile.c_str()); // Load the MLT producers - Mlt::Producer prodMain(prof, fileMain); + Mlt::Producer prodMain(prof, fileMain.c_str()); if (!prodMain.is_valid()) { std::cout << fileMain << " is invalid." << std::endl; return 2; } - Mlt::Producer prodSub(prof, fileSub); + Mlt::Producer prodSub(prof, fileSub.c_str()); if (!prodSub.is_valid()) { std::cout << fileSub << " is invalid." << std::endl; return 2; } + + // Build the audio envelopes for the correlation AudioEnvelope envelopeMain(&prodMain); envelopeMain.loadEnvelope(); envelopeMain.loadStdDev(); envelopeMain.dumpInfo(); - envelopeMain.normalizeEnvelope(); - envelopeMain.dumpInfo(); AudioEnvelope envelopeSub(&prodSub); envelopeSub.loadEnvelope(); - envelopeMain.normalizeEnvelope(); + envelopeSub.loadStdDev(); envelopeSub.dumpInfo(); - QString outImg = QString("envelope-%1.png") - .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); - envelopeMain.drawEnvelope().save(outImg); - std::cout << "Saved volume envelope as " - << QFileInfo(outImg).absoluteFilePath().toStdString() - << std::endl; - - - - const int sizeX = envelopeMain.envelopeSize(); - const int sizeY = envelopeSub.envelopeSize(); - int64_t correlation[sizeX + sizeY + 1]; - const int64_t *envX = envelopeMain.envelope(); - const int64_t *envY = envelopeSub.envelope(); - int64_t const* left; - int64_t const* right; - int size; - int64_t sum; - int64_t max = 0; - - QTime t; - t.start(); - for (int shift = 
-sizeX; shift <= sizeY; shift++) { - - if (shift <= 0) { - left = envX-shift; - right = envY; - size = std::min(sizeX+shift, sizeY); - } else { - left = envX; - right = envY+shift; - size = std::min(sizeX, sizeY-shift); - } - sum = 0; - for (int i = 0; i < size; i++) { - sum += (*left) * (*right); - left++; - right++; - } - correlation[sizeX+shift] = std::abs(sum); - std::cout << sum << " "; - if (sum > max) { - max = sum; - } - } - std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl; - - int val; - QImage img(sizeX + sizeY + 1, 400, QImage::Format_ARGB32); - img.fill(qRgb(255,255,255)); - for (int x = 0; x < sizeX+sizeY+1; x++) { - val = correlation[x]/double(max)*img.height(); - for (int y = img.height()-1; y > img.height() - val - 1; y--) { - img.setPixel(x, y, qRgb(50, 50, 50)); - } - } - outImg = QString("correlation-%1.png") - .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); - img.save(outImg); - std::cout << "Saved volume envelope as " - << QFileInfo(outImg).absoluteFilePath().toStdString() - << std::endl; + // Calculate the correlation and hereby the audio shift + AudioCorrelation corr(&envelopeMain); + int index = corr.addChild(&envelopeSub); + + int shift = corr.getShift(index); + std::cout << fileSub << " should be shifted by " << shift << " frames" << std::endl + << "\trelative to " << fileMain << std::endl + << "\tin a " << prodMain.get_fps() << " fps profile (" << profile << ")." 
<< std::endl + ; + + + if (saveImages) { + QString outImg; + outImg = QString("envelope-main-%1.png") + .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); + envelopeMain.drawEnvelope().save(outImg); + std::cout << "Saved volume envelope as " + << QFileInfo(outImg).absoluteFilePath().toStdString() + << std::endl; + outImg = QString("envelope-sub-%1.png") + .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); + envelopeSub.drawEnvelope().save(outImg); + std::cout << "Saved volume envelope as " + << QFileInfo(outImg).absoluteFilePath().toStdString() + << std::endl; + outImg = QString("correlation-%1.png") + .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); + corr.info(index)->toImage().save(outImg); + std::cout << "Saved correlation image as " + << QFileInfo(outImg).absoluteFilePath().toStdString() + << std::endl; + } return 0; -- 2.39.5