]> git.sesse.net Git - kdenlive/commitdiff
Audio alignment works.
authorSimon A. Eugster <simon.eu@gmail.com>
Tue, 14 Feb 2012 15:05:01 +0000 (16:05 +0100)
committerSimon A. Eugster <simon.eu@gmail.com>
Tue, 14 Feb 2012 15:05:01 +0000 (16:05 +0100)
This is at the moment still a brute-force attack (correlation
currently requires O(n²) time) but /works/ and is reasonably
fast for clips of reasonable length (not too many minutes).

Later the correlation can be replaced with an FFT version which
would only require O(n log n) time.

Example usage of the executable:
$ ./audioOffset mainVideo.avi someSound.wav

This will e.g. tell you that someSound.wav needs to be shifted
by 42 frames in order to be aligned. The MLT profile can be
specified with --profile=<mlt profile>.

testingArea/CMakeLists.txt
testingArea/audioCorrelation.cpp [new file with mode: 0644]
testingArea/audioCorrelation.h [new file with mode: 0644]
testingArea/audioCorrelationInfo.cpp [new file with mode: 0644]
testingArea/audioCorrelationInfo.h [new file with mode: 0644]
testingArea/audioOffset.cpp

index da4c533eab9606a25aad736fade7145c5a4bc497..340acf321bbdf2efe75c410fd8c07931da3f99f9 100644 (file)
@@ -7,7 +7,7 @@ include_directories(
 )
 include(${QT_USE_FILE})
 
-add_executable(audioOffset  audioOffset.cpp audioInfo.cpp audioStreamInfo.cpp audioEnvelope.cpp)
+add_executable(audioOffset  audioOffset.cpp audioInfo.cpp audioStreamInfo.cpp audioEnvelope.cpp audioCorrelation.cpp audioCorrelationInfo.cpp)
 target_link_libraries(audioOffset 
   ${QT_LIBRARIES}
   ${LIBMLT_LIBRARY}
diff --git a/testingArea/audioCorrelation.cpp b/testingArea/audioCorrelation.cpp
new file mode 100644 (file)
index 0000000..96adab6
--- /dev/null
@@ -0,0 +1,109 @@
+#include "audioCorrelation.h"
+
+#include <QTime>
+#include <cmath>
+#include <iostream>
+
+/**
+ * Creates a correlation helper around the main (reference) track.
+ *
+ * The given envelope is stored as a plain pointer and normalizeEnvelope()
+ * is called on it right away.
+ *
+ * @param mainTrackEnvelope Envelope of the track all children are compared to.
+ */
+AudioCorrelation::AudioCorrelation(AudioEnvelope *mainTrackEnvelope)
+    : m_mainTrackEnvelope(mainTrackEnvelope)
+{
+    // The member was just initialised from the argument, so both refer to
+    // the same envelope object.
+    mainTrackEnvelope->normalizeEnvelope();
+}
+
+/**
+ * Releases the AudioCorrelationInfo objects that addChild() allocated.
+ *
+ * The envelopes (main track and children) are only referenced by pointer
+ * and are not deleted here. Because the info objects are freed in this
+ * destructor, instances of this class must not be copied.
+ */
+AudioCorrelation::~AudioCorrelation()
+{
+    qDeleteAll(m_correlations);
+    m_correlations.clear();
+}
+
+/**
+ * Correlates a child envelope against the main track and stores the result.
+ *
+ * normalizeEnvelope() is called on the child, then for every shift in
+ * [-sizeSub, sizeMain] the sum of products of the overlapping envelope
+ * samples is computed. The absolute value of each sum is written to the
+ * correlation vector at index (shift + sizeSub).
+ *
+ * @param envelope Envelope of the clip to align; only the pointer is stored.
+ * @return Index under which the child was registered; pass it to getShift()
+ *         and info().
+ */
+int AudioCorrelation::addChild(AudioEnvelope *envelope)
+{
+    envelope->normalizeEnvelope();
+
+    const int sizeMain = m_mainTrackEnvelope->envelopeSize();
+    const int sizeSub = envelope->envelopeSize();
+
+    AudioCorrelationInfo *info = new AudioCorrelationInfo(sizeMain, sizeSub);
+    int64_t *correlation = info->correlationVector();
+
+    const int64_t *envMain = m_mainTrackEnvelope->envelope();
+    const int64_t *envSub = envelope->envelope();
+    int64_t max = 0;
+
+
+    /*
+      Correlation:
+
+      SHIFT \in [-sS..sM]
+
+      <--sS----
+      [  sub  ]----sM--->[ sub ]
+               [  main  ]
+
+            ^ correlation vector index = SHIFT + sS
+
+      main is fixed, sub is shifted along main.
+
+    */
+
+
+    QTime t;
+    t.start();
+    for (int shift = -sizeSub; shift <= sizeMain; shift++) {
+
+        int64_t const* left;
+        int64_t const* right;
+        int size;
+
+        if (shift <= 0) {
+            // Sub starts before main: skip the first -shift samples of sub.
+            left = envSub-shift;
+            right = envMain;
+            size = std::min(sizeSub+shift, sizeMain);
+        } else {
+            // Sub starts inside main: skip the first shift samples of main.
+            left = envSub;
+            right = envMain+shift;
+            size = std::min(sizeSub, sizeMain-shift);
+        }
+
+        int64_t sum = 0;
+        for (int i = 0; i < size; i++) {
+            sum += left[i] * right[i];
+        }
+
+        // Computed by hand instead of std::abs so the result does not depend
+        // on which abs overloads the included headers happen to provide.
+        const int64_t magnitude = (sum < 0) ? -sum : sum;
+        correlation[sizeSub+shift] = magnitude;
+
+        // Track the maximum of what is actually stored in the vector, so the
+        // value handed to setMax() matches a rescan of the vector.
+        if (magnitude > max) {
+            max = magnitude;
+        }
+
+    }
+    info->setMax(max);
+    std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl;
+
+
+    m_children.append(envelope);
+    m_correlations.append(info);
+
+    Q_ASSERT(m_correlations.size() == m_children.size());
+
+    // The child was appended last. indexOf() would return the first
+    // occurrence, which is wrong if the same envelope is added twice.
+    return m_children.size() - 1;
+}
+
+/**
+ * Returns the shift at which the stored correlation of a child peaks.
+ *
+ * The correlation vector index is (shift + child envelope size), so the
+ * child's envelope size is subtracted from the peak index.
+ *
+ * @param childIndex Index previously returned by addChild().
+ * @return Shift of the child relative to the main track.
+ */
+int AudioCorrelation::getShift(int childIndex) const
+{
+    Q_ASSERT(childIndex >= 0);
+    Q_ASSERT(childIndex < m_correlations.size());
+
+    const int peakIndex = m_correlations.at(childIndex)->maxIndex();
+    const int childSize = m_children.at(childIndex)->envelopeSize();
+
+    return peakIndex - childSize;
+}
+
+/**
+ * Gives read-only access to the correlation data stored for a child.
+ *
+ * @param childIndex Index previously returned by addChild().
+ * @return Pointer to the correlation info stored at that index.
+ */
+AudioCorrelationInfo const* AudioCorrelation::info(int childIndex) const
+{
+    Q_ASSERT(childIndex >= 0);
+    Q_ASSERT(childIndex < m_correlations.size());
+
+    AudioCorrelationInfo const* const stored = m_correlations.at(childIndex);
+    return stored;
+}
diff --git a/testingArea/audioCorrelation.h b/testingArea/audioCorrelation.h
new file mode 100644 (file)
index 0000000..89c53de
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef AUDIOCORRELATION_H
+#define AUDIOCORRELATION_H
+
+#include "audioCorrelationInfo.h"
+#include "audioEnvelope.h"
+#include <QList>
+
+class AudioCorrelationInfo;
+/**
+ * Correlates the audio envelopes of one or more child clips against a
+ * main track in order to find the shift that aligns them.
+ *
+ * The object holds raw pointers to the envelopes and to the per-child
+ * correlation results, so it is made non-copyable.
+ */
+class AudioCorrelation
+{
+public:
+    /// @param mainTrackEnvelope Envelope of the reference track (pointer is stored).
+    explicit AudioCorrelation(AudioEnvelope *mainTrackEnvelope);
+    ~AudioCorrelation();
+
+    /// Correlates \p envelope with the main track and returns the child's index.
+    int addChild(AudioEnvelope *envelope);
+//    int childIndex(AudioEnvelope *envelope) const;
+
+    /// Correlation data stored for the child at \p childIndex.
+    const AudioCorrelationInfo *info(int childIndex) const;
+    /// Shift of the child at \p childIndex relative to the main track.
+    int getShift(int childIndex) const;
+
+
+private:
+    // Copies would share the same raw pointers; forbid them.
+    Q_DISABLE_COPY(AudioCorrelation)
+
+    AudioEnvelope *m_mainTrackEnvelope;
+
+    QList<AudioEnvelope*> m_children;             ///< Child envelopes, in addChild() order.
+    QList<AudioCorrelationInfo*> m_correlations;  ///< One entry per child, same order as m_children.
+};
+
+#endif // AUDIOCORRELATION_H
diff --git a/testingArea/audioCorrelationInfo.cpp b/testingArea/audioCorrelationInfo.cpp
new file mode 100644 (file)
index 0000000..1d33aee
--- /dev/null
@@ -0,0 +1,83 @@
+#include "audioCorrelationInfo.h"
+#include <iostream>
+
+/**
+ * Allocates storage for the correlation of a main and a sub envelope.
+ *
+ * The vector has mainSize + subSize + 1 entries and starts out zeroed, so
+ * max(), maxIndex() and toImage() never read indeterminate values even if
+ * they are called before the vector has been filled.
+ *
+ * @param mainSize Number of samples in the main envelope.
+ * @param subSize  Number of samples in the sub envelope.
+ */
+AudioCorrelationInfo::AudioCorrelationInfo(int mainSize, int subSize) :
+    m_mainSize(mainSize),
+    m_subSize(subSize),
+    m_max(-1)
+{
+    // The trailing () value-initialises every element to 0.
+    m_correlationVector = new int64_t[m_mainSize+m_subSize+1]();
+}
+
+/**
+ * Frees the correlation vector.
+ *
+ * The vector is allocated with new[] in the constructor and therefore has
+ * to be released with delete[]; a plain delete is undefined behaviour.
+ */
+AudioCorrelationInfo::~AudioCorrelationInfo()
+{
+    delete[] m_correlationVector;
+}
+
+/**
+ * Number of entries in the correlation vector
+ * (main size + sub size + 1, matching the constructor's allocation).
+ */
+int AudioCorrelationInfo::size() const
+{
+    const int entryCount = m_mainSize + m_subSize + 1;
+    return entryCount;
+}
+
+/**
+ * Stores a precomputed maximum so that max() does not have to scan the
+ * correlation vector.
+ *
+ * @param max Value that max() returns afterwards, provided it is positive.
+ */
+void AudioCorrelationInfo::setMax(int64_t max)
+{
+    this->m_max = max;
+}
+
+/**
+ * Returns the largest value of the correlation vector.
+ *
+ * If a positive maximum was stored through setMax(), it is returned
+ * directly. Otherwise (the constructor's default is -1) the vector is
+ * scanned. The former leading Q_ASSERT(m_max > 0) has been dropped: it
+ * aborted debug builds before this fallback could ever run.
+ *
+ * @return The cached or freshly computed maximum.
+ */
+int64_t AudioCorrelationInfo::max() const
+{
+    if (m_max > 0) {
+        return m_max;
+    }
+
+    // No usable cached value: compute the maximum from the vector.
+    const int width = size();
+    int64_t max = 0;
+    for (int i = 0; i < width; i++) {
+        if (m_correlationVector[i] > max) {
+            max = m_correlationVector[i];
+        }
+    }
+    Q_ASSERT(max > 0);
+    return max;
+}
+
+/**
+ * Returns the index of the first entry holding the largest positive value
+ * of the correlation vector, or 0 if no entry is greater than zero.
+ */
+int AudioCorrelationInfo::maxIndex() const
+{
+    const int entryCount = size();
+    int bestIndex = 0;
+    int64_t bestValue = 0;
+
+    for (int pos = 0; pos < entryCount; pos++) {
+        const int64_t candidate = m_correlationVector[pos];
+        // Strict comparison keeps the earliest index on ties.
+        if (candidate > bestValue) {
+            bestValue = candidate;
+            bestIndex = pos;
+        }
+    }
+
+    return bestIndex;
+}
+
+/**
+ * Mutable access to the correlation vector; it has size() entries.
+ */
+int64_t* AudioCorrelationInfo::correlationVector()
+{
+    return m_correlationVector;
+}
+
+/**
+ * Read-only access to the correlation vector; it has size() entries.
+ * This overload is declared in the header and needs a definition here,
+ * otherwise any call on a const object fails at link time.
+ */
+int64_t const* AudioCorrelationInfo::correlationVector() const
+{
+    return m_correlationVector;
+}
+
+/**
+ * Renders the correlation vector as a bar chart.
+ *
+ * The image is size() pixels wide and \p height pixels high, with a white
+ * background and one dark grey bar per vector entry growing up from the
+ * bottom. Bars are scaled so that max() fills the full height.
+ *
+ * @param height Height of the image in pixels.
+ * @return The rendered image; only the white background if max() is not
+ *         positive.
+ */
+QImage AudioCorrelationInfo::toImage(int height) const
+{
+    const int width = size();
+    const int64_t maxVal = max();
+
+    QImage img(width, height, QImage::Format_ARGB32);
+    img.fill(qRgb(255,255,255));
+
+    // Without a positive maximum there is nothing to scale against;
+    // dividing by it would produce inf/NaN.
+    if (maxVal <= 0) {
+        return img;
+    }
+
+    const int imgHeight = img.height();
+
+    for (int x = 0; x < width; x++) {
+        // Clamp before converting to int: an entry larger than maxVal would
+        // otherwise make the loop below write above the top of the image.
+        const double scaled = m_correlationVector[x]/double(maxVal)*imgHeight;
+        const int val = int(qBound(0.0, scaled, double(imgHeight)));
+        for (int y = imgHeight-1; y > imgHeight - val - 1; y--) {
+            img.setPixel(x, y, qRgb(50, 50, 50));
+        }
+    }
+
+    return img;
+}
diff --git a/testingArea/audioCorrelationInfo.h b/testingArea/audioCorrelationInfo.h
new file mode 100644 (file)
index 0000000..ee2ea82
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef AUDIOCORRELATIONINFO_H
+#define AUDIOCORRELATIONINFO_H
+
+#include <QImage>
+
+/**
+ * Holds the result of correlating a main envelope with a sub envelope:
+ * a vector with (mainSize + subSize + 1) entries, plus helpers to query
+ * its maximum and to draw it.
+ *
+ * The vector is a raw array owned by this object and freed in the
+ * destructor, so copying is disabled to prevent a double free.
+ */
+class AudioCorrelationInfo
+{
+public:
+    AudioCorrelationInfo(int mainSize, int subSize);
+    ~AudioCorrelationInfo();
+
+    int size() const; ///< Number of entries in the correlation vector
+    int64_t* correlationVector();
+    int64_t const* correlationVector() const;
+
+    int64_t max() const;
+    void setMax(int64_t max); ///< Can be set to avoid calculating the max again in this function
+
+    int maxIndex() const; ///< Index of the largest entry in the correlation vector
+
+    QImage toImage(int height = 400) const; ///< Bar chart of the correlation vector
+
+private:
+    // The destructor frees m_correlationVector; a copy would free it twice.
+    Q_DISABLE_COPY(AudioCorrelationInfo)
+
+    int m_mainSize;
+    int m_subSize;
+
+    int64_t *m_correlationVector;
+    int64_t m_max;
+
+};
+
+#endif // AUDIOCORRELATIONINFO_H
index 30bb936d5b155ddffee4e29f6095b91ccd8ce8f0..842096aa61c4113c0748925197730f352760f474 100644 (file)
 #include <QFile>
 #include <QTime>
 #include <QImage>
+#include <QDebug>
 #include <QFileInfo>
 #include <QDateTime>
+#include <QStringList>
+#include <QCoreApplication>
 #include <mlt++/Mlt.h>
 #include <iostream>
 #include <cstdlib>
 #include "audioInfo.h"
 #include "audioStreamInfo.h"
 #include "audioEnvelope.h"
+#include "audioCorrelation.h"
+
+/**
+ * Prints the command line help to standard output.
+ *
+ * @param path Name of the executable as it should appear in the usage line.
+ */
+void printUsage(const char *path)
+{
+    // Synopsis first, then one line per option.
+    std::cout << "Usage: " << path << " <main audio file> <second audio file>" << std::endl;
+    std::cout << "\t-h, --help\tDisplay this help" << std::endl;
+    std::cout << "\t--profile=<profile>\tUse the given profile for calculation (run: melt -query profiles)" << std::endl;
+    std::cout << "\t--no-images\tDo not save envelope and correlation images" << std::endl;
+}
 
 int main(int argc, char *argv[])
 {
-    char *fileMain;
-    char *fileSub;
+    QCoreApplication app(argc, argv);
+    QStringList args = app.arguments();
+    args.removeAt(0);
+
+    std::string profile = "atsc_1080p_24";
+    bool saveImages = true;
+
+    // Load arguments
+    foreach (QString str, args) {
+
+        if (str.startsWith("--profile=")) {
+            QString s = str;
+            s.remove(0, QString("--profile=").length());
+            profile = s.toStdString();
+            args.removeOne(str);
+
+        } else if (str == "-h" || str == "--help") {
+            printUsage(argv[0]);
+            return 0;
+
+        } else if (str == "--no-images") {
+            saveImages = false;
+            args.removeOne(str);
+        }
+
+    }
+
+    if (args.length() < 2) {
+        printUsage(argv[0]);
+        return 1;
+    }
+
+
+
+    std::string fileMain(args.at(0).toStdString());
+    args.removeFirst();
+    std::string fileSub = args.at(0).toStdString();
+    args.removeFirst();
+
+
+    qDebug() << "Unused arguments: " << args;
+
+
     if (argc > 2) {
         fileMain = argv[1];
         fileSub = argv[2];
@@ -35,106 +89,79 @@ int main(int argc, char *argv[])
         std::cout << "Usage: " << argv[0] << " <main audio file> <second audio file>" << std::endl;
         return 0;
     }
-    std::cout << "Trying to align (1)\n\t" << fileSub << "\nto fit on (2)\n\t" << fileMain
-              << "\n, result will indicate by how much (1) has to be moved." << std::endl;
+    std::cout << "Trying to align (2)\n\t" << fileSub << "\nto fit on (1)\n\t" << fileMain
+              << "\n, result will indicate by how much (2) has to be moved." << std::endl
+              << "Profile used: " << profile << std::endl
+                 ;
+
 
     // Initialize MLT
     Mlt::Factory::init(NULL);
 
     // Load an arbitrary profile
-    Mlt::Profile prof("hdv_1080_25p");
+    Mlt::Profile prof(profile.c_str());
 
     // Load the MLT producers
-    Mlt::Producer prodMain(prof, fileMain);
+    Mlt::Producer prodMain(prof, fileMain.c_str());
     if (!prodMain.is_valid()) {
         std::cout << fileMain << " is invalid." << std::endl;
         return 2;
     }
-    Mlt::Producer prodSub(prof, fileSub);
+    Mlt::Producer prodSub(prof, fileSub.c_str());
     if (!prodSub.is_valid()) {
         std::cout << fileSub << " is invalid." << std::endl;
         return 2;
     }
 
+
+    // Build the audio envelopes for the correlation
     AudioEnvelope envelopeMain(&prodMain);
     envelopeMain.loadEnvelope();
     envelopeMain.loadStdDev();
     envelopeMain.dumpInfo();
-    envelopeMain.normalizeEnvelope();
-    envelopeMain.dumpInfo();
 
     AudioEnvelope envelopeSub(&prodSub);
     envelopeSub.loadEnvelope();
-    envelopeMain.normalizeEnvelope();
+    envelopeSub.loadStdDev();
     envelopeSub.dumpInfo();
 
 
-    QString outImg = QString("envelope-%1.png")
-            .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
-    envelopeMain.drawEnvelope().save(outImg);
-    std::cout << "Saved volume envelope as "
-              << QFileInfo(outImg).absoluteFilePath().toStdString()
-              << std::endl;
-
-
-
-    const int sizeX = envelopeMain.envelopeSize();
-    const int sizeY = envelopeSub.envelopeSize();
-    int64_t correlation[sizeX + sizeY + 1];
-    const int64_t *envX = envelopeMain.envelope();
-    const int64_t *envY = envelopeSub.envelope();
-    int64_t const* left;
-    int64_t const* right;
-    int size;
-    int64_t sum;
-    int64_t max = 0;
-
-    QTime t;
-    t.start();
-    for (int shift = -sizeX; shift <= sizeY; shift++) {
-
-        if (shift <= 0) {
-            left = envX-shift;
-            right = envY;
-            size = std::min(sizeX+shift, sizeY);
-        } else {
-            left = envX;
-            right = envY+shift;
-            size = std::min(sizeX, sizeY-shift);
-        }
 
-        sum = 0;
-        for (int i = 0; i < size; i++) {
-            sum += (*left) * (*right);
-            left++;
-            right++;
-        }
-        correlation[sizeX+shift] = std::abs(sum);
-        std::cout << sum << " ";
 
-        if (sum > max) {
-            max = sum;
-        }
 
-    }
-    std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl;
-
-    int val;
-    QImage img(sizeX + sizeY + 1, 400, QImage::Format_ARGB32);
-    img.fill(qRgb(255,255,255));
-    for (int x = 0; x < sizeX+sizeY+1; x++) {
-        val = correlation[x]/double(max)*img.height();
-        for (int y = img.height()-1; y > img.height() - val - 1; y--) {
-            img.setPixel(x, y, qRgb(50, 50, 50));
-        }
-    }
 
-    outImg = QString("correlation-%1.png")
-            .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
-    img.save(outImg);
-    std::cout << "Saved volume envelope as "
-              << QFileInfo(outImg).absoluteFilePath().toStdString()
-              << std::endl;
+    // Calculate the correlation and hereby the audio shift
+    AudioCorrelation corr(&envelopeMain);
+    int index = corr.addChild(&envelopeSub);
+
+    int shift = corr.getShift(index);
+    std::cout << fileSub << " should be shifted by " << shift << " frames" << std::endl
+              << "\trelative to " << fileMain << std::endl
+              << "\tin a " << prodMain.get_fps() << " fps profile (" << profile << ")." << std::endl
+                 ;
+
+
+    if (saveImages) {
+        QString outImg;
+        outImg = QString("envelope-main-%1.png")
+                .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
+        envelopeMain.drawEnvelope().save(outImg);
+        std::cout << "Saved volume envelope as "
+                  << QFileInfo(outImg).absoluteFilePath().toStdString()
+                  << std::endl;
+        outImg = QString("envelope-sub-%1.png")
+                .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
+        envelopeSub.drawEnvelope().save(outImg);
+        std::cout << "Saved volume envelope as "
+                  << QFileInfo(outImg).absoluteFilePath().toStdString()
+                  << std::endl;
+        outImg = QString("correlation-%1.png")
+                .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
+        corr.info(index)->toImage().save(outImg);
+        std::cout << "Saved correlation image as "
+                  << QFileInfo(outImg).absoluteFilePath().toStdString()
+                  << std::endl;
+    }
 
 
     return 0;