From: Simon A. Eugster Date: Tue, 14 Feb 2012 12:36:21 +0000 (+0100) Subject: Calculating correlation to synchronize audio tracks X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=9e475c571a1a3f985663879cbc40678168f7a323;p=kdenlive Calculating correlation to synchronize audio tracks --- diff --git a/testingArea/audioEnvelope.cpp b/testingArea/audioEnvelope.cpp index 23184e32..5f1bbc5d 100644 --- a/testingArea/audioEnvelope.cpp +++ b/testingArea/audioEnvelope.cpp @@ -19,7 +19,9 @@ AudioEnvelope::AudioEnvelope(Mlt::Producer *producer) : m_envelope(NULL), m_producer(producer), - m_envelopeStdDevCalculated(false) + m_envelopeSize(producer->get_length()), + m_envelopeStdDevCalculated(false), + m_envelopeIsNormalized(false) { m_info = new AudioInfo(m_producer); } @@ -33,14 +35,28 @@ AudioEnvelope::~AudioEnvelope() } + +const int64_t *AudioEnvelope::envelope() +{ + if (m_envelope == NULL) { + loadEnvelope(); + } + return m_envelope; +} +int AudioEnvelope::envelopeSize() const +{ + return m_envelopeSize; +} + + + + void AudioEnvelope::loadEnvelope() { Q_ASSERT(m_envelope == NULL); std::cout << "Loading envelope ..." << std::endl; - m_envelopeSize = m_producer->get_length(); - int samplingRate = m_info->info(0)->samplingRate(); mlt_audio_format format_s16 = mlt_audio_s16; int channels = 1; @@ -49,7 +65,7 @@ void AudioEnvelope::loadEnvelope() int64_t position; int samples; - m_envelope = new uint64_t[m_envelopeSize]; + m_envelope = new int64_t[m_envelopeSize]; m_envelopeMax = 0; m_envelopeMean = 0; @@ -63,7 +79,7 @@ void AudioEnvelope::loadEnvelope() int16_t *data = static_cast(frame->get_audio(format_s16, samplingRate, channels, samples)); - uint64_t sum = 0; + int64_t sum = 0; for (int k = 0; k < samples; k++) { sum += fabs(data[k]); } @@ -99,6 +115,37 @@ int64_t AudioEnvelope::loadStdDev() return m_envelopeStdDev; } +void AudioEnvelope::normalizeEnvelope(bool clampTo0) +{ + if (m_envelope == NULL) { + loadEnvelope(); + } + + if (!m_envelopeIsNormalized) { + + m_envelopeMax = 0; + int64_t newMean = 0; + for (int i = 0; i < m_envelopeSize; i++) { + + m_envelope[i] -= m_envelopeMean; + + if (clampTo0) { + if (m_envelope[i] < 0) { m_envelope[i] = 0; } + } + + if (m_envelope[i] > m_envelopeMax) { + m_envelopeMax = m_envelope[i]; + } + + newMean += m_envelope[i]; + } + m_envelopeMean = newMean / m_envelopeSize; + + m_envelopeIsNormalized = true; + } + +} + QImage AudioEnvelope::drawEnvelope() { if (m_envelope == NULL) { diff --git a/testingArea/audioEnvelope.h b/testingArea/audioEnvelope.h index b679d367..bdf4a0b8 100644 --- a/testingArea/audioEnvelope.h +++ b/testingArea/audioEnvelope.h @@ -21,24 +21,30 @@ public: AudioEnvelope(Mlt::Producer *producer); ~AudioEnvelope(); + /// Returns the envelope, calculates it if necessary. + int64_t const* envelope(); + int envelopeSize() const; + void loadEnvelope(); int64_t loadStdDev(); + void normalizeEnvelope(bool clampTo0 = false); QImage drawEnvelope(); void dumpInfo() const; private: - uint64_t *m_envelope; + int64_t *m_envelope; Mlt::Producer *m_producer; AudioInfo *m_info; int m_envelopeSize; - uint64_t m_envelopeMax; - uint64_t m_envelopeMean; - uint64_t m_envelopeStdDev; + int64_t m_envelopeMax; + int64_t m_envelopeMean; + int64_t m_envelopeStdDev; bool m_envelopeStdDevCalculated; + bool m_envelopeIsNormalized; }; #endif // AUDIOENVELOPE_H diff --git a/testingArea/audioOffset.cpp b/testingArea/audioOffset.cpp index 6eb54985..30bb936d 100644 --- a/testingArea/audioOffset.cpp +++ b/testingArea/audioOffset.cpp @@ -50,32 +50,23 @@ int main(int argc, char *argv[]) std::cout << fileMain << " is invalid." << std::endl; return 2; } - Mlt::Producer profSub(prof, fileSub); - if (!profSub.is_valid()) { + Mlt::Producer prodSub(prof, fileSub); + if (!prodSub.is_valid()) { std::cout << fileSub << " is invalid." << std::endl; return 2; } - AudioInfo infoMain(&prodMain); - AudioInfo infoSub(&profSub); - infoMain.dumpInfo(); - infoSub.dumpInfo(); - - prodMain.get_fps(); - - - int framesToFetch = prodMain.get_length(); - std::cout << "Length: " << framesToFetch - << " (Seconds: " << framesToFetch/prodMain.get_fps() << ")" - << std::endl; - if (framesToFetch > 5000) { - framesToFetch = 5000; - } - AudioEnvelope envelopeMain(&prodMain); envelopeMain.loadEnvelope(); envelopeMain.loadStdDev(); envelopeMain.dumpInfo(); + envelopeMain.normalizeEnvelope(); + envelopeMain.dumpInfo(); + + AudioEnvelope envelopeSub(&prodSub); + envelopeSub.loadEnvelope(); + envelopeMain.normalizeEnvelope(); + envelopeSub.dumpInfo(); QString outImg = QString("envelope-%1.png") @@ -86,6 +77,66 @@ int main(int argc, char *argv[]) << std::endl; + + const int sizeX = envelopeMain.envelopeSize(); + const int sizeY = envelopeSub.envelopeSize(); + int64_t correlation[sizeX + sizeY + 1]; + const int64_t *envX = envelopeMain.envelope(); + const int64_t *envY = envelopeSub.envelope(); + int64_t const* left; + int64_t const* right; + int size; + int64_t sum; + int64_t max = 0; + + QTime t; + t.start(); + for (int shift = -sizeX; shift <= sizeY; shift++) { + + if (shift <= 0) { + left = envX-shift; + right = envY; + size = std::min(sizeX+shift, sizeY); + } else { + left = envX; + right = envY+shift; + size = std::min(sizeX, sizeY-shift); + } + + sum = 0; + for (int i = 0; i < size; i++) { + sum += (*left) * (*right); + left++; + right++; + } + correlation[sizeX+shift] = std::abs(sum); + std::cout << sum << " "; + + if (sum > max) { + max = sum; + } + + } + std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl; + + int val; + QImage img(sizeX + sizeY + 1, 400, QImage::Format_ARGB32); + img.fill(qRgb(255,255,255)); + for (int x = 0; x < sizeX+sizeY+1; x++) { + val = correlation[x]/double(max)*img.height(); + for (int y = img.height()-1; y > img.height() - val - 1; y--) { + img.setPixel(x, y, qRgb(50, 50, 50)); + } + } + + outImg = QString("correlation-%1.png") + .arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss")); + img.save(outImg); + std::cout << "Saved volume envelope as " + << QFileInfo(outImg).absoluteFilePath().toStdString() + << std::endl; + + return 0; }