]> git.sesse.net Git - pitch/blobdiff - pitch.cpp
Fix an off-by-one in the undertone detection.
[pitch] / pitch.cpp
index 92c7b085cf026ce6a9aa9a2f5ff070bb875e7115..91afda5fb0618d7f0e611223f1089980f1a123ee 100644 (file)
--- a/pitch.cpp
+++ b/pitch.cpp
 #define EQUAL_TEMPERAMENT     0
 #define WELL_TEMPERED_GUITAR  1
 
-#define TUNING EQUAL_TEMPERAMENT
+#define TUNING WELL_TEMPERED_GUITAR
 
 int get_dsp_fd();
 void read_chunk(int fd, double *in, unsigned num_samples);
 void apply_window(double *in, double *out, unsigned num_samples);
 std::pair<double, double> find_peak(double *in, unsigned num_samples);
 void find_peak_magnitudes(std::complex<double> *in, double *out, unsigned num_samples);
+std::pair<double, double> adjust_for_overtones(std::pair<double, double> base, double *in, unsigned num_samples);
 double bin_to_freq(double bin, unsigned num_samples);
+double freq_to_bin(double freq, unsigned num_samples);
 std::string freq_to_tonename(double freq);
 std::pair<double, double> interpolate_peak(double ym1, double y0, double y1);
 void print_spectrogram(double freq, double amp);
@@ -44,7 +46,7 @@ int main()
        in = reinterpret_cast<double *> (fftw_malloc(sizeof(double) * FFT_LENGTH / PAD_FACTOR));
        in_windowed = reinterpret_cast<double *> (fftw_malloc(sizeof(double) * FFT_LENGTH));
        out = reinterpret_cast<std::complex<double> *> (fftw_malloc(sizeof(std::complex<double>) * (FFT_LENGTH / 2 + 1)));
-       bins = reinterpret_cast<double *> (fftw_malloc(sizeof(double) * FFT_LENGTH / 2 + 1));
+       bins = reinterpret_cast<double *> (fftw_malloc(sizeof(double) * (FFT_LENGTH / 2 + 1)));
 
        memset(in, 0, sizeof(double) * FFT_LENGTH / PAD_FACTOR);
 
@@ -58,6 +60,8 @@ int main()
                fftw_execute(p);
                find_peak_magnitudes(out, bins, FFT_LENGTH);
                std::pair<double, double> peak = find_peak(bins, FFT_LENGTH);
+               if (peak.first > 0.0)
+                       peak = adjust_for_overtones(peak, bins, FFT_LENGTH);
 
                if (peak.first < 50.0 || peak.second - log10(FFT_LENGTH) < 0.0) {
 #if TUNING == WELL_TEMPERED_GUITAR
@@ -87,9 +91,14 @@ int get_dsp_fd()
        int chan = 1;
        ioctl(fd, SOUND_PCM_WRITE_CHANNELS, &chan);
        
-       int rate = 22050;
+       int rate = SAMPLE_RATE;
        ioctl(fd, SOUND_PCM_WRITE_RATE, &rate);
 
+       int max_fragments = 2;
+       int frag_shift = ffs(FFT_LENGTH / OVERLAP) - 1;
+       int fragments = (max_fragments << 16) | frag_shift;
+        ioctl(fd, SNDCTL_DSP_SETFRAGMENT, &fragments);
+       
        ioctl(3, SNDCTL_DSP_SYNC, 0);
        
        return fd;
@@ -216,9 +225,9 @@ std::pair<double, double> find_peak(double *in, unsigned num_samples)
                        best_bin /= i;
 
                        // consider sliding one bin up or down
-                       if (best_bin > 0 && in[best_bin - 1] > in[best_bin] && in[best_bin - 1] > in[best_bin - 2]) {
+                       if (best_bin > 1 && in[best_bin - 1] > in[best_bin] && in[best_bin - 1] > in[best_bin - 2]) {
                                --best_bin;
-                       } else if (best_bin < num_samples / 2 && in[best_bin + 1] > in[best_bin] && in[best_bin + 1] > in[best_bin + 2]) {
+                       } else if (best_bin < num_samples / 2 - 1 && in[best_bin + 1] > in[best_bin] && in[best_bin + 1] > in[best_bin + 2]) {
                                ++best_bin;
                        }
                           
@@ -226,6 +235,9 @@ std::pair<double, double> find_peak(double *in, unsigned num_samples)
                }
        }
 
+       if (best_bin == 0 || best_bin == num_samples / 2) {
+               return std::make_pair(-1.0, 0.0);
+       }
        std::pair<double, double> peak = 
                interpolate_peak(in[best_bin - 1],
                                 in[best_bin],
@@ -234,10 +246,65 @@ std::pair<double, double> find_peak(double *in, unsigned num_samples)
        return std::make_pair(bin_to_freq(double(best_bin) + peak.first, num_samples), peak.second);
 }
 
+// it's perhaps not ideal to _first_ find the peak and _then_ the harmonics --
+// ideally, one should find the set of all peaks and then determine the likely
+// base from that... something for later. :-)
+std::pair<double, double> adjust_for_overtones(std::pair<double, double> base, double *in, unsigned num_samples)
+{
+       double mu = base.first, var = 1.0 / (base.second * base.second);
+               
+       //printf("Base at %.2f (amp=%5.2fdB)\n", base.first, base.second);
+
+       for (unsigned i = 2; i < 10; ++i) {
+               unsigned middle = unsigned(floor(freq_to_bin(base.first, num_samples) * i + 0.5));
+               unsigned lower = middle - (i+1)/2, upper = middle + (i+1)/2;
+
+               if (upper >= num_samples)
+                       upper = num_samples - 2;
+
+               // printf("Searching in [%u,%u] = %f..%f\n", lower, upper, bin_to_freq(lower, num_samples), bin_to_freq(upper, num_samples));
+
+               // search for a peak in this interval
+               double best_harmonics_freq = -1.0;
+               double best_harmonics_amp = -1.0;
+               for (unsigned j = lower; j <= upper; ++j) {
+                       if (in[j] > in[j-1] && in[j] > in[j+1]) {
+                               std::pair<double, double> peak =
+                                       interpolate_peak(in[j - 1],
+                                               in[j],
+                                               in[j + 1]);
+                               
+                               if (peak.second > best_harmonics_amp) {
+                                       best_harmonics_freq = bin_to_freq(j + peak.first, num_samples);
+                                       best_harmonics_amp = peak.second;
+                               }
+                       }
+               }
+
+               if (best_harmonics_amp <= 0.0)
+                       continue;
+
+               //printf("Found overtone %u at %.2f (amp=%5.2fdB)\n", i, best_harmonics_freq,
+               //       best_harmonics_amp);
+
+               double this_mu = best_harmonics_freq / double(i);
+               double this_var = 1.0 / (best_harmonics_amp * best_harmonics_amp);
+
+               double k = var / (var + this_var);
+               mu = (1.0 - k) * mu + k * this_mu;
+               var *= (1.0 - k);
+       }
+       return std::make_pair(mu, base.second);
+}
+
 double bin_to_freq(double bin, unsigned num_samples)
 {
        return bin * SAMPLE_RATE / double(num_samples);
 }
+double freq_to_bin(double freq, unsigned num_samples)
+{
+       return freq * double(num_samples) / double(SAMPLE_RATE);
+}
 
 /*
  * Given three bins, find the interpolated real peak based