Entropy Piano Tuner  1.1.3 (documentation not yet complete)
An open-source experimental software for piano tuning by entropy minimization
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
keyrecognizer.cpp
Go to the documentation of this file.
1 /*****************************************************************************
2  * Copyright 2015 Haye Hinrichsen, Christoph Wick
3  *
4  * This file is part of Entropy Piano Tuner.
5  *
6  * Entropy Piano Tuner is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by the
8  * Free Software Foundation, either version 3 of the License, or (at your
9  * option) any later version.
10  *
11  * Entropy Piano Tuner is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * Entropy Piano Tuner. If not, see http://www.gnu.org/licenses/.
18  *****************************************************************************/
19 
20 //=============================================================================
21 // Key recognizer
22 //=============================================================================
23 
24 #include "keyrecognizer.h"
25 
26 #include <iostream>
27 #include <fstream>
28 #include <functional>
29 #include <algorithm>
30 
31 #include "../config.h"
32 #include "../math/mathtools.h"
33 #include "../piano/piano.h"
34 #include "../system/log.h"
35 
36 const int KeyRecognizer::M = 1024;
37 const double KeyRecognizer::fmin = 20;
38 const double KeyRecognizer::fmax = 10000;
39 const double KeyRecognizer::logfmin = log(fmin);
40 const double KeyRecognizer::logfmax = log(fmax);
41 
42 
43 //-----------------------------------------------------------------------------
44 // Constructor
45 //-----------------------------------------------------------------------------
46 
51 
53  mCallback(callback), // Pointer to the caller
54  mFFTPtr(nullptr), // Pointer to the Fourier transform
55  mConcertPitch(0), // Concert pitch in Hz (normally 440)
56  mPiano(nullptr), // Pointer to the piano data
57  mNumberOfKeys(0), // Number of piano keys (normally 88)
58  mKeyNumberOfA(0), // Index of the A-key (normally 48)
59  mFFT(), // Instance of FFT implementation
60  mLogSpec(M), // Vector holding the log. spectrum
61  mFlatSpectrum(M), // Vector holding the loglog. spectrum
62  mKernelFFT(M/2+1), // Vector holding the complex FFT of the kernel
63  mFlatFFT(M/2+1), // Vector holding the complex FFT of the loglogspec
64  mConvolution(M), // Convolution vector, real
65  mSelectedKey(-1), // Selected key
66  mKeyForced(false) // selected key is forced
67 {}
68 
69 
70 //-----------------------------------------------------------------------------
71 // Initialization
72 //-----------------------------------------------------------------------------
73 
81 
82 void KeyRecognizer::init(bool optimize)
83 {
84  LogV("KeyRecognizer: starting initialization");
85  defineKernel();
86  if (optimize)
87  {
90  }
91  LogV("KeyRecognizer: initialization finished");
92 }
93 
94 
95 //-----------------------------------------------------------------------------
96 // Start the key recognition thread
97 //-----------------------------------------------------------------------------
98 
110 
112  bool forceRestart,
113  const Piano *piano,
114  FFTDataPointer fftPointer,
115  int selectedKey,
116  bool keyForced)
117 {
118  EptAssert(piano, "The piano has to be set.");
119  EptAssert(fftPointer, "The fft data has to exist.");
120  EptAssert(fftPointer->isValid(), "Invaild fft data");
121 
122  if (forceRestart) stop(); // if restart forced stop thread
123  else if (isThreadRunning()) return; // if it is running do nothing
124 
125  // copy data from the piano
126  mPiano = piano;
127  mConcertPitch = piano->getConcertPitch();
130  mFFTPtr = fftPointer; // save pointer to the Fourier transform
131  mSelectedKey = selectedKey; // copy selected key
132  mKeyForced = keyForced; // copy forcing flag
133 
134  start(); // start the thread
135 }
136 
137 
138 //-----------------------------------------------------------------------------
139 // Worker function running in an independent thread
140 //-----------------------------------------------------------------------------
141 
145 
147 {
148  setThreadName("KeyRecognizer");
149  EptAssert(mFFTPtr, "FFT Data have to non zero");
150  EptAssert(mFFTPtr->isValid(), "FFT Data have to exist");
151  EptAssert(mCallback, "Callback class has to exist");
152 
153  double f=0;
155  else f = detectFrequencyInTreble();
157 
158  if (f==0)
159  {
160  constructLogSpec(); // Compute log spectrum
162 
163  signalPreprocessing(); // Compute flattened loglog spectrum
165 
166  f = estimateFrequency(); // Estimate frequency
168  }
169 
170  int keynumber = findNearestKey(f); // determine keynumber
171 
172  mCallback->keyRecognized(keynumber, f); // notify callback
173 }
174 
175 
176 //-----------------------------------------------------------------------------
177 // Measure forced keys
178 //-----------------------------------------------------------------------------
179 
187 
189 {
190  if (mSelectedKey<0 or not mKeyForced) return 0;
191  std::vector<double> &fft = mFFTPtr->fft;
192  int n = mFFTPtr->fft.size();
193  int sr = mFFTPtr->samplingRate;
194  auto ftoq = [n,sr] (double f) { return MathTools::roundToInteger(2*n*f/sr); };
195  auto qtof = [n,sr] (double q) { return sr*q/(2*n); };
197  int q1 = std::max(0,ftoq(f/1.04));
198  int q2 = std::min(ftoq(f*1.04),n);
199  double max=0;
200  for (int q=q1; q<=q2; ++q) if (fft[q]>max)
201  {
202  max=fft[q];
203  f=qtof(q);
204  }
205  return f;
206 }
207 
208 
209 //-----------------------------------------------------------------------------
210 // Detect treble keys
211 //-----------------------------------------------------------------------------
212 
223 
225 {
226  const double threshold = 0.09;
227  std::vector<double> &fft = mFFTPtr->fft;
228  int n = mFFTPtr->fft.size();
229  int sr = mFFTPtr->samplingRate;
230  auto ftoq = [n,sr] (double f) { return MathTools::roundToInteger(2*n*f/sr); };
231  auto qtof = [n,sr] (double q) { return sr*q/(2*n); };
232  const int q1=ftoq(20), q2=ftoq(1000), q3=ftoq(4500);
233  double m1=0, m2=0;
234  if (n<=0 or q1<0 or q3>=n) return 0;
235  for (int q=q1; q<=q3; ++q)
236  {
237  m1 += fft[q]*q;
238  m2 += fft[q]*q*q;
239  }
240  if (m1<=0) return 0;
241  double f=0;
242  if (m2/m1/n > threshold)
243  {
244  double max=0;
245  for (int q=q2; q<q3; ++q) if (fft[q]>max)
246  {
247  max=fft[q];
248  f=qtof(q);
249  }
250  }
251  return f;
252 }
253 
254 
255 //-----------------------------------------------------------------------------
256 // Index mapping functions
257 //-----------------------------------------------------------------------------
258 
270 
271 int KeyRecognizer::ftom (double f) {
272  return static_cast<int>(0.5+M*(log(f)-logfmin)/(logfmax-logfmin));
273 }
274 
275 
285 
286 double KeyRecognizer::mtof (int m) {
287  return fmin * pow(fmax/fmin,static_cast<double>(m)/M);
288 }
289 
290 
291 //-----------------------------------------------------------------------------
292 // Construct logarithmic spectrum
293 //-----------------------------------------------------------------------------
294 
303 
305 {
306  const int Q = mFFTPtr->fft.size();
307  std::function<double(double)> mtoq = [this,Q] (double m)
308  { return 2*fmin*Q/mFFTPtr->samplingRate*pow(fmax/fmin,m/M); };
310 }
311 
312 
313 //-----------------------------------------------------------------------------
314 // Preprocessing of the singal
315 //-----------------------------------------------------------------------------
316 
331 
333 {
334  Write("01-logspec.dat",mLogSpec);
335  double norm = MathTools::computeNorm(mLogSpec);
336  if (norm<=0) return;
337  auto decibel = [norm](double x) {return 10*log10(x/norm);};
338  auto dB= MathTools::transformVector<double>(mLogSpec,decibel);
339  Write("02-dB.dat",dB);
340 
341 
342  // Flatten noise by subtracting a gliding average
343  for (int i=0; i<M; ++i)
344  {
345  //const int w=MathTools::roundToInteger(1+5000/mtof(i)); // width of the average
346  const int w=30;
347  int a = std::max(0,i-w);
348  int b = std::min(M,i+1);
349  double sum=0;
350  for (int k=a; k<b; ++k) sum+=dB[k]*dB[k];
351  mFlatSpectrum[i]=std::max(0.0,dB[i]+sqrt(sum/(b-a))-5);
352  }
353  Write("03-dBflat.dat",mFlatSpectrum);
354 
355 
356  // If key is selected decrease all spectral lines below
357  // and slightly increase the main spectral line.
358  if (mSelectedKey>=0 and not mKeyForced)
359  {
362  if (m1>=0 and m1<=m2 and m2<=M)
363  {
364  for (int k=0; k<m1; ++k) mFlatSpectrum[k] *= 0.75;
365  for (int k=m1; k<m2; ++k) mFlatSpectrum[k] *= 1.2;
366  }
367  }
368  Write("04-FlatSpectrum.dat",mFlatSpectrum);
369 
370 }
371 
372 
373 //-----------------------------------------------------------------------------
374 // Define the kernel
375 //-----------------------------------------------------------------------------
376 
387 
389 {
390 
391  static const int width=M/300; // width of the peaks in bins
392  static const int partials=20; // number of partials to be detected (20)
393  static const double B=0.000; // here still with a constant inharmonicity
394 
395  static std::vector<double> kernel(M); //must be static for using fftw3
396  kernel.assign(M,0);
397 
398  // lambda function for setting a peak
399  auto setpeak = [] (int m, double amplitude)
400  { for (int n=m-width; n<=m+width; n++) kernel[(n+M)%M]=amplitude*(width-std::abs(n-m)); };
401 
402  // lambda function for computing the frequency ratio of the nth partial
403  auto partial = [] (int n, double B)
404  { return n * sqrt((1+B*n*n)/(1+B)); };
405 
406  // lambda function for computing the index of the nth partial
407  auto partialindex = [this,partial] (int n, double B, int div)
408  { return ftom(fmin*partial(n,B)/partial(div,B)); };
409 
410  // lambda function for the intensity decay of the peaks
411  auto intensity = [] (int n) { return pow(static_cast<double>(n),-0.3); };
412 
413  // Define the kernel function
414  for (int div=2; div<=4; div++) for (int n=1; n<=30; ++n) if (n%div>0) if (n>div-2)
415  setpeak(partialindex(n,B,div),-0.3*intensity(n));
416 
417  for (int n=1; n<=partials; ++n) setpeak(partialindex(n, B, 1),intensity(n));
418 
419 
420  mFFT.calculateFFT(kernel,mKernelFFT); // calculate FFT
421 
422  Write("05-keyrecog-kernel.dat",kernel,true);
423 }
424 
425 
426 //-----------------------------------------------------------------------------
427 // Estimate frequency
428 //-----------------------------------------------------------------------------
429 
442 
444 {
446  for (size_t n = 0; n < mFlatFFT.size(); ++n)
447  mFlatFFT[n] *= std::conj(mKernelFFT[n]);
450  Write("06-convolution.dat",mConvolution,false);
451  return mtof(m);
452 }
453 
454 
455 //-----------------------------------------------------------------------------
456 // Find the nearest key to a given frequency, assuming average stretch
457 //-----------------------------------------------------------------------------
458 
467 
469 {
470  if (mConcertPitch<=390 or mConcertPitch>500) return -1;
471  // Approximate distance in keys from A-440:
472  double d = 17.3123*log(f/mConcertPitch);
473  // Average stretch polynomial, giving the expected deviation in cents
474  double c = 0.000019394+0.079694594*d-0.003718646*d*d
475  + 0.000450934*d*d*d + 0.000003724*d*d*d*d;
476  int k=-1; k = static_cast<int>(mKeyNumberOfA+d-c/100+0.5);
477  return (k>=0 and k<mNumberOfKeys ? k : -1);
478 }
479 
480 
481 //-----------------------------------------------------------------------------
482 // Write function for development purposes
483 //-----------------------------------------------------------------------------
484 
485 void KeyRecognizer::Write(std::string filename, std::vector<double> &v, bool log)
486 {
487 #if CONFIG_ENABLE_XMGRACE
488  std::ofstream os(filename);
489  for (uint m=0; m<v.size(); ++m)
490  {
491  //os << m << "\t" << v[m] << std::endl;
492  if (log) os << mtof(m) << "\t" << v[m] << std::endl;
493  else os << m << "\t" << v[m] << std::endl;
494  }
495  os.close();
496 #else
497  (void)filename; (void)v; (void)log; // suppress warnings
498 #endif // CONFIG_ENABLE_XMGRACE
499 }
500 
FFTDataPointer mFFTPtr
Pointer to Fourier transform.
Definition: keyrecognizer.h:87
double detectForcedFrequency()
Detect forced key.
virtual void stop()
Stop the thread.
#define CHECK_CANCEL_THREAD
#define LogV(...)
Definition: log.h:38
FFT_Implementation mFFT
Instance of FFT implementation.
Definition: keyrecognizer.h:92
std::vector< double > mFlatSpectrum
DoubleLogarithmic spectrum (LogLogSpec)
Definition: keyrecognizer.h:94
void signalPreprocessing()
Preprocessing of the signal.
Callback class for KeyRecognizer.
Definition: keyrecognizer.h:40
void init(bool optimize)
Initialization of the KeyRecognizer.
void constructLogSpec()
Construct logarithmic spectrum.
int mKeyNumberOfA
Index of the A-key.
Definition: keyrecognizer.h:91
int roundToInteger(T x)
Round a floating point number to an integer.
Definition: mathtools.h:43
static const int M
Number of bins (powers of 2,3,5)
Definition: keyrecognizer.h:60
Definition: piano.h:40
KeyRecognizer(KeyRecognizerCallback *callback)
Constructor of the KeyRecognizer.
void coarseGrainSpectrum(const std::vector< double > &X, std::vector< double > &Y, std::function< double(double y)> f, double exponent=0)
Definition: mathtools.cpp:120
int ftom(double f)
Map frequency to bin index.
double getEqualTempFrequency(int keynumber, double cents=0, double A4=0) const
Function returning the equal temperament.
Definition: piano.cpp:123
bool isThreadRunning() const
Flag to check if the thread is running.
static void setThreadName(std::string s)
Specify the name of the thread.
int findMaximum(const std::vector< double > &X, int i, int j)
Find the component where the vector has its maximum.
Definition: mathtools.cpp:151
void Write(std::string filename, std::vector< double > &v, bool log=true)
FFTComplexVector mFlatFFT
Fourier transform of LogLogSpec.
Definition: keyrecognizer.h:96
double mConcertPitch
Actual frequency of the A-key.
Definition: keyrecognizer.h:88
bool mKeyForced
Flag indicating that the key is forced.
Definition: keyrecognizer.h:99
void defineKernel()
Define the kernel vector for key recognition.
double estimateFrequency()
Estimate frequency for a given log-log spectrum.
const Keyboard & getKeyboard() const
Definition: piano.h:83
void workerFunction() overridefinal
Main worker function for executing the key recognition thread.
std::vector< double > mLogSpec
Logarithmic spectrum (LogSpec)
Definition: keyrecognizer.h:93
virtual void start()
Start the thread.
static const double logfmax
Log of maximal frequency.
double detectFrequencyInTreble()
Detect keys in the treble.
const Piano * mPiano
Pointer to the piano data.
Definition: keyrecognizer.h:89
void recognizeKey(bool forceRestart, const Piano *piano, FFTDataPointer fftPointer, int selectedKey, bool keyForced)
Start key recognition.
virtual void keyRecognized(int keyIndex, double frequency)=0
int mNumberOfKeys
Number of piano keys.
Definition: keyrecognizer.h:90
int getKeyNumberOfA4() const
Definition: keyboard.h:73
const double & getConcertPitch() const
Definition: piano.h:80
#define EptAssert(a, b)
Definition: eptexception.h:47
static const double fmax
Frequency of bin M-1.
Definition: keyrecognizer.h:62
int mSelectedKey
Number of the actually selected key.
Definition: keyrecognizer.h:98
FFTComplexVector mKernelFFT
Fourier transform of the kernel.
Definition: keyrecognizer.h:95
KeyRecognizerCallback * mCallback
Pointer to the caller.
Definition: keyrecognizer.h:86
std::shared_ptr< FFTData > FFTDataPointer
Shared pointer of FFTData.
Definition: fftadapter.h:86
FFTRealVector mConvolution
Convolution vector.
Definition: keyrecognizer.h:97
static const double fmin
Frequency of bin 0.
Definition: keyrecognizer.h:61
static const double logfmin
Log of minimal frequency.
int getNumberOfKeys() const
Definition: keyboard.h:72
void optimize(FFTRealVector &in)
Optimize plan for the forward transfomation.
void calculateFFT(const FFTRealVector &in, FFTComplexVector &out)
Foward FFT, mapping a real vector with size N to a complex one with size N/2+1.
double mtof(int m)
Map bin index to frequency.
int findNearestKey(double f)
Find the nearest key for a given frequency.
double computeNorm(std::vector< double > &vec)
Compute the norm of a vector.
Definition: mathtools.cpp:82