1 | /* |
---|
2 | Copyright (C) 2006 Amaury Hazan |
---|
3 | Ported to aubio from LibXtract |
---|
4 | http://libxtract.sourceforge.net/ |
---|
5 | |
---|
6 | |
---|
7 | This program is free software; you can redistribute it and/or modify |
---|
8 | it under the terms of the GNU General Public License as published by |
---|
9 | the Free Software Foundation; either version 2 of the License, or |
---|
10 | (at your option) any later version. |
---|
11 | |
---|
12 | This program is distributed in the hope that it will be useful, |
---|
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | GNU General Public License for more details. |
---|
16 | |
---|
17 | You should have received a copy of the GNU General Public License |
---|
18 | along with this program; if not, write to the Free Software |
---|
19 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
---|
20 | |
---|
21 | */ |
---|
22 | |
---|
23 | #include "aubio_priv.h" |
---|
24 | #include "sample.h" |
---|
25 | #include "fft.h" |
---|
26 | #include "filterbank.h" |
---|
27 | #include "mfcc.h" |
---|
28 | #include "math.h" |
---|
29 | |
---|
30 | #define VERY_SMALL_NUMBER 2e-42 |
---|
31 | #define USE_EQUAL_GAIN 1 |
---|
32 | |
---|
33 | |
---|
34 | /** Internal structure for mfcc object **/ |
---|
35 | |
---|
36 | struct aubio_mfcc_t_{ |
---|
37 | uint_t win_s; /** grain length */ |
---|
38 | uint_t samplerate; /** sample rate (needed?) */ |
---|
39 | uint_t channels; /** number of channels */ |
---|
40 | uint_t n_coefs; /** number of coefficients (= fb->n_filters/2 +1) */ |
---|
41 | smpl_t lowfreq; /** lowest frequency for filters */ |
---|
42 | smpl_t highfreq; /** highest frequency for filters */ |
---|
43 | aubio_filterbank_t * fb; /** filter bank */ |
---|
44 | fvec_t * in_dct; /** input buffer for dct * [fb->n_filters] */ |
---|
45 | aubio_mfft_t * fft_dct; /** fft object for dct */ |
---|
46 | cvec_t * fftgrain_dct; /** output buffer for dct */ |
---|
47 | }; |
---|
48 | |
---|
49 | |
---|
50 | /** filterbank initialization for mel filters |
---|
51 | |
---|
52 | \param fb filterbank, as returned by new_aubio_filterbank method |
---|
53 | \param nyquist nyquist frequency, i.e. half of the sampling rate |
---|
54 | \param style libxtract style |
---|
55 | \param freqmin lowest filter frequency |
---|
56 | \param freqmax highest filter frequency |
---|
57 | |
---|
58 | */ |
---|
59 | void aubio_filterbank_mfcc_init(aubio_filterbank_t * fb, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max); |
---|
60 | |
---|
61 | aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels){ |
---|
62 | /** allocating space for mfcc object */ |
---|
63 | aubio_mfcc_t * mfcc = AUBIO_NEW(aubio_mfcc_t); |
---|
64 | |
---|
65 | //we need (n_coefs-1)*2 filters to obtain n_coefs coefficients after dct |
---|
66 | uint_t n_filters = (n_coefs-1)*2; |
---|
67 | |
---|
68 | mfcc->win_s=win_s; |
---|
69 | mfcc->samplerate=samplerate; |
---|
70 | mfcc->channels=channels; |
---|
71 | mfcc->n_coefs=n_coefs; |
---|
72 | mfcc->lowfreq=lowfreq; |
---|
73 | mfcc->highfreq=highfreq; |
---|
74 | |
---|
75 | /** filterbank allocation */ |
---|
76 | mfcc->fb = new_aubio_filterbank(n_filters, mfcc->win_s); |
---|
77 | |
---|
78 | /** allocating space for fft object (used for dct) */ |
---|
79 | mfcc->fft_dct=new_aubio_mfft(mfcc->win_s, 1); |
---|
80 | |
---|
81 | /** allocating buffers */ |
---|
82 | mfcc->in_dct=new_fvec(mfcc->win_s, 1); |
---|
83 | |
---|
84 | mfcc->fftgrain_dct=new_cvec(n_filters, 1); |
---|
85 | |
---|
86 | /** populating the filterbank */ |
---|
87 | aubio_filterbank_mfcc_init(mfcc->fb, (mfcc->samplerate)/2, mfcc->lowfreq, mfcc->highfreq); |
---|
88 | |
---|
89 | return mfcc; |
---|
90 | }; |
---|
91 | |
---|
92 | void del_aubio_mfcc(aubio_mfcc_t *mf){ |
---|
93 | /** deleting filterbank */ |
---|
94 | del_aubio_filterbank(mf->fb); |
---|
95 | /** deleting mfft object */ |
---|
96 | del_aubio_mfft(mf->fft_dct); |
---|
97 | /** deleting buffers */ |
---|
98 | del_fvec(mf->in_dct); |
---|
99 | del_cvec(mf->fftgrain_dct); |
---|
100 | |
---|
101 | /** deleting mfcc object */ |
---|
102 | AUBIO_FREE(mf); |
---|
103 | } |
---|
104 | |
---|
105 | void aubio_mfcc_do(aubio_mfcc_t * mf, cvec_t *in, fvec_t *out){ |
---|
106 | |
---|
107 | aubio_filterbank_t *f = mf->fb; |
---|
108 | uint_t n, filter_cnt; |
---|
109 | |
---|
110 | for(filter_cnt = 0; filter_cnt < f->n_filters; filter_cnt++){ |
---|
111 | mf->in_dct->data[0][filter_cnt] = 0.f; |
---|
112 | for(n = 0; n < mf->win_s; n++){ |
---|
113 | mf->in_dct->data[0][filter_cnt] += in->norm[0][n] * f->filters[filter_cnt]->data[0][n]; |
---|
114 | } |
---|
115 | mf->in_dct->data[0][filter_cnt] = LOG(mf->in_dct->data[0][filter_cnt] < VERY_SMALL_NUMBER ? VERY_SMALL_NUMBER : mf->in_dct->data[0][filter_cnt]); |
---|
116 | } |
---|
117 | |
---|
118 | //TODO: check that zero padding |
---|
119 | // the following line seems useless since the in_dct buffer has the correct size |
---|
120 | //for(n = filter + 1; n < N; n++) result[n] = 0; |
---|
121 | |
---|
122 | aubio_dct_do(mf, mf->in_dct, out); |
---|
123 | |
---|
124 | return; |
---|
125 | } |
---|
126 | |
---|
127 | void aubio_dct_do(aubio_mfcc_t * mf, fvec_t *in, fvec_t *out){ |
---|
128 | //compute mag spectrum |
---|
129 | aubio_mfft_do (mf->fft_dct, in, mf->fftgrain_dct); |
---|
130 | |
---|
131 | int i; |
---|
132 | //extract real part of fft grain |
---|
133 | for(i=0; i<mf->n_coefs ;i++){ |
---|
134 | out->data[0][i]= mf->fftgrain_dct->norm[0][i]*COS(mf->fftgrain_dct->phas[0][i]); |
---|
135 | } |
---|
136 | |
---|
137 | return; |
---|
138 | } |
---|
139 | |
---|
140 | void aubio_filterbank_mfcc_init(aubio_filterbank_t * fb, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max){ |
---|
141 | |
---|
142 | int n, i, k, *fft_peak, M, next_peak; |
---|
143 | smpl_t norm, mel_freq_max, mel_freq_min, norm_fact, height, inc, val, |
---|
144 | freq_bw_mel, *mel_peak, *height_norm, *lin_peak; |
---|
145 | |
---|
146 | mel_peak = height_norm = lin_peak = NULL; |
---|
147 | fft_peak = NULL; |
---|
148 | norm = 1; |
---|
149 | |
---|
150 | mel_freq_max = 1127 * log(1 + freq_max / 700); |
---|
151 | mel_freq_min = 1127 * log(1 + freq_min / 700); |
---|
152 | freq_bw_mel = (mel_freq_max - mel_freq_min) / fb->n_filters; |
---|
153 | |
---|
154 | mel_peak = (smpl_t *)malloc((fb->n_filters + 2) * sizeof(smpl_t)); |
---|
155 | /* +2 for zeros at start and end */ |
---|
156 | lin_peak = (smpl_t *)malloc((fb->n_filters + 2) * sizeof(smpl_t)); |
---|
157 | fft_peak = (int *)malloc((fb->n_filters + 2) * sizeof(int)); |
---|
158 | height_norm = (smpl_t *)malloc(fb->n_filters * sizeof(smpl_t)); |
---|
159 | |
---|
160 | if(mel_peak == NULL || height_norm == NULL || |
---|
161 | lin_peak == NULL || fft_peak == NULL) |
---|
162 | return NULL; |
---|
163 | |
---|
164 | M = fb->win_s >> 1; |
---|
165 | |
---|
166 | mel_peak[0] = mel_freq_min; |
---|
167 | lin_peak[0] = 700 * (exp(mel_peak[0] / 1127) - 1); |
---|
168 | fft_peak[0] = lin_peak[0] / nyquist * M; |
---|
169 | |
---|
170 | |
---|
171 | for (n = 1; n <= fb->n_filters; n++){ |
---|
172 | /*roll out peak locations - mel, linear and linear on fft window scale */ |
---|
173 | mel_peak[n] = mel_peak[n - 1] + freq_bw_mel; |
---|
174 | lin_peak[n] = 700 * (exp(mel_peak[n] / 1127) -1); |
---|
175 | fft_peak[n] = lin_peak[n] / nyquist * M; |
---|
176 | } |
---|
177 | |
---|
178 | for (n = 0; n < fb->n_filters; n++){ |
---|
179 | /*roll out normalised gain of each peak*/ |
---|
180 | if (style == USE_EQUAL_GAIN){ |
---|
181 | height = 1; |
---|
182 | norm_fact = norm; |
---|
183 | } |
---|
184 | else{ |
---|
185 | height = 2 / (lin_peak[n + 2] - lin_peak[n]); |
---|
186 | norm_fact = norm / (2 / (lin_peak[2] - lin_peak[0])); |
---|
187 | } |
---|
188 | height_norm[n] = height * norm_fact; |
---|
189 | } |
---|
190 | |
---|
191 | i = 0; |
---|
192 | |
---|
193 | for(n = 0; n < fb->n_filters; n++){ |
---|
194 | |
---|
195 | /*calculate the rise increment*/ |
---|
196 | if(n > 0) |
---|
197 | inc = height_norm[n] / (fft_peak[n] - fft_peak[n - 1]); |
---|
198 | else |
---|
199 | inc = height_norm[n] / fft_peak[n]; |
---|
200 | val = 0; |
---|
201 | |
---|
202 | /*zero the start of the array*/ |
---|
203 | for(k = 0; k < i; k++) |
---|
204 | //fft_tables[n][k] = 0.f; |
---|
205 | fb->filters[n]->data[0][k]=0.f; |
---|
206 | |
---|
207 | /*fill in the rise */ |
---|
208 | for(; i <= fft_peak[n]; i++){ |
---|
209 | // fft_tables[n][i] = val; |
---|
210 | fb->filters[n]->data[0][k]=val; |
---|
211 | val += inc; |
---|
212 | } |
---|
213 | |
---|
214 | /*calculate the fall increment */ |
---|
215 | inc = height_norm[n] / (fft_peak[n + 1] - fft_peak[n]); |
---|
216 | |
---|
217 | val = 0; |
---|
218 | next_peak = fft_peak[n + 1]; |
---|
219 | |
---|
220 | /*reverse fill the 'fall' */ |
---|
221 | for(i = next_peak; i > fft_peak[n]; i--){ |
---|
222 | //fft_tables[n][i] = val; |
---|
223 | fb->filters[n]->data[0][k]=val; |
---|
224 | val += inc; |
---|
225 | } |
---|
226 | |
---|
227 | /*zero the rest of the array*/ |
---|
228 | for(k = next_peak + 1; k < fb->win_s; k++) |
---|
229 | //fft_tables[n][k] = 0.f; |
---|
230 | fb->filters[n]->data[0][k]=0.f; |
---|
231 | } |
---|
232 | |
---|
233 | free(mel_peak); |
---|
234 | free(lin_peak); |
---|
235 | free(height_norm); |
---|
236 | free(fft_peak); |
---|
237 | |
---|
238 | } |
---|
239 | |
---|