Changeset 8708556

Sep 7, 2007, 3:47:55 PM (18 years ago)
Amaury Hazan <>
feature/autosink, feature/cnn, feature/cnn_org, feature/constantq, feature/crepe, feature/crepe_org, feature/pitchshift, feature/pydocstrings, feature/timestretch, fix/ffmpeg5, master, pitchshift, sampler, timestretch, yinfft+

80% wrapped-up filterbank and mfcc

4 edited


  • TabularUnified src/filterbank.c

    rfdf39ba r8708556  
    2727// Struct Declaration
    29 /** \brief A structure to store a set of n_filters Mel filters */
    30 typedef struct aubio_mel_filter_ {
    31     int n_filters;
    32     smpl_t **filters;
     29/** \brief A structure to store a set of n_filters filters of length win_s */
     30struct aubio_filterbank_t_ {
     31    uint_t win_s;
     32    uint_t n_filters;
     33    fvec_t *filters;
     36aubio_filterbank_t * new_aubio_filterbank(uint_t n_filters, uint_t win_s){
     38  int filter_cnt;
     39  /** allocating space for filterbank object */
     40  aubio_filterbank_t * fb = AUBIO_NEW(aubio_filterbank_t);
     41  fb->win_s=win_s;
     42  fb->n_filters=n_filters;
     44  /** allocating filter tables */
     45  fb->filters=AUBIO_ARRAY(n_filters,f_vec_t);
     46  for (filter_cnt=0; filter_cnt<n_filters; filter_cnt++)
     47    /* considering one-channel filters */
     48    filters[filter_cnt]=new_fvec(win_s, 1);
     52void del_aubio_filterbank(aubio_filterbank_t * fb){
     54  int filter_cnt;
     55  /** deleting filter tables first */
     56  for (filter_cnt=0; filter_cnt<fb->n_filters; filter_cnt++)
     57    del_fvec(fb->filters[filter_cnt]);
     58  AUBIO_FREE(fb->filters);
     59  AUBIO_FREE(fb);
    3563// Initialization
    37 int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max, int freq_bands, smpl_t **fft_tables){
     65void aubio_filterbank_mfcc_init(aubio_filterbank_t * fb, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max){
    3967    int n, i, k, *fft_peak, M, next_peak;
    4775    mel_freq_max = 1127 * log(1 + freq_max / 700);
    4876    mel_freq_min = 1127 * log(1 + freq_min / 700);
    49     freq_bw_mel = (mel_freq_max - mel_freq_min) / freq_bands;
    51     mel_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t));
     77    freq_bw_mel = (mel_freq_max - mel_freq_min) / fb->n_filters;
     79    mel_peak = (smpl_t *)malloc((fb->n_filters + 2) * sizeof(smpl_t));
    5280    /* +2 for zeros at start and end */
    53     lin_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t));
    54     fft_peak = (int *)malloc((freq_bands + 2) * sizeof(int));
    55     height_norm = (smpl_t *)malloc(freq_bands * sizeof(smpl_t));
     81    lin_peak = (smpl_t *)malloc((fb->n_filters + 2) * sizeof(smpl_t));
     82    fft_peak = (int *)malloc((fb->n_filters + 2) * sizeof(int));
     83    height_norm = (smpl_t *)malloc(fb->n_filters * sizeof(smpl_t));
    5785    if(mel_peak == NULL || height_norm == NULL ||
    5987                    return XTRACT_MALLOC_FAILED;
    61     M = N >> 1;
     89    M = fb->win_s >> 1;
    6391    mel_peak[0] = mel_freq_min;
    68     for (n = 1; n <= freq_bands; n++){ 
     96    for (n = 1; n <= fb->n_filters; n++){ 
    6997    /*roll out peak locations - mel, linear and linear on fft window scale */
    7098        mel_peak[n] = mel_peak[n - 1] + freq_bw_mel;
    73101    }
    75     for (n = 0; n < freq_bands; n++){
     103    for (n = 0; n < fb->n_filters; n++){
    76104        /*roll out normalised gain of each peak*/
    77105        if (style == XTRACT_EQUAL_GAIN){
    88116    i = 0;
    90     for(n = 0; n < freq_bands; n++){
     118    for(n = 0; n < fb->n_filters; n++){
    92120  /*calculate the rise increment*/
    99127  /*zero the start of the array*/
    100128  for(k = 0; k < i; k++)
    101      fft_tables[n][k] = 0.f;
     129     //fft_tables[n][k] = 0.f;
     130     fb->filters[n]->data[0][k]=0.f;
    103132  /*fill in the rise */
    104133        for(; i <= fft_peak[n]; i++){
    105             fft_tables[n][i] = val;
     134         // fft_tables[n][i] = val;
     135            fb->filters[n]->data[0][k]=val;
    106136            val += inc;
    107137        }
    115145  /*reverse fill the 'fall' */
    116146        for(i = next_peak; i > fft_peak[n]; i--){
    117             fft_tables[n][i] = val;
     147            //fft_tables[n][i] = val;
     148            fb->filters[n]->data[0][k]=val;
    118149            val += inc;
    119150        }
    121152  /*zero the rest of the array*/
    122   for(k = next_peak + 1; k < N; k++)
    123       fft_tables[n][k] = 0.f;
     153  for(k = next_peak + 1; k < fb->win_s; k++)
     154      //fft_tables[n][k] = 0.f;
     155      fb->filters[n]->data[0][k]=0.f;
    124156    }
    129161    free(fft_peak);
    131     return XTRACT_SUCCESS;
     163    //return XTRACT_SUCCESS;
     167//to be deleted code
     170// int aubio_mfcc_init(int N, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max, int freq_bands, smpl_t **fft_tables){
     172//     int n, i, k, *fft_peak, M, next_peak;
     173//     smpl_t norm, mel_freq_max, mel_freq_min, norm_fact, height, inc, val,
     174//         freq_bw_mel, *mel_peak, *height_norm, *lin_peak;
     176//     mel_peak = height_norm = lin_peak = NULL;
     177//     fft_peak = NULL;
     178//     norm = 1;
     180//     mel_freq_max = 1127 * log(1 + freq_max / 700);
     181//     mel_freq_min = 1127 * log(1 + freq_min / 700);
     182//     freq_bw_mel = (mel_freq_max - mel_freq_min) / freq_bands;
     184//     mel_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t));
     185//     /* +2 for zeros at start and end */
     186//     lin_peak = (smpl_t *)malloc((freq_bands + 2) * sizeof(smpl_t));
     187//     fft_peak = (int *)malloc((freq_bands + 2) * sizeof(int));
     188//     height_norm = (smpl_t *)malloc(freq_bands * sizeof(smpl_t));
     190//     if(mel_peak == NULL || height_norm == NULL ||
     191//                     lin_peak == NULL || fft_peak == NULL)
     192//                     return XTRACT_MALLOC_FAILED;
     194//     M = N >> 1;
     196//     mel_peak[0] = mel_freq_min;
     197//     lin_peak[0] = 700 * (exp(mel_peak[0] / 1127) - 1);
     198//     fft_peak[0] = lin_peak[0] / nyquist * M;
     201//     for (n = 1; n <= freq_bands; n++){ 
     202//     /*roll out peak locations - mel, linear and linear on fft window scale */
     203//         mel_peak[n] = mel_peak[n - 1] + freq_bw_mel;
     204//         lin_peak[n] = 700 * (exp(mel_peak[n] / 1127) -1);
     205//         fft_peak[n] = lin_peak[n] / nyquist * M;
     206//     }
     208//     for (n = 0; n < freq_bands; n++){
     209//         /*roll out normalised gain of each peak*/
     210//         if (style == XTRACT_EQUAL_GAIN){
     211//             height = 1;
     212//             norm_fact = norm;
     213//         }
     214//         else{
     215//             height = 2 / (lin_peak[n + 2] - lin_peak[n]);
     216//             norm_fact = norm / (2 / (lin_peak[2] - lin_peak[0]));
     217//         }
     218//         height_norm[n] = height * norm_fact;
     219//     }
     221//     i = 0;
     223//     for(n = 0; n < freq_bands; n++){
     225//   /*calculate the rise increment*/
     226//         if(n > 0)
     227//             inc = height_norm[n] / (fft_peak[n] - fft_peak[n - 1]);
     228//         else
     229//             inc = height_norm[n] / fft_peak[n];
     230//         val = 0; 
     232//   /*zero the start of the array*/
     233//   for(k = 0; k < i; k++)
     234//      fft_tables[n][k] = 0.f;
     236//   /*fill in the rise */
     237//         for(; i <= fft_peak[n]; i++){
     238//             fft_tables[n][i] = val;
     239//             val += inc;
     240//         }
     242//         /*calculate the fall increment */
     243//         inc = height_norm[n] / (fft_peak[n + 1] - fft_peak[n]);
     245//         val = 0;
     246//   next_peak = fft_peak[n + 1];
     248//   /*reverse fill the 'fall' */
     249//         for(i = next_peak; i > fft_peak[n]; i--){
     250//             fft_tables[n][i] = val;
     251//             val += inc;
     252//         }
     254//   /*zero the rest of the array*/
     255//   for(k = next_peak + 1; k < N; k++)
     256//       fft_tables[n][k] = 0.f;
     257//     }
     259//     free(mel_peak);
     260//     free(lin_peak);
     261//     free(height_norm);
     262//     free(fft_peak);
     264//     return XTRACT_SUCCESS;
     266// }
  • TabularUnified src/filterbank.h

    rfdf39ba r8708556  
    22   Copyright (C) 2007 Amaury Hazan
    3    Ported to aubio from LibXtract
     3   adapted to aubio from LibXtract
     22/** \file
     24  Filterbank object
     26  General-purpose spectral filterbank object. Comes with mel-filter initialization function.
    2230#ifndef AUBIOFILTERBANK_H
    2331#define AUBIOFILTERBANK_H
    31 typedef struct aubio_mel_filter_ aubio_mel_filter;
     39typedef struct aubio_filterbank_t_ aubio_filterbank_t;
     41/** create filterbank object
     43  \param win_s size of analysis buffer (and length of the FFT transform)
     44  \param n_filters number of filters to create
     48aubio_filterbank_t * new_aubio_filterbank(uint_t n_filters, uint_t win_s);
     50/** destroy filterbank object
     52  \param fb filterbank, as returned by new_aubio_filterbank method
     55void del_aubio_filterbank(aubio_filterbank_t * fb);
     57/** filterbank initialization for mel filters
     59  \param fb filterbank, as returned by new_aubio_filterbank method
     60  \param nyquist nyquist frequency, i.e. half of the sampling rate
     61  \param style libxtract style
     62  \param freq_min lowest filter frequency
     63  \param freq_max highest filter frequency
     66void aubio_filterbank_mfcc_init(aubio_filterbank_t * fb, smpl_t nyquist, int style, smpl_t freq_min, smpl_t freq_max);
    3368// Initialization
  • TabularUnified src/mfcc.c

    rfdf39ba r8708556  
    2424#include "sample.h"
    2525#include "fft.h"
     26#include "filterbank.h"
    2627#include "mfcc.h"
    2728#include "math.h"
    29 /*
    30 new_aubio_mfcc
    31 aubio_mfcc_do
    32 del_aubio_mfcc
    33 */
     32/** Internal structure for mfcc object **/
     34struct aubio_mfcc_t_{
     36  /** grain length */
     37  uint_t win_s;
     39  /** sample rate (needed?) */
     40  uint_t samplerate;
     42  /** number of channels */
     43  uint_t channels;
     45  /** filter bank */
     46  aubio_filterbank_t * fb;
     48  /** number of coefficients (= fb->n_filters/2 +1) */
     49  uint_t n_coefs;
     51  /** lowest frequency for filters */
     52  smpl_t lowfreq;
     54  /** highest frequency for filters */
     55  smpl_t highfreq;
     57  /** input buffer for dct * [fb->n_filters] */
     58  fvec_t * in_dct;
     60  /** fft object for dct */
     61  aubio_mfft_t * fft_dct;
     63  /** output buffer for dct */
     64  cvec_t * fftgrain_dct;
     69aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels){
     72  /** allocating space for mfcc object */
     74  aubio_mfcc_t * mfcc = AUBIO_NEW(aubio_mfcc_t);
     76  mfcc->win_s=win_s;
     77  mfcc->samplerate=samplerate;
     78  mfcc->channels=channels;
     79  mfcc->n_coefs=n_coefs;
     80  mfcc->lowfreq=lowfreq;
     81  mfcc->highfreq=highfreq;
     83  /** filterbank allocation */
     84  //we need (n_coefs-1)*2 filters to obtain n_coefs coefficients after dct
     85  mfcc->fb=new_aubio_filterbank((n_coefs-1)*2, mfcc->win_s);
     87  /** allocating space for fft object (used for dct) */
     88  mfcc->fft_dct=new_aubio_mfft(mfcc->win_s, 1);
     90  /** allocating buffers */
     92  mfcc->in_dct=new_fvec(mfcc->win_s, 1);
     94  mfcc->fftgrain_dct=new_cvec(mfcc->fb->n_filters, 1);
     96  /** populating the filterbank */
     98  aubio_filterbank_mfcc_init(mfcc->fb, (mfcc->samplerate)/2, XTRACT_EQUAL_GAIN, mfcc->lowfreq, mfcc->highfreq);
     100  return mfcc;
     105void del_aubio_mfcc(aubio_mfcc_t *mf){
     107  /** deleting filterbank */
     108  del_aubio_filterbank(mf->fb);
     109  /** deleting mfft object */
     110  del_aubio_mfft(mf->fft_dct);
     111  /** deleting buffers */
     112  del_fvec(mf->in_dct);
     113  del_cvec(mf->fftgrain_dct);
     115  /** deleting mfcc object */
     116  AUBIO_FREE(mf);
    35121// Computation
    36 // Added last two arguments to be able to pass from example
    40 int aubio_mfcc_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){
    42     aubio_mel_filter *f;
    43     int n, filter;
    45     f = (aubio_mel_filter *)argv;
    47     for(filter = 0; filter < f->n_filters; filter++){
    48         result[filter] = 0.f;
    49         for(n = 0; n < N; n++){
    50             result[filter] += data[n] * f->filters[filter][n];
     123void aubio_mfcc_do(aubio_mfcc_t * mf, cvec_t *in, fvec_t *out){
     125    aubio_filterbank_t *f = mf->fb;
     126    uint_t n, filter_cnt;
     128    for(filter_cnt = 0; filter_cnt < f->n_filters; filter_cnt++){
     129        mf->in_dct->data[0][filter_cnt] = 0.f;
     130        for(n = 0; n < mf->win_s; n++){
     131            mf->in_dct->data[0][filter_cnt] += in->norm[0][n] * f->filters[filter_cnt]->data[0][n];
    51132        }
    52         result[filter] = LOG(result[filter] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : result[filter]);
     133        mf->in_dct->data[0][filter_cnt] = LOG(mf->in_dct->data[0][filter_cnt] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : mf->in_dct->data[0][filter_cnt]);
    53134    }
    55136    //TODO: check that zero padding
    56     for(n = filter + 1; n < N; n++) result[n] = 0;
    58     aubio_dct_do(result, f->n_filters, NULL, result, fft_dct, fftgrain_dct);
    60     return XTRACT_SUCCESS;
     137    // the following line seems useless since the in_dct buffer has the correct size
     138    //for(n = filter + 1; n < N; n++) result[n] = 0;
     140    aubio_dct_do(mf, mf->in_dct, out);
     142    //return XTRACT_SUCCESS;
    63 // Added last two arguments to be able to pass from example
    65 int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){
    68     //call aubio p_voc in dct setting
    70     //TODO: fvec as input? Remove data length, N?
    72     fvec_t * momo = new_fvec(20, 1);
    73     momo->data = data;
     145void aubio_dct_do(aubio_mfcc_t * mf, fvec_t *in, fvec_t *out){
     149    //fvec_t * momo = new_fvec(20, 1);
     150    //momo->data = data;
    75152    //compute mag spectrum
    76     aubio_mfft_do (fft_dct, data, fftgrain_dct);
     153    aubio_mfft_do (mf->fft_dct, in, mf->fftgrain_dct);
    78155    int i;
    79156    //extract real part of fft grain
    80     for(i=0; i<N ;i++){
    81       result[i]= fftgrain_dct->norm[0][i]*COS(fftgrain_dct->phas[0][i]);
     157    for(i=0; i<mf->n_coefs ;i++){
     158      out->data[0][i]= mf->fftgrain_dct->norm[0][i]*COS(mf->fftgrain_dct->phas[0][i]);
    82159    }
    85     return XTRACT_SUCCESS;
     162    //return XTRACT_SUCCESS;
     166///////// OLD CODE
     168// int aubio_mfcc_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){
     170//     aubio_mel_filter *f;
     171//     uint_t n, filter;
     173//     f = (aubio_mel_filter *)argv;
     174//     printf("%d",f->n_filters);
     176//     for(filter = 0; filter < f->n_filters; filter++){
     177//         result[filter] = 0.f;
     178//         for(n = 0; n < N; n++){
     179//             result[filter] += data[n] * f->filters[filter][n];
     180//         }
     181//         result[filter] = LOG(result[filter] < XTRACT_LOG_LIMIT ? XTRACT_LOG_LIMIT : result[filter]);
     182//     }
     184//     //TODO: check that zero padding
     185//     for(n = filter + 1; n < N; n++) result[n] = 0;
     187//     aubio_dct_do(result, f->n_filters, NULL, result, fft_dct, fftgrain_dct);
     189//     return XTRACT_SUCCESS;
     190// }
     192// Added last two arguments to be able to pass from example
     194// int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t * fft_dct, cvec_t * fftgrain_dct){
     197//     //call aubio p_voc in dct setting
     199//     //TODO: fvec as input? Remove data length, N?
     201//     fvec_t * momo = new_fvec(20, 1);
     202//     momo->data = data;
     204//     //compute mag spectrum
     205//     aubio_mfft_do (fft_dct, data, fftgrain_dct);
     207//     int i;
     208//     //extract real part of fft grain
     209//     for(i=0; i<N ;i++){
     210//       result[i]= fftgrain_dct->norm[0][i]*COS(fftgrain_dct->phas[0][i]);
     211//     }
     214//     return XTRACT_SUCCESS;
     215// }
  • TabularUnified src/mfcc.h

    rfdf39ba r8708556  
    152 // Computation
    154 /** \brief Extract Mel Frequency Cepstral Coefficients based on a method described by Rabiner
    155  *
    156  * \param *data: a pointer to the first element in an array of spectral magnitudes, e.g. the first half of the array pointed to by *result from xtract_spectrum()
    157  * \param N: the number of array elements to be considered
    158  * \param *argv: a pointer to a data structure of type xtract_mel_filter, containing n_filters coefficient tables to make up a mel-spaced filterbank
    159  * \param *result: a pointer to an array containing the resultant MFCC
    160  *
    161  * The data structure pointed to by *argv must be obtained by first calling xtract_init_mfcc
    162  */
     150typedef struct aubio_mfcc_t_ aubio_mfcc_t;
     152// Creation
     154/** create mfcc object
     156  \param win_s size of analysis buffer (and length of the FFT transform)
     157  \param samplerate sampling rate of the signal (half of it is passed to the filterbank as the nyquist frequency)
     158  \param n_coefs: number of desired coefs
     159  \param lowfreq: lowest frequency to use in filterbank
     160  \param highfreq highest frequency to use in filterbank
     161  \param channels number of channels
     164aubio_mfcc_t * new_aubio_mfcc (uint_t win_s, uint_t samplerate ,uint_t n_coefs, smpl_t lowfreq, smpl_t highfreq, uint_t channels);
     166// Deletion
     168/** delete mfcc object
     170  \param mf mfcc object as returned by new_aubio_mfcc
     173void del_aubio_mfcc(aubio_mfcc_t *mf);
     175// Process
     177/** mfcc object processing
     179  \param mf mfcc object as returned by new_aubio_mfcc
     180  \param in input spectrum (win_s long)
     181  \param out output mel coefficients buffer (n_filters/2 +1 long)
     185void aubio_mfcc_do(aubio_mfcc_t * mf, cvec_t *in, fvec_t *out);
     187/** intermediate dct involved in aubio_mfcc_do
     189  \param mf mfcc object as returned by new_aubio_mfcc
     190  \param in input spectrum (n_filters long)
     191  \param out output mel coefficients buffer (n_filters/2 +1 long)
     195void aubio_dct_do(aubio_mfcc_t * mf, fvec_t *in, fvec_t *out);
     200//old code
    165204int aubio_mfcc_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t *fft_dct, cvec_t *fftgrain_dct);
    167 /** \brief Extract the Discrete Cosine transform of a time domain signal
    168  * \param *data: a pointer to the first element in an array of floats representing an audio vector
    169  * \param N: the number of array elements to be considered
    170  * \param *argv: a pointer to NULL
    171  * \param *result: a pointer to an array containing resultant dct coefficients
    172  */
    173 int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t *fft_dct, cvec_t *fftgrain_dct);
     206int aubio_dct_do(const float *data, const int N, const void *argv, float *result, aubio_mfft_t *fft_dct, cvec_t *fftgrain_dct);*/
    175211#ifdef __cplusplus
Note: See TracChangeset for help on using the changeset viewer.