Changeset 986131d for src/spectral/fft.c


Ignore:
Timestamp:
Jul 29, 2017, 5:55:35 PM (7 years ago)
Author:
Eduard Müller <mueller.eduard@googlemail.com>
Branches:
feature/autosink, feature/cnn, feature/cnn_org, feature/constantq, feature/crepe, feature/crepe_org, feature/pitchshift, feature/pydocstrings, feature/timestretch, fix/ffmpeg5, master
Children:
7100895
Parents:
34ce715
Message:

Intel IPP support for aubio

See emuell/aubio/ intel_ipp2 for details please

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/spectral/fft.c

    r34ce715 r986131d  
    7878pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
    7979
    80 #else
    81 #ifdef HAVE_ACCELERATE        // using ACCELERATE
     80#elif defined HAVE_ACCELERATE        // using ACCELERATE
    8281// https://developer.apple.com/library/mac/#documentation/Accelerate/Reference/vDSPRef/Reference/reference.html
    8382#include <Accelerate/Accelerate.h>
     
    113112#endif /* HAVE_AUBIO_DOUBLE */
    114113
    115 #else                         // using OOURA
     114#elif defined HAVE_INTEL_IPP // using INTEL IPP
     115
     116#include <ippcore.h>
     117#include <ippvm.h>
     118#include <ipps.h>
     119
     120#else // using OOURA
    116121// let's use ooura instead
    117122extern void aubio_ooura_rdft(int, int, smpl_t *, int *, smpl_t *);
    118123
    119 #endif /* HAVE_ACCELERATE */
    120 #endif /* HAVE_FFTW3 */
     124#endif
    121125
    122126struct _aubio_fft_t {
    123127  uint_t winsize;
    124128  uint_t fft_size;
     129
    125130#ifdef HAVE_FFTW3             // using FFTW3
    126131  real_t *in, *out;
    127132  fftw_plan pfw, pbw;
    128   fft_data_t * specdata;      /* complex spectral data */
    129 #else
    130 #ifdef HAVE_ACCELERATE        // using ACCELERATE
     133  fft_data_t * specdata; /* complex spectral data */
     134
     135#elif defined HAVE_ACCELERATE  // using ACCELERATE
    131136  int log2fftsize;
    132137  aubio_FFTSetup fftSetup;
    133138  aubio_DSPSplitComplex spec;
    134139  smpl_t *in, *out;
     140 
     141#elif defined HAVE_INTEL_IPP  // using Intel IPP
     142  // mark FFT impl as Intel IPP
     143  #define INTEL_IPP_FFT 1
     144  smpl_t *in, *out;
     145  Ipp8u* memSpec;
     146  Ipp8u* memInit;
     147  Ipp8u* memBuffer;
     148  #if HAVE_AUBIO_DOUBLE
     149    struct FFTSpec_R_64f* fftSpec;
     150    Ipp64fc* complexOut;
     151  #else
     152    struct FFTSpec_R_32f* fftSpec;
     153    Ipp32fc* complexOut;
     154  #endif
    135155#else                         // using OOURA
    136156  smpl_t *in, *out;
    137157  smpl_t *w;
    138158  int *ip;
    139 #endif /* HAVE_ACCELERATE */
    140 #endif /* HAVE_FFTW3 */
     159#endif /* using OOURA */
     160
    141161  fvec_t * compspec;
    142162};
     
    148168    goto beach;
    149169  }
     170
    150171#ifdef HAVE_FFTW3
    151172  uint_t i;
     
    176197    s->specdata[i] = 0.;
    177198  }
    178 #else
    179 #ifdef HAVE_ACCELERATE        // using ACCELERATE
     199
     200#elif defined HAVE_ACCELERATE  // using ACCELERATE
    180201  s->winsize = winsize;
    181202  s->fft_size = winsize;
    182203  s->compspec = new_fvec(winsize);
    183   s->log2fftsize = (uint_t)log2f(s->fft_size);
     204  s->log2fftsize = aubio_power_of_two_order(s->fft_size);
    184205  s->in = AUBIO_ARRAY(smpl_t, s->fft_size);
    185206  s->out = AUBIO_ARRAY(smpl_t, s->fft_size);
     
    187208  s->spec.imagp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
    188209  s->fftSetup = aubio_vDSP_create_fftsetup(s->log2fftsize, FFT_RADIX2);
     210
     211#elif defined HAVE_INTEL_IPP  // using Intel IPP
     212  const IppHintAlgorithm qualityHint = ippAlgHintAccurate; // OR ippAlgHintFast;
     213  const int flags = IPP_FFT_NODIV_BY_ANY; // we're scaling manually afterwards
     214  int order = aubio_power_of_two_order(winsize);
     215  int sizeSpec, sizeInit, sizeBuffer;
     216  IppStatus status;
     217
     218  if (winsize <= 4 || aubio_is_power_of_two(winsize) != 1)
     219  {
     220    AUBIO_ERR("intel IPP fft: can only create with sizes > 4 and power of two, requested %d,"
     221      " try recompiling aubio with --enable-fftw3\n", winsize);
     222    goto beach;
     223  }
     224
     225#if HAVE_AUBIO_DOUBLE
     226  status = ippsFFTGetSize_R_64f(order, flags, qualityHint,
     227      &sizeSpec, &sizeInit, &sizeBuffer);
     228#else
     229  status = ippsFFTGetSize_R_32f(order, flags, qualityHint,
     230    &sizeSpec, &sizeInit, &sizeBuffer);
     231#endif
     232  if (status != ippStsNoErr) {
     233    AUBIO_ERR("fft: failed to initialize fft. IPP error: %d\n", status);
     234    goto beach;
     235  }
     236  s->fft_size = s->winsize = winsize;
     237  s->compspec = new_fvec(winsize);
     238  s->in = AUBIO_ARRAY(smpl_t, s->winsize);
     239  s->out = AUBIO_ARRAY(smpl_t, s->winsize);
     240  s->memSpec = ippsMalloc_8u(sizeSpec);
     241  s->memBuffer = ippsMalloc_8u(sizeBuffer);
     242  if (sizeInit > 0 ) {
     243    s->memInit = ippsMalloc_8u(sizeInit);
     244  }
     245#if HAVE_AUBIO_DOUBLE
     246  s->complexOut = ippsMalloc_64fc(s->fft_size / 2 + 1);
     247  status = ippsFFTInit_R_64f(
     248    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
     249#else
     250  s->complexOut = ippsMalloc_32fc(s->fft_size / 2 + 1);
     251  status = ippsFFTInit_R_32f(
     252    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
     253#endif
     254  if (status != ippStsNoErr) {
     255    AUBIO_ERR("fft: failed to initialize. IPP error: %d\n", status);
     256    goto beach;
     257  }
     258
    189259#else                         // using OOURA
    190260  if (aubio_is_power_of_two(winsize) != 1) {
     
    201271  s->w     = AUBIO_ARRAY(smpl_t, s->fft_size);
    202272  s->ip[0] = 0;
    203 #endif /* HAVE_ACCELERATE */
    204 #endif /* HAVE_FFTW3 */
     273#endif /* using OOURA */
     274
    205275  return s;
     276
    206277beach:
    207278  AUBIO_FREE(s);
     
    211282void del_aubio_fft(aubio_fft_t * s) {
    212283  /* destroy data */
    213   del_fvec(s->compspec);
    214284#ifdef HAVE_FFTW3             // using FFTW3
    215285  pthread_mutex_lock(&aubio_fftw_mutex);
     
    218288  fftw_free(s->specdata);
    219289  pthread_mutex_unlock(&aubio_fftw_mutex);
    220 #else /* HAVE_FFTW3 */
    221 #ifdef HAVE_ACCELERATE        // using ACCELERATE
     290
     291#elif defined HAVE_ACCELERATE // using ACCELERATE
    222292  AUBIO_FREE(s->spec.realp);
    223293  AUBIO_FREE(s->spec.imagp);
    224294  aubio_vDSP_destroy_fftsetup(s->fftSetup);
     295
     296#elif defined HAVE_INTEL_IPP  // using Intel IPP
     297  ippFree(s->memSpec);
     298  ippFree(s->memInit);
     299  ippFree(s->memBuffer);
     300  ippFree(s->complexOut);
     301
    225302#else                         // using OOURA
    226303  AUBIO_FREE(s->w);
    227304  AUBIO_FREE(s->ip);
    228 #endif /* HAVE_ACCELERATE */
    229 #endif /* HAVE_FFTW3 */
     305#endif
     306
     307  del_fvec(s->compspec);
     308  AUBIO_FREE(s->in);
    230309  AUBIO_FREE(s->out);
    231   AUBIO_FREE(s->in);
    232310  AUBIO_FREE(s);
    233311}
     
    235313void aubio_fft_do(aubio_fft_t * s, const fvec_t * input, cvec_t * spectrum) {
    236314  aubio_fft_do_complex(s, input, s->compspec);
    237   aubio_fft_get_spectrum(s->compspec, spectrum);
     315  aubio_fft_get_spectrum(s, s->compspec, spectrum);
    238316}
    239317
    240318void aubio_fft_rdo(aubio_fft_t * s, const cvec_t * spectrum, fvec_t * output) {
    241   aubio_fft_get_realimag(spectrum, s->compspec);
     319  aubio_fft_get_realimag(s, spectrum, s->compspec);
    242320  aubio_fft_rdo_complex(s, s->compspec, output);
    243321}
     
    252330  memcpy(s->in, input->data, s->winsize * sizeof(smpl_t));
    253331#endif /* HAVE_MEMCPY_HACKS */
     332
    254333#ifdef HAVE_FFTW3             // using FFTW3
    255334  fftw_execute(s->pfw);
     
    266345  }
    267346#endif /* HAVE_COMPLEX_H */
    268 #else /* HAVE_FFTW3 */
    269 #ifdef HAVE_ACCELERATE        // using ACCELERATE
     347
     348#elif defined HAVE_ACCELERATE // using ACCELERATE
    270349  // convert real data to even/odd format used in vDSP
    271350  aubio_vDSP_ctoz((aubio_DSPComplex*)s->in, 2, &s->spec, 1, s->fft_size/2);
     
    282361  smpl_t scale = 1./2.;
    283362  aubio_vDSP_vsmul(compspec->data, 1, &scale, compspec->data, 1, s->fft_size);
     363
     364#elif defined HAVE_INTEL_IPP  // using Intel IPP
     365
     366  // apply fft
     367#if HAVE_AUBIO_DOUBLE
     368  ippsFFTFwd_RToCCS_64f(s->in, (Ipp64f*)s->complexOut, s->fftSpec, s->memBuffer);
     369#else
     370  ippsFFTFwd_RToCCS_32f(s->in, (Ipp32f*)s->complexOut, s->fftSpec, s->memBuffer);
     371#endif
     372  // convert complex buffer to [ r0, r1, ..., rN, iN-1, .., i2, i1]
     373  compspec->data[0] = s->complexOut[0].re;
     374  compspec->data[s->fft_size / 2] = s->complexOut[s->fft_size / 2].re;
     375  for (i = 1; i < s->fft_size / 2; i++) {
     376    compspec->data[i] = s->complexOut[i].re;
     377    compspec->data[s->fft_size - i] = s->complexOut[i].im;
     378  }
     379  // apply scaling
     380#if HAVE_AUBIO_DOUBLE
     381  ippsMulC_64f(compspec->data, 1.0 / 2.0, compspec->data, s->fft_size);
     382#else
     383  ippsMulC_32f(compspec->data, 1.0 / 2.0, compspec->data, s->fft_size);
     384#endif
     385
    284386#else                         // using OOURA
    285387  aubio_ooura_rdft(s->winsize, 1, s->in, s->ip, s->w);
     
    290392    compspec->data[s->winsize - i] = - s->in[2 * i + 1];
    291393  }
    292 #endif /* HAVE_ACCELERATE */
    293 #endif /* HAVE_FFTW3 */
     394#endif /* using OOURA */
    294395}
    295396
     
    314415    output->data[i] = s->out[i]*renorm;
    315416  }
    316 #else /* HAVE_FFTW3 */
    317 #ifdef HAVE_ACCELERATE        // using ACCELERATE
     417
     418#elif defined HAVE_ACCELERATE // using ACCELERATE
    318419  // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1]
    319420  // to vDSP packed format   [ r0, rN, r1, i1, ..., rN-1, iN-1 ]
     
    333434  smpl_t scale = 1.0 / s->winsize;
    334435  aubio_vDSP_vsmul(output->data, 1, &scale, output->data, 1, s->fft_size);
     436
     437#elif defined HAVE_INTEL_IPP  // using Intel IPP
     438
     439  // convert from real imag  [ r0, 0, ..., rN, iN-1, .., i2, i1, iN-1] to complex format
     440  s->complexOut[0].re = compspec->data[0];
     441  s->complexOut[0].im = 0;
     442  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
     443  s->complexOut[s->fft_size / 2].im = 0.0;
     444  for (i = 1; i < s->fft_size / 2; i++) {
     445    s->complexOut[i].re = compspec->data[i];
     446    s->complexOut[i].im = compspec->data[s->fft_size - i];
     447  }
     448#if HAVE_AUBIO_DOUBLE
     449  // apply fft
     450  ippsFFTInv_CCSToR_64f((const Ipp64f *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
     451  // apply scaling
     452  ippsMulC_64f(output->data, 1.0 / s->winsize, output->data, s->fft_size);
     453#else
     454  // apply fft
     455  ippsFFTInv_CCSToR_32f((const Ipp32f *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
     456  // apply scaling
     457  ippsMulC_32f(output->data, 1.0f / s->winsize, output->data, s->fft_size);
     458#endif /* HAVE_AUBIO_DOUBLE */
     459
    335460#else                         // using OOURA
    336   smpl_t scale = 2.0 / s->winsize;
     461  smpl_t scale = 1.0 / s->winsize;
    337462  s->out[0] = compspec->data[0];
    338463  s->out[1] = compspec->data[s->winsize / 2];
     
    345470    output->data[i] = s->out[i] * scale;
    346471  }
    347 #endif /* HAVE_ACCELERATE */
    348 #endif /* HAVE_FFTW3 */
    349 }
    350 
    351 void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum) {
    352   aubio_fft_get_phas(compspec, spectrum);
    353   aubio_fft_get_norm(compspec, spectrum);
    354 }
    355 
    356 void aubio_fft_get_realimag(const cvec_t * spectrum, fvec_t * compspec) {
    357   aubio_fft_get_imag(spectrum, compspec);
    358   aubio_fft_get_real(spectrum, compspec);
    359 }
    360 
    361 void aubio_fft_get_phas(const fvec_t * compspec, cvec_t * spectrum) {
     472#endif
     473}
     474
     475void aubio_fft_get_spectrum(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
     476  aubio_fft_get_phas(s, compspec, spectrum);
     477  aubio_fft_get_norm(s, compspec, spectrum);
     478}
     479
     480void aubio_fft_get_realimag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
     481  aubio_fft_get_imag(s, spectrum, compspec);
     482  aubio_fft_get_real(s, spectrum, compspec);
     483}
     484
     485void aubio_fft_get_phas(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
     486
     487#ifdef INTEL_IPP_FFT // using Intel IPP FFT
     488  uint_t i;
     489 
     490  // convert from real imag  [ r0, 0, ..., rN, iN-1, .., i2, i1, iN-1] to complex format
     491  s->complexOut[0].re = compspec->data[0];
     492  s->complexOut[0].im = 0;
     493  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
     494  s->complexOut[s->fft_size / 2].im = 0.0;
     495  for (i = 1; i < spectrum->length - 1; i++) {
     496    s->complexOut[i].re = compspec->data[i];
     497    s->complexOut[i].im = compspec->data[compspec->length - i];
     498  }
     499 
     500#if HAVE_AUBIO_DOUBLE
     501  IppStatus status = ippsPhase_64fc(s->complexOut, spectrum->phas, spectrum->length);
     502#else
     503  IppStatus status = ippsPhase_32fc(s->complexOut, spectrum->phas, spectrum->length);
     504#endif
     505  if (status != ippStsNoErr) {
     506    AUBIO_ERR("fft: failed to extract phase from fft. IPP error: %d\n", status);
     507  }
     508
     509#else                 // NOT using Intel IPP
    362510  uint_t i;
    363511  if (compspec->data[0] < 0) {
     
    375523    spectrum->phas[spectrum->length - 1] = 0.;
    376524  }
    377 }
    378 
    379 void aubio_fft_get_norm(const fvec_t * compspec, cvec_t * spectrum) {
     525#endif
     526}
     527
     528void aubio_fft_get_norm(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
    380529  uint_t i = 0;
    381530  spectrum->norm[0] = ABS(compspec->data[0]);
     
    388537}
    389538
    390 void aubio_fft_get_imag(const cvec_t * spectrum, fvec_t * compspec) {
     539void aubio_fft_get_imag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
    391540  uint_t i;
    392541  for (i = 1; i < ( compspec->length + 1 ) / 2 /*- 1 + 1*/; i++) {
     
    396545}
    397546
    398 void aubio_fft_get_real(const cvec_t * spectrum, fvec_t * compspec) {
     547void aubio_fft_get_real(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
    399548  uint_t i;
    400549  for (i = 0; i < compspec->length / 2 + 1; i++) {
Note: See TracChangeset for help on using the changeset viewer.