Context Navigation

← Previous Change
Next Change →

Changeset 986131d for src

Timestamp:

Jul 29, 2017, 5:55:35 PM (8 years ago)

Author:

Eduard Müller <mueller.eduard@googlemail.com>

Branches:

feature/autosink, feature/cnn, feature/cnn_org, feature/constantq, feature/crepe, feature/crepe_org, feature/pitchshift, feature/pydocstrings, feature/timestretch, fix/applefworks, fix/ffmpeg5, master

Children:

Parents:

Message:

Intel IPP support for aubio

See the intel_ipp2 branch of emuell/aubio for details.

Location:

Files:

7 edited

cvec.c (modified) (5 diffs)
fvec.c (modified) (4 diffs)
mathutils.c (modified) (6 diffs)
mathutils.h (modified) (1 diff)
musicutils.h (modified) (1 diff)
spectral/fft.c (modified) (19 diffs)
spectral/fft.h (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

src/cvec.c

-                      r34ce715
+                      r986131d
 #include "aubio_priv.h"
 #include "cvec.h"
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#endif
 cvec_t * new_cvec(uint_t length) {
 …
     return;
+  }
+#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsCopy_64f(s->phas, t->phas, (int)s->length);
+    ippsCopy_64f(s->norm, t->norm, (int)s->length);
+  #else
+    ippsCopy_32f(s->phas, t->phas, (int)s->length);
+    ippsCopy_32f(s->norm, t->norm, (int)s->length);
+  #endif
+#elif defined(HAVE_MEMCPY_HACKS)
   memcpy(t->norm, s->norm, t->length * sizeof(smpl_t));
   memcpy(t->phas, s->phas, t->length * sizeof(smpl_t));
 #else /* HAVE_MEMCPY_HACKS */
+#else
   uint_t j;
   for (j=0; j< t->length; j++) {
 …
     t->phas[j] = s->phas[j];
+  }
+#endif /* HAVE_MEMCPY_HACKS */
+}
+void cvec_norm_set_all (cvec_t *s, smpl_t val) {
+#endif
+}
+void cvec_norm_set_all(cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSet_64f(val, s->norm, (int)s->length);
+  #else
+    ippsSet_32f(val, s->norm, (int)s->length);
+  #endif
+#else
   uint_t j;
   for (j=0; j< s->length; j++) {
     s->norm[j] = val;
+  }
+#endif
+}
 void cvec_norm_zeros(cvec_t *s) {
+#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsZero_64f(s->norm, (int)s->length);
+  #else
+    ippsZero_32f(s->norm, (int)s->length);
+  #endif
+#elif defined(HAVE_MEMCPY_HACKS)
   memset(s->norm, 0, s->length * sizeof(smpl_t));
 #else /* HAVE_MEMCPY_HACKS */
+#else
   cvec_norm_set_all (s, 0.);
 #endif /* HAVE_MEMCPY_HACKS */
+#endif
+}
 …
 void cvec_phas_set_all (cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSet_64f(val, s->phas, (int)s->length);
+  #else
+    ippsSet_32f(val, s->phas, (int)s->length);
+  #endif
+#else
   uint_t j;
   for (j=0; j< s->length; j++) {
     s->phas[j] = val;
+  }
+#endif
+}
 void cvec_phas_zeros(cvec_t *s) {
+#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsZero_64f(s->phas, (int)s->length);
+  #else
+    ippsZero_32f(s->phas, (int)s->length);
+  #endif
+#elif defined(HAVE_MEMCPY_HACKS)
   memset(s->phas, 0, s->length * sizeof(smpl_t));
 #else
 …
 void cvec_logmag(cvec_t *s, smpl_t lambda) {
+  uint_t j;
+  for (j=0; j< s->length; j++) {
+    s->norm[j] = LOG(lambda * s->norm[j] + 1);
+  }
+}
+  #if defined(HAVE_INTEL_IPP)
+    #if HAVE_AUBIO_DOUBLE
+      ippsMulC_64f(s->norm, lambda, s->norm, (int)s->length);
+      ippsAddC_64f(s->norm, 1.0, s->norm, (int)s->length);
+      ippsLn_64f_A26(s->norm, s->norm, (int)s->length);
+    #else
+      ippsMulC_32f(s->norm, lambda, s->norm, (int)s->length);
+      ippsAddC_32f(s->norm, 1.0, s->norm, (int)s->length);
+      ippsLn_32f_A21(s->norm, s->norm, (int)s->length);
+    #endif
+  #else
+    uint_t j;
+    for (j=0; j< s->length; j++) {
+      s->norm[j] = LOG(lambda * s->norm[j] + 1);
+    }
+  #endif
+}

src/fvec.c

-                      r34ce715
+                      r986131d
 #include "aubio_priv.h"
 #include "fvec.h"
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#endif
 fvec_t * new_fvec(uint_t length) {
 …
 void fvec_set_all (fvec_t *s, smpl_t val) {
+#if !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)
+  uint_t j;
+  for (j=0; j< s->length; j++) {
+    s->data[j] = val;
+  }
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSet_64f(val, s->data, (int)s->length);
+  #else
+    ippsSet_32f(val, s->data, (int)s->length);
+  #endif
 #elif defined(HAVE_ATLAS)
   aubio_catlas_set(s->length, val, s->data, 1);
 #elif defined(HAVE_ACCELERATE)
   aubio_vDSP_vfill(&val, s->data, 1, s->length);
+#else
+  uint_t j;
+  for ( j = 0; j< s->length; j++ )
+  {
+    s->data[j] = val;
+  }
 #endif
+}
 void fvec_zeros(fvec_t *s) {
+#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE)
+  fvec_set_all (s, 0.);
+#else
+#if defined(HAVE_MEMCPY_HACKS)
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsZero_64f(s->data, (int)s->length);
+  #else
+    ippsZero_32f(s->data, (int)s->length);
+  #endif
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vclr(s->data, 1, s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
   memset(s->data, 0, s->length * sizeof(smpl_t));
 #else
+  aubio_vDSP_vclr(s->data, 1, s->length);
+#endif
+  fvec_set_all(s, 0.);
 #endif
+}
 …
 void fvec_weight(fvec_t *s, const fvec_t *weight) {
+#ifndef HAVE_ACCELERATE
+  uint_t length = MIN(s->length, weight->length);
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsMul_64f(s->data, weight->data, s->data, (int)length);
+  #else
+    ippsMul_32f(s->data, weight->data, s->data, (int)length);
+  #endif
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vmul( s->data, 1, weight->data, 1, s->data, 1, length );
+#else
   uint_t j;
+  uint_t length = MIN(s->length, weight->length);
+  for (j=0; j< length; j++) {
+  for (j = 0; j < length; j++) {
     s->data[j] *= weight->data[j];
+  }
-#else
-  aubio_vDSP_vmul(s->data, 1, weight->data, 1, s->data, 1, s->length);
 #endif /* HAVE_ACCELERATE */
+}
 void fvec_weighted_copy(const fvec_t *in, const fvec_t *weight, fvec_t *out) {
+#ifndef HAVE_ACCELERATE
+  uint_t length = MIN(in->length, MIN(out->length, weight->length));
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsMul_64f(in->data, weight->data, out->data, (int)length);
+  #else
+    ippsMul_32f(in->data, weight->data, out->data, (int)length);
+  #endif
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, length);
+#else
   uint_t j;
+  uint_t length = MIN(out->length, weight->length);
+  for (j=0; j< length; j++) {
+  for (j = 0; j < length; j++) {
     out->data[j] = in->data[j] * weight->data[j];
+  }
+#else
+  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, out->length);
+#endif /* HAVE_ACCELERATE */
+#endif
+}
 …
     return;
+  }
+#ifdef HAVE_NOOPT
+  uint_t j;
+  for (j=0; j< t->length; j++) {
+    t->data[j] = s->data[j];
+  }
+#elif defined(HAVE_MEMCPY_HACKS)
+  memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsCopy_64f(s->data, t->data, (int)s->length);
+  #else
+    ippsCopy_32f(s->data, t->data, (int)s->length);
+  #endif
 #elif defined(HAVE_ATLAS)
   aubio_cblas_copy(s->length, s->data, 1, t->data, 1);
 #elif defined(HAVE_ACCELERATE)
   aubio_vDSP_mmov(s->data, t->data, 1, s->length, 1, 1);
+#elif defined(HAVE_MEMCPY_HACKS)
+  memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#else
+  uint_t j;
+  for (j = 0; j < t->length; j++) {
+    t->data[j] = s->data[j];
+  }
 #endif
+}

src/mathutils.c

-                      r34ce715
+                      r986131d
 #include "mathutils.h"
 #include "musicutils.h"
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#endif
 /** Window types */
 …
+{
   smpl_t tmp = 0.0;
+#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsMean_64f(s->data, (int)s->length, &tmp);
+  #else
+    ippsMean_32f(s->data, (int)s->length, &tmp, ippAlgHintFast);
+  #endif
+    return tmp;
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
+  return tmp;
+#else
   uint_t j;
   for (j = 0; j < s->length; j++) {
     tmp += s->data[j];
+  }
+  return tmp / (smpl_t) (s->length);
+#else
+  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
+  return tmp;
+#endif /* HAVE_ACCELERATE */
+  return tmp / (smpl_t)(s->length);
+#endif
+}
 …
+{
   smpl_t tmp = 0.0;
+#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  #if HAVE_AUBIO_DOUBLE
+    ippsSum_64f(s->data, (int)s->length, &tmp);
+  #else
+    ippsSum_32f(s->data, (int)s->length, &tmp, ippAlgHintFast);
+  #endif
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_sve(s->data, 1, &tmp, s->length);
+#else
   uint_t j;
   for (j = 0; j < s->length; j++) {
     tmp += s->data[j];
+  }
+#else
+  aubio_vDSP_sve(s->data, 1, &tmp, s->length);
+#endif /* HAVE_ACCELERATE */
+#endif
   return tmp;
+}
 …
 fvec_max (fvec_t * s)
+{
+#ifndef HAVE_ACCELERATE
+  uint_t j;
+  smpl_t tmp = 0.0;
+  for (j = 0; j < s->length; j++) {
+#if defined(HAVE_INTEL_IPP)
+  smpl_t tmp = 0.;
+  #if HAVE_AUBIO_DOUBLE
+    ippsMax_64f( s->data, (int)s->length, &tmp);
+  #else
+    ippsMax_32f( s->data, (int)s->length, &tmp);
+#endif
+#elif defined(HAVE_ACCELERATE)
+  smpl_t tmp = 0.;
+  aubio_vDSP_maxv( s->data, 1, &tmp, s->length );
+#else
+  uint_t j;
+  smpl_t tmp = s->data[0];
+  for (j = 1; j < s->length; j++) {
     tmp = (tmp > s->data[j]) ? tmp : s->data[j];
+  }
+#else
+#endif
+  return tmp;
+}
+smpl_t
+fvec_min (fvec_t * s)
+{
+#if defined(HAVE_INTEL_IPP)
   smpl_t tmp = 0.;
+  aubio_vDSP_maxv(s->data, 1, &tmp, s->length);
+#endif
+  return tmp;
+}
+smpl_t
+fvec_min (fvec_t * s)
+{
+#ifndef HAVE_ACCELERATE
+  uint_t j;
+  smpl_t tmp = s->data[0];
+  for (j = 0; j < s->length; j++) {
+    tmp = (tmp < s->data[j]) ? tmp : s->data[j];
+  }
+#else
+  #if HAVE_AUBIO_DOUBLE
+    ippsMin_64f(s->data, (int)s->length, &tmp);
+  #else
+    ippsMin_32f(s->data, (int)s->length, &tmp);
+#endif
+#elif defined(HAVE_ACCELERATE)
   smpl_t tmp = 0.;
   aubio_vDSP_minv(s->data, 1, &tmp, s->length);
+#else
+  uint_t j;
+  smpl_t tmp = s->data[0];
+  for (j = 1; j < s->length; j++) {
+    tmp = (tmp < s->data[j]) ? tmp : s->data[j];
+  }
 #endif
   return tmp;
 …
+}
+uint_t
+aubio_power_of_two_order (uint_t a)
+{
+  int order = 0;
+  int temp = aubio_next_power_of_two(a);
+  while (temp >>= 1) {
+    ++order;
+  }
+  return order;
+}
 smpl_t
 aubio_db_spl (const fvec_t * o)
 …
 void
+aubio_init (void)
+{
+/* initialize intel IPP */
+#ifdef HAVE_INTEL_IPP
+  IppStatus status = ippInit();
+  if (status != ippStsNoErr) {
+    fprintf (stderr, "Error: failed to initialize Intel IPP - status %d\n", status);
+  }
+#endif
+}
+void
 aubio_cleanup (void)
+{

src/mathutils.h

-                      r34ce715
+                      r986131d
 uint_t aubio_next_power_of_two(uint_t a);
+/** return the log2 factor of the given power of 2 value a */
+uint_t aubio_power_of_two_order(uint_t a);
 /** compute normalised autocorrelation function

src/musicutils.h

-                      r34ce715
+                      r986131d
 smpl_t aubio_miditofreq (smpl_t midi);
+/** initialize global state at the beginning of the program
+  This function should be called before using anything else in aubio.
+  So far it only initializes the Intel IPP library, when aubio is built with it.
+*/
+void aubio_init (void);
 /** clean up cached memory at the end of program

src/spectral/fft.c

-                      r34ce715
+                      r986131d
 pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
+#else
+#ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE        // using ACCELERATE
 // https://developer.apple.com/library/mac/#documentation/Accelerate/Reference/vDSPRef/Reference/reference.html
 #include <Accelerate/Accelerate.h>
 …
 #endif /* HAVE_AUBIO_DOUBLE */
+#else                         // using OOURA
+#elif defined HAVE_INTEL_IPP // using INTEL IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#else // using OOURA
 // let's use ooura instead
 extern void aubio_ooura_rdft(int, int, smpl_t *, int *, smpl_t *);
+#endif /* HAVE_ACCELERATE */
+#endif /* HAVE_FFTW3 */
+#endif
 struct _aubio_fft_t {
   uint_t winsize;
   uint_t fft_size;
 #ifdef HAVE_FFTW3             // using FFTW3
   real_t *in, *out;
   fftw_plan pfw, pbw;
   fft_data_t * specdata;      /* complex spectral data */
+#else
 #ifdef HAVE_ACCELERATE        // using ACCELERATE
+  fft_data_t * specdata; /* complex spectral data */
+#elif defined HAVE_ACCELERATE  // using ACCELERATE
   int log2fftsize;
   aubio_FFTSetup fftSetup;
   aubio_DSPSplitComplex spec;
   smpl_t *in, *out;
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  // mark FFT impl as Intel IPP
+  #define INTEL_IPP_FFT 1
+  smpl_t *in, *out;
+  Ipp8u* memSpec;
+  Ipp8u* memInit;
+  Ipp8u* memBuffer;
+  #if HAVE_AUBIO_DOUBLE
+    struct FFTSpec_R_64f* fftSpec;
+    Ipp64fc* complexOut;
+  #else
+    struct FFTSpec_R_32f* fftSpec;
+    Ipp32fc* complexOut;
+  #endif
 #else                         // using OOURA
   smpl_t *in, *out;
   smpl_t *w;
   int *ip;
 #endif /* HAVE_ACCELERATE */
+#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
   fvec_t * compspec;
 };
 …
     goto beach;
+  }
 #ifdef HAVE_FFTW3
   uint_t i;
 …
     s->specdata[i] = 0.;
+  }
+#else
 #ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE  // using ACCELERATE
   s->winsize = winsize;
   s->fft_size = winsize;
   s->compspec = new_fvec(winsize);
   s->log2fftsize = (uint_t)log2f(s->fft_size);
+  s->log2fftsize = aubio_power_of_two_order(s->fft_size);
   s->in = AUBIO_ARRAY(smpl_t, s->fft_size);
   s->out = AUBIO_ARRAY(smpl_t, s->fft_size);
 …
   s->spec.imagp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
   s->fftSetup = aubio_vDSP_create_fftsetup(s->log2fftsize, FFT_RADIX2);
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  const IppHintAlgorithm qualityHint = ippAlgHintAccurate; // OR ippAlgHintFast;
+  const int flags = IPP_FFT_NODIV_BY_ANY; // we're scaling manually afterwards
+  int order = aubio_power_of_two_order(winsize);
+  int sizeSpec, sizeInit, sizeBuffer;
+  IppStatus status;
+  if (winsize <= 4 || aubio_is_power_of_two(winsize) != 1)
+  {
+    AUBIO_ERR("intel IPP fft: can only create with sizes > 4 and power of two, requested %d,"
+      " try recompiling aubio with --enable-fftw3\n", winsize);
+    goto beach;
+  }
+#if HAVE_AUBIO_DOUBLE
+  status = ippsFFTGetSize_R_64f(order, flags, qualityHint,
+      &sizeSpec, &sizeInit, &sizeBuffer);
+#else
+  status = ippsFFTGetSize_R_32f(order, flags, qualityHint,
+    &sizeSpec, &sizeInit, &sizeBuffer);
+#endif
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to initialize fft. IPP error: %d\n", status);
+    goto beach;
+  }
+  s->fft_size = s->winsize = winsize;
+  s->compspec = new_fvec(winsize);
+  s->in = AUBIO_ARRAY(smpl_t, s->winsize);
+  s->out = AUBIO_ARRAY(smpl_t, s->winsize);
+  s->memSpec = ippsMalloc_8u(sizeSpec);
+  s->memBuffer = ippsMalloc_8u(sizeBuffer);
+  if (sizeInit > 0 ) {
+    s->memInit = ippsMalloc_8u(sizeInit);
+  }
+#if HAVE_AUBIO_DOUBLE
+  s->complexOut = ippsMalloc_64fc(s->fft_size / 2 + 1);
+  status = ippsFFTInit_R_64f(
+    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
+#else
+  s->complexOut = ippsMalloc_32fc(s->fft_size / 2 + 1);
+  status = ippsFFTInit_R_32f(
+    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
+#endif
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to initialize. IPP error: %d\n", status);
+    goto beach;
+  }
 #else                         // using OOURA
   if (aubio_is_power_of_two(winsize) != 1) {
 …
   s->w     = AUBIO_ARRAY(smpl_t, s->fft_size);
   s->ip[0] = 0;
 #endif /* HAVE_ACCELERATE */
+#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
   return s;
 beach:
   AUBIO_FREE(s);
 …
 void del_aubio_fft(aubio_fft_t * s) {
   /* destroy data */
-  del_fvec(s->compspec);
 #ifdef HAVE_FFTW3             // using FFTW3
   pthread_mutex_lock(&aubio_fftw_mutex);
 …
   fftw_free(s->specdata);
   pthread_mutex_unlock(&aubio_fftw_mutex);
+#else /* HAVE_FFTW3 */
 #ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE // using ACCELERATE
   AUBIO_FREE(s->spec.realp);
   AUBIO_FREE(s->spec.imagp);
   aubio_vDSP_destroy_fftsetup(s->fftSetup);
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  ippFree(s->memSpec);
+  ippFree(s->memInit);
+  ippFree(s->memBuffer);
+  ippFree(s->complexOut);
 #else                         // using OOURA
   AUBIO_FREE(s->w);
   AUBIO_FREE(s->ip);
+#endif /* HAVE_ACCELERATE */
+#endif /* HAVE_FFTW3 */
+#endif
+  del_fvec(s->compspec);
+  AUBIO_FREE(s->in);
   AUBIO_FREE(s->out);
-  AUBIO_FREE(s->in);
   AUBIO_FREE(s);
+}
 …
 void aubio_fft_do(aubio_fft_t * s, const fvec_t * input, cvec_t * spectrum) {
   aubio_fft_do_complex(s, input, s->compspec);
   aubio_fft_get_spectrum(s->compspec, spectrum);
+  aubio_fft_get_spectrum(s, s->compspec, spectrum);
+}
 void aubio_fft_rdo(aubio_fft_t * s, const cvec_t * spectrum, fvec_t * output) {
   aubio_fft_get_realimag(spectrum, s->compspec);
+  aubio_fft_get_realimag(s, spectrum, s->compspec);
   aubio_fft_rdo_complex(s, s->compspec, output);
+}
 …
   memcpy(s->in, input->data, s->winsize * sizeof(smpl_t));
 #endif /* HAVE_MEMCPY_HACKS */
 #ifdef HAVE_FFTW3             // using FFTW3
   fftw_execute(s->pfw);
 …
+  }
 #endif /* HAVE_COMPLEX_H */
+#else /* HAVE_FFTW3 */
 #ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE // using ACCELERATE
   // convert real data to even/odd format used in vDSP
   aubio_vDSP_ctoz((aubio_DSPComplex*)s->in, 2, &s->spec, 1, s->fft_size/2);
 …
   smpl_t scale = 1./2.;
   aubio_vDSP_vsmul(compspec->data, 1, &scale, compspec->data, 1, s->fft_size);
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  // apply fft
+#if HAVE_AUBIO_DOUBLE
+  ippsFFTFwd_RToCCS_64f(s->in, (Ipp64f*)s->complexOut, s->fftSpec, s->memBuffer);
+#else
+  ippsFFTFwd_RToCCS_32f(s->in, (Ipp32f*)s->complexOut, s->fftSpec, s->memBuffer);
+#endif
+  // convert complex buffer to [ r0, r1, ..., rN, iN-1, .., i2, i1]
+  compspec->data[0] = s->complexOut[0].re;
+  compspec->data[s->fft_size / 2] = s->complexOut[s->fft_size / 2].re;
+  for (i = 1; i < s->fft_size / 2; i++) {
+    compspec->data[i] = s->complexOut[i].re;
+    compspec->data[s->fft_size - i] = s->complexOut[i].im;
+  }
+  // apply scaling
+#if HAVE_AUBIO_DOUBLE
+  ippsMulC_64f(compspec->data, 1.0 / 2.0, compspec->data, s->fft_size);
+#else
+  ippsMulC_32f(compspec->data, 1.0 / 2.0, compspec->data, s->fft_size);
+#endif
 #else                         // using OOURA
   aubio_ooura_rdft(s->winsize, 1, s->in, s->ip, s->w);
 …
     compspec->data[s->winsize - i] = - s->in[2 * i + 1];
+  }
+#endif /* HAVE_ACCELERATE */
+#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+}
 …
     output->data[i] = s->out[i]*renorm;
+  }
+#else /* HAVE_FFTW3 */
 #ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE // using ACCELERATE
   // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1]
   // to vDSP packed format   [ r0, rN, r1, i1, ..., rN-1, iN-1 ]
 …
   smpl_t scale = 1.0 / s->winsize;
   aubio_vDSP_vsmul(output->data, 1, &scale, output->data, 1, s->fft_size);
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  // convert from real imag  [ r0, 0, ..., rN, iN-1, .., i2, i1, iN-1] to complex format
+  s->complexOut[0].re = compspec->data[0];
+  s->complexOut[0].im = 0;
+  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
+  s->complexOut[s->fft_size / 2].im = 0.0;
+  for (i = 1; i < s->fft_size / 2; i++) {
+    s->complexOut[i].re = compspec->data[i];
+    s->complexOut[i].im = compspec->data[s->fft_size - i];
+  }
+#if HAVE_AUBIO_DOUBLE
+  // apply fft
+  ippsFFTInv_CCSToR_64f((const Ipp64f *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
+  // apply scaling
+  ippsMulC_64f(output->data, 1.0 / s->winsize, output->data, s->fft_size);
+#else
+  // apply fft
+  ippsFFTInv_CCSToR_32f((const Ipp32f *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
+  // apply scaling
+  ippsMulC_32f(output->data, 1.0f / s->winsize, output->data, s->fft_size);
+#endif /* HAVE_AUBIO_DOUBLE */
 #else                         // using OOURA
   smpl_t scale = 2.0 / s->winsize;
+  smpl_t scale = 1.0 / s->winsize;
   s->out[0] = compspec->data[0];
   s->out[1] = compspec->data[s->winsize / 2];
 …
     output->data[i] = s->out[i] * scale;
+  }
+#endif /* HAVE_ACCELERATE */
+#endif /* HAVE_FFTW3 */
+}
+void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum) {
+  aubio_fft_get_phas(compspec, spectrum);
+  aubio_fft_get_norm(compspec, spectrum);
+}
+void aubio_fft_get_realimag(const cvec_t * spectrum, fvec_t * compspec) {
+  aubio_fft_get_imag(spectrum, compspec);
+  aubio_fft_get_real(spectrum, compspec);
+}
+void aubio_fft_get_phas(const fvec_t * compspec, cvec_t * spectrum) {
+#endif
+}
+void aubio_fft_get_spectrum(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
+  aubio_fft_get_phas(s, compspec, spectrum);
+  aubio_fft_get_norm(s, compspec, spectrum);
+}
+void aubio_fft_get_realimag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
+  aubio_fft_get_imag(s, spectrum, compspec);
+  aubio_fft_get_real(s, spectrum, compspec);
+}
+void aubio_fft_get_phas(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
+#ifdef INTEL_IPP_FFT // using Intel IPP FFT
+  uint_t i;
+  // convert from real imag  [ r0, 0, ..., rN, iN-1, .., i2, i1, iN-1] to complex format
+  s->complexOut[0].re = compspec->data[0];
+  s->complexOut[0].im = 0;
+  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
+  s->complexOut[s->fft_size / 2].im = 0.0;
+  for (i = 1; i < spectrum->length - 1; i++) {
+    s->complexOut[i].re = compspec->data[i];
+    s->complexOut[i].im = compspec->data[compspec->length - i];
+  }
+#if HAVE_AUBIO_DOUBLE
+  IppStatus status = ippsPhase_64fc(s->complexOut, spectrum->phas, spectrum->length);
+#else
+  IppStatus status = ippsPhase_32fc(s->complexOut, spectrum->phas, spectrum->length);
+#endif
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to extract phase from fft. IPP error: %d\n", status);
+  }
+#else                 // NOT using Intel IPP
   uint_t i;
   if (compspec->data[0] < 0) {
 …
     spectrum->phas[spectrum->length - 1] = 0.;
+  }
+}
+void aubio_fft_get_norm(const fvec_t * compspec, cvec_t * spectrum) {
+#endif
+}
+void aubio_fft_get_norm(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum) {
   uint_t i = 0;
   spectrum->norm[0] = ABS(compspec->data[0]);
 …
+}
 void aubio_fft_get_imag(const cvec_t * spectrum, fvec_t * compspec) {
+void aubio_fft_get_imag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
   uint_t i;
   for (i = 1; i < ( compspec->length + 1 ) / 2 /*- 1 + 1*/; i++) {
 …
+}
 void aubio_fft_get_real(const cvec_t * spectrum, fvec_t * compspec) {
+void aubio_fft_get_real(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec) {
   uint_t i;
   for (i = 0; i < compspec->length / 2 + 1; i++) {

src/spectral/fft.h

-                      r34ce715
+                      r986131d
 */
 void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum);
+void aubio_fft_get_spectrum(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum);
 /** convert real/imag spectrum to norm/phas spectrum
 …
 */
 void aubio_fft_get_realimag(const cvec_t * spectrum, fvec_t * compspec);
+void aubio_fft_get_realimag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec);
 /** compute phas spectrum from real/imag parts
 …
 */
 void aubio_fft_get_phas(const fvec_t * compspec, cvec_t * spectrum);
+void aubio_fft_get_phas(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum);
 /** compute imaginary part from the norm/phas cvec
 …
 */
 void aubio_fft_get_imag(const cvec_t * spectrum, fvec_t * compspec);
+void aubio_fft_get_imag(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec);
 /** compute norm component from real/imag parts
 …
 */
 void aubio_fft_get_norm(const fvec_t * compspec, cvec_t * spectrum);
+void aubio_fft_get_norm(aubio_fft_t *s, const fvec_t * compspec, cvec_t * spectrum);
 /** compute real part from norm/phas components
 …
 */
 void aubio_fft_get_real(const cvec_t * spectrum, fvec_t * compspec);
+void aubio_fft_get_real(aubio_fft_t *s, const cvec_t * spectrum, fvec_t * compspec);
 #ifdef __cplusplus

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: