Changeset c97f7ed


Ignore:
Timestamp:
Dec 29, 2021, 5:51:48 PM (2 years ago)
Author:
Paul Brossier <piem@piem.org>
Branches:
feature/cnn, feature/crepe
Children:
d91fa83
Parents:
337e70d
git-author:
Paul Brossier <piem@piem.org> (01/08/19 15:45:59)
git-committer:
Paul Brossier <piem@piem.org> (12/29/21 17:51:48)
Message:

[conv1d] add second blas optimisation using gemv

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/ai/conv1d.c

    r337e70d rc97f7ed  
    289289
    290290  uint_t sdot_size = c->kernel->shape[0] * c->kernel->shape[1];
    291   uint_t input_stride = c->stride_shape * c->kernel->shape[1];
     291  uint_t input_stride = c->stride_shape * c->padded_input->shape[1]
    292292
    293293  AUBIO_ASSERT(c && input_tensor && activations);
     
    308308  // for each output
    309309  for (j = 0; j < activations->shape[0]; j++) {
     310    // for each row of activation output
     311    aubio_cblas__gemv(CblasRowMajor, CblasTrans,
     312        sdot_size, c->kernel->shape[2], 1.,
     313        c->kernel->buffer, c->kernel->shape[2],
     314        c->padded_input->buffer + j  * input_stride, 1, 0.,
     315        activations->buffer + j * activations->shape[1], 1);
     316  }
     317  for (j = 0; j < activations->shape[0]; j++) {
    310318    // for each kernel filter k
    311319    for (i = 0; i < activations->shape[1]; i++) {
    312       // get bias
    313       bias = c->bias->data[i];
    314 
    315       // compute one activation output
    316       acc = aubio_cblas_dot(sdot_size, c->kernel->buffer + i,
    317           c->kernel->shape[2], c->padded_input->buffer + j * input_stride, 1);
    318 
    319       // apply bias
    320       acc += bias;
    321 
    322       // compute RELU
    323       activations->data[j][i] = MAX(acc, 0.);
     320      activations->data[j][i] += c->bias->data[i];
     321      activations->data[j][i] = MAX(activations->data[j][i], 0);
    324322    }
    325323  }
Note: See TracChangeset for help on using the changeset viewer.