/* Copyright (C) 2018 Paul Brossier This file is part of aubio. aubio is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. aubio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with aubio. If not, see . */ #include "aubio_priv.h" #include "fmat.h" #include "tensor.h" #include "conv2d.h" typedef enum { PAD_SAME = 0, // same, aka half mode PAD_VALID = 1 // valid, aka no padding } aubio_conv2d_padding_t; struct _aubio_conv2d_t { // define internals here uint_t n_filters; uint_t kernel_shape[2]; // kernel sizes uint_t stride_shape[2]; // stride sizes aubio_conv2d_padding_t padding_mode; // these will be set after calling get_output_shape aubio_tensor_t *kernel; fvec_t *bias; uint_t output_shape[3]; // shape of output uint_t padding_start[2]; // {top, left} padding #if defined(HAVE_BLAS) aubio_tensor_t *padded_input; #endif }; #if defined(DEBUG) static void aubio_conv2d_debug(aubio_conv2d_t *c, aubio_tensor_t *input_tensor); #endif aubio_conv2d_t *new_aubio_conv2d(uint_t n_filters, uint_t kernel_shape[2]) { aubio_conv2d_t *c = AUBIO_NEW(aubio_conv2d_t); // validate input parameters AUBIO_GOTO_FAILURE((sint_t)n_filters >= 1); AUBIO_GOTO_FAILURE((sint_t)kernel_shape[0] >= 1); AUBIO_GOTO_FAILURE((sint_t)kernel_shape[1] >= 1); // set internal variables c->n_filters = n_filters; c->kernel_shape[0] = kernel_shape[0]; c->kernel_shape[1] = kernel_shape[1]; // default to padding_mode="valid" c->padding_mode = PAD_VALID; // set default stride_shape to {1, 1} { uint_t default_stride[2] = {1, 1}; aubio_conv2d_set_stride(c, default_stride); } return c; failure: del_aubio_conv2d(c); return NULL; } void del_aubio_conv2d(aubio_conv2d_t *c) { AUBIO_ASSERT(c); if (c->kernel) del_aubio_tensor(c->kernel); if (c->bias) del_fvec(c->bias); #if defined(HAVE_BLAS) if (c->padded_input) del_aubio_tensor(c->padded_input); #endif AUBIO_FREE(c); } uint_t aubio_conv2d_set_stride(aubio_conv2d_t *c, uint_t stride[2]) { if ((sint_t)stride[0] < 1) return AUBIO_FAIL; if ((sint_t)stride[1] < 1) return AUBIO_FAIL; c->stride_shape[0] = stride[0]; c->stride_shape[1] = stride[1]; return AUBIO_OK; } uint_t *aubio_conv2d_get_stride(aubio_conv2d_t *c) { return c->stride_shape; } uint_t aubio_conv2d_get_output_shape(aubio_conv2d_t *c, aubio_tensor_t *input_tensor, uint_t *shape) { uint_t output_shape[3] = {0, 0, c->n_filters}; uint_t padding_start[2] = {0, 0}; // total amount of padding uint_t padding_shape[2] = {0, 0}; // check input parameters AUBIO_ASSERT(input_tensor); AUBIO_ASSERT(shape); // reset output array shape[0] = 0; shape[1] = 0; shape[2] = 0; switch (c->padding_mode) { case PAD_SAME: // compute output shape output_shape[0] = (uint_t)CEIL(input_tensor->shape[0] / (smpl_t)c->stride_shape[0]); output_shape[1] = (uint_t)CEIL(input_tensor->shape[1] / (smpl_t)c->stride_shape[1]); padding_shape[0] = (output_shape[0] - 1) * c->stride_shape[0] + c->kernel_shape[0] - input_tensor->shape[0]; padding_shape[1] = (output_shape[1] - 1) * c->stride_shape[1] + c->kernel_shape[1] - input_tensor->shape[1]; padding_start[0] = FLOOR(padding_shape[0] / 2); padding_start[1] = FLOOR(padding_shape[1] / 2); break; case PAD_VALID: output_shape[0] = (input_tensor->shape[0] - c->kernel_shape[0] + 1) / c->stride_shape[0]; output_shape[1] = (input_tensor->shape[1] - c->kernel_shape[1] + 1) / c->stride_shape[1]; padding_start[0] = 0; padding_start[1] = 0; break; //case PAD_CAUSAL: // // TODO // return AUBIO_FAIL; default: return AUBIO_FAIL; } uint_t kernel_shape[4]; kernel_shape[0] = c->kernel_shape[0]; kernel_shape[1] = c->kernel_shape[1]; kernel_shape[2] = input_tensor->shape[2]; kernel_shape[3] = c->n_filters; if (c->kernel) del_aubio_tensor(c->kernel); if (c->bias) del_fvec(c->bias); c->kernel = new_aubio_tensor(4, kernel_shape); if (!c->kernel) return AUBIO_FAIL; c->bias = new_fvec(c->n_filters); // set internals upon success c->output_shape[0] = output_shape[0]; c->output_shape[1] = output_shape[1]; c->output_shape[2] = output_shape[2]; c->padding_start[0] = padding_start[0]; c->padding_start[1] = padding_start[1]; // set output shape[0] = output_shape[0]; shape[1] = output_shape[1]; shape[2] = output_shape[2]; #if defined(HAVE_BLAS) // im2col padding padding_shape[0] = output_shape[0] * output_shape[1]; padding_shape[1] = c->kernel_shape[0] * c->kernel_shape[1] * input_tensor->shape[2]; c->padded_input = new_aubio_tensor(2, padding_shape); if (!c-> padded_input) { AUBIO_MSG("conv2d: failed creating padded_input with shape (%d, %d, %d)\n", padding_shape); return AUBIO_FAIL; } #endif #if defined(DEBUG) aubio_conv2d_debug(c, input_tensor); #endif return AUBIO_OK; } #if defined(DEBUG) void aubio_conv2d_debug(aubio_conv2d_t *c, aubio_tensor_t *input_tensor) { // print some info AUBIO_ASSERT(c); uint_t n_params = (c->kernel->shape[0] * c->kernel->shape[2] + 1) * c->kernel->shape[1] * c->kernel->shape[3]; const char_t *tensor_str = aubio_tensor_get_shape_string(input_tensor); //AUBIO_DBG("conv2d: kernel_shape_str %s\n", kernel_shape_str); AUBIO_DBG("conv2d: %15s -> (%d, %d, %d)", tensor_str, c->output_shape[0], c->output_shape[1], c->output_shape[2]); tensor_str = aubio_tensor_get_shape_string(c->kernel); AUBIO_DBG(" (n_params=%d, kernel_shape=(%d, %d)," " weigths=%s, stride (%d, %d), pad_start [%d, %d])\n", n_params, c->kernel_shape[0], c->kernel_shape[1], tensor_str, c->stride_shape[0], c->stride_shape[1], -c->padding_start[0], -c->padding_start[1]); } #endif uint_t aubio_conv2d_check_output_shape(aubio_conv2d_t *c, aubio_tensor_t *input_tensor, aubio_tensor_t *activations) { // fetch output_shape if it hasn't been done before if (c->output_shape[0] == 0 || c->output_shape[1] == 0 || c->output_shape[2] == 0) { if (!aubio_conv2d_get_output_shape(c, input_tensor, c->output_shape)) { return AUBIO_FAIL; } } // check we have as many filters as expected activation outputs if (activations->shape[2] != c->n_filters) return AUBIO_FAIL; if (activations->shape[2] != c->kernel->shape[3]) return AUBIO_FAIL; if (input_tensor->shape[2] != c->kernel->shape[2]) return AUBIO_FAIL; // check tensor activations has the expected sizes if (c->output_shape[0] != activations->shape[0]) return AUBIO_FAIL; if (c->output_shape[1] != activations->shape[1]) return AUBIO_FAIL; if (c->output_shape[2] != activations->shape[2]) return AUBIO_FAIL; return AUBIO_OK; } #if !defined(HAVE_BLAS) void aubio_conv2d_do(aubio_conv2d_t *c, aubio_tensor_t *input_tensor, aubio_tensor_t *activations) { uint_t i, j, k, l, a, b; uint_t stride_a, stride_b; sint_t x, y; smpl_t s, w, bias, acc; uint_t jj, ll, bb, yy; uint_t k_stride1 = c->kernel->shape[3]; uint_t k_stride2 = c->kernel->shape[2] * k_stride1; AUBIO_ASSERT(c && input_tensor && activations); // check we have the correct output activation sizes if (aubio_conv2d_check_output_shape(c, input_tensor, activations)) { AUBIO_ERR("conv2d: check_output_shape failed\n"); return; } // for each kernel filter k for (i = 0; i < activations->shape[2]; i++) { // get bias bias = c->bias->data[i]; stride_b = 0; // == j * c->stride_shape[1] jj = 0; // == j * activations->shape[2] // for each output y for (j = 0; j < activations->shape[1]; j++) { // for each output x stride_a = 0; // k * c->stride_shape[0] for (k = 0; k < activations->shape[0]; k++) { // reset output acc = 0; // compute convolution for one kernel for (a = 0; a < c->kernel_shape[0]; a++) { x = stride_a + a - c->padding_start[0]; if ((x < 0) || (x > (sint_t)input_tensor->shape[0] - 1)) continue; // padding with 0. bb = 0; // == b * k_stride2 for (b = 0; b < c->kernel_shape[1]; b++) { y = stride_b + b - c->padding_start[1]; if ((y < 0) || (y > (sint_t)input_tensor->shape[1] - 1)) continue; // padding with 0. yy = y * input_tensor->shape[2]; ll = bb + i; // + l * k_stride1 // for each input channel for (l = 0; l < input_tensor->shape[2]; l++) { // get kernel weight w = c->kernel->data[a][ll]; // get input sample s = input_tensor->data[x][yy + l]; acc += w * s; ll += k_stride1; } bb += k_stride2; } } stride_a += c->stride_shape[0]; // apply bias acc += bias; // set output activation activations->data[k][jj + i] = acc; } stride_b += c->stride_shape[1]; jj += activations->shape[2]; } } } #else /* HAVE_BLAS */ void aubio_conv2d_copy_to_padded(aubio_conv2d_t *o, aubio_tensor_t *input_tensor, aubio_tensor_t *padded_input) { // naive implementation of im2col uint_t i, j, k, l, m; uint_t stride_4 = o->kernel->shape[2]; uint_t stride_3 = o->kernel->shape[1] * stride_4; uint_t stride_2 = o->kernel->shape[0] * stride_3; uint_t stride_1 = o->output_shape[1] * stride_2; uint_t stride_in_2 = input_tensor->shape[2]; uint_t stride_in_1 = input_tensor->shape[1] * stride_in_2; AUBIO_ASSERT(padded_input->size == o->output_shape[0] * o->output_shape[1] * o->kernel_shape[0] * o->kernel_shape[1] * input_tensor->shape[2]); AUBIO_ASSERT(input_tensor->shape[2] == o->kernel->shape[2]); for (i = 0; i < o->output_shape[0]; i++) { for (j = 0; j < o->output_shape[1]; j++) { for (k = 0; k < o->kernel->shape[0]; k++) { for (l = 0; l < o->kernel->shape[1]; l++) { for (m = 0; m < o->kernel->shape[2]; m++) { uint_t read_i = i * o->stride_shape[0] + k; uint_t read_j = j * o->stride_shape[1] + l; if (read_i < o->padding_start[0]) continue; else if (read_i - o->padding_start[0] >= input_tensor->shape[0]) continue; if (read_j < o->padding_start[1]) continue; else if (read_j - o->padding_start[1] >= input_tensor->shape[1]) continue; sint_t idx = ((read_i - o->padding_start[0])) * stride_in_1 + ((read_j - o->padding_start[1])) * stride_in_2 + m; padded_input->buffer[i * stride_1 + j * stride_2 + k * stride_3 + l * stride_4 + m] = input_tensor->buffer[idx]; } } } } } } void aubio_conv2d_do(aubio_conv2d_t *o, aubio_tensor_t *input_tensor, aubio_tensor_t *activations) { uint_t i, j; smpl_t bias; aubio_tensor_t *padded_input = o->padded_input; aubio_tensor_t *kernel = o->kernel; AUBIO_ASSERT(o && input_tensor && activations); // check we have the correct output activation sizes if (aubio_conv2d_check_output_shape(o, input_tensor, activations)) { AUBIO_ERR("conv2d: check_output_shape failed\n"); return; } uint_t M = padded_input->shape[0]; uint_t K = padded_input->size/padded_input->shape[0]; uint_t N = kernel->size / K; // check sizes AUBIO_ASSERT(M * K == padded_input->size); AUBIO_ASSERT(N * K == kernel->size); AUBIO_ASSERT(M * N == activations->size); // copy input to im2col sliding window version aubio_conv2d_copy_to_padded(o, input_tensor, padded_input); aubio_cblas__gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, // M N, // N K, // K 1.F, // alpha padded_input->buffer, // M x K matrix K, // K (2nd dim of A) kernel->buffer, // K x N matrix N, // N 0.F, // beta activations->buffer, // M x N matrix N); // N (2nd dim of C) // apply bias for (i = 0; i < activations->shape[2]; i++) { bias = o->bias->data[i]; for (j = 0; j < activations->shape[0] * activations->shape[1]; j++) { activations->buffer[j * activations->shape[2] + i] += bias; } } } #endif void aubio_conv2d_do_backwards(aubio_conv2d_t *c, /*aubio_tensor_t *old_gradients,*/ aubio_tensor_t *gradients) { uint_t i, j, k, a, b; AUBIO_ASSERT(c && gradients); // TODO // for each kernel filter k for (i = 0; i < c->n_filters; i++) { // for each input column for (j = 0; j < gradients->shape[1]; j++) { // for each input row for (k = 0; k < gradients->shape[2]; k++) { for (a = 0; a < c->kernel_shape[0]; a++) { for (b = 0; b < c->kernel_shape[1]; b++) { #if 0 smpl_t grad = gradients->data[i]->data[a][b]; smpl_t oldgrad = old_gradients->data[i]->data[a][b]; smpl_t m = (grad - oldgrad * momentum); w -= lr * m - lr * decay * w; #endif } } } } } } uint_t aubio_conv2d_set_padding_mode(aubio_conv2d_t *c, const char_t *padding_mode) { AUBIO_ASSERT(c && padding_mode); if (strncasecmp(padding_mode, "same", PATH_MAX) == 0) { c->padding_mode = PAD_SAME; } else if (strncasecmp(padding_mode, "valid", PATH_MAX) == 0) { c->padding_mode = PAD_VALID; } else { return AUBIO_FAIL; } return AUBIO_OK; } uint_t aubio_conv2d_set_kernel(aubio_conv2d_t *c, aubio_tensor_t *kernel) { AUBIO_ASSERT(c && kernel); if (aubio_tensor_have_same_shape(kernel, c->kernel)) { aubio_tensor_copy(kernel, c->kernel); return AUBIO_OK; } return AUBIO_FAIL; } aubio_tensor_t *aubio_conv2d_get_kernel(aubio_conv2d_t* c) { AUBIO_ASSERT(c && c->kernel); return c->kernel; } uint_t aubio_conv2d_set_bias(aubio_conv2d_t *c, fvec_t *bias) { AUBIO_ASSERT(c && bias); if (bias->length == c->bias->length) { fvec_copy(bias, c->bias); return AUBIO_OK; } return AUBIO_OK; } fvec_t *aubio_conv2d_get_bias(aubio_conv2d_t* c) { AUBIO_ASSERT(c && c->bias); return c->bias; }