# Source code for layers.convolutional_layer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

from NumPyNet.activations import Activations
from NumPyNet.utils import _check_activation
from NumPyNet.utils import check_is_fitted
from NumPyNet.layers.base import BaseLayer
from NumPyNet.exception import LayerError

import numpy as np

__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']


class Convolutional_layer(BaseLayer):

  '''
  Convolutional Layer

  Parameters
  ----------
    filters : int
      Number of filters to be slided over the input, and also the number
      of channels of the output (channels_out)

    size : tuple
      Size of the kernel of shape (kx, ky).

    stride : tuple (default=None)
      Step of the kernel, with shape (st1, st2).
      If None, stride is assigned size values.

    input_shape : tuple (default=None)
      Shape of the input in the format (batch, w, h, c),
      None is used when the layer is part of a Network model.

    weights : array-like (default=None)
      Filters of the convolutional layer, with shape (kx, ky, channels_in, filters).
      If None, random weights are initialized

    bias : array-like (default=None)
      Bias of the convolutional layer.
      If None, bias init is random with shape (filters, )

    pad : bool (default=False)
      If False the image is cutted along the last raws and columns,
      if True the input is padded following keras SAME padding

    activation : str or Activation object
      Activation function of the layer.

  Example
  -------
  >>> import numpy as np
  >>> from NumPyNet import activations
  >>>
  >>> np.random.seed(123)
  >>>
  >>> inpt = np.random.uniform(low=-1., high=1., size=(1, 100, 100, 3))
  >>> b, w, h, c = inpt.shape
  >>>
  >>> channels_out = 10
  >>> size = (3, 3)
  >>> stride = (1, 1)
  >>>
  >>> filters = np.random.uniform(-1., 1., size=(size[0], size[1], c, channels_out))
  >>> bias = np.zeros(shape=(channels_out,))
  >>>
  >>> layer = Convolutional_layer(input_shape=inpt.shape,
  ...                             filters=channels_out,
  ...                             weights=filters,
  ...                             bias=bias,
  ...                             activation=activations.Relu(),
  ...                             size=size,
  ...                             stride=stride,
  ...                             pad=False)
  >>>
  >>> # FORWARD
  >>> layer.forward(inpt)
  >>> forward_out = layer.output.copy()
  >>>
  >>> # BACKWARD
  >>> layer.delta = np.ones(layer.out_shape, dtype=float)
  >>> delta = np.zeros(shape=inpt.shape, dtype=float)
  >>> layer.backward(delta)

  References
  ----------
    - https://arxiv.org/abs/1603.07285
    - https://cs231n.github.io/convolutional-networks/
    - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
    - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
    - https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
  '''

  def __init__(self, filters, size, stride=None, input_shape=None,
               weights=None, bias=None, pad=False,
               activation=Activations, **kwargs):

    if isinstance(filters, int) and filters > 0:
      self.channels_out = filters
    else:
      raise ValueError('Parameter "filters" must be an integer and > 0')

    # normalize size and stride to 2D tuples (a scalar means a square kernel/step)
    self.size = size
    if not hasattr(self.size, '__iter__'):
      self.size = (int(self.size), int(self.size))

    if not stride:
      self.stride = size
    else:
      self.stride = stride

    if not hasattr(self.stride, '__iter__'):
      self.stride = (int(self.stride), int(self.stride))

    # NOTE: validate the dimensionality BEFORE indexing element [1], so that a
    # wrong-length tuple raises the intended LayerError instead of an IndexError
    if len(self.size) != 2 or len(self.stride) != 2:
      raise LayerError('Convolutional layer. Incompatible stride/size dimensions. They must be a 1D-2D tuple of values')

    if self.size[0] <= 0. or self.size[1] <= 0.:
      raise LayerError('Convolutional layer. Incompatible size values. They must be both > 0')

    if self.stride[0] <= 0. or self.stride[1] <= 0.:
      raise LayerError('Convolutional layer. Incompatible stride values. They must be both > 0')

    # Weights and bias
    self.weights = weights
    self.bias = bias

    # Activation function
    activation = _check_activation(self, activation)
    self.activation = activation.activate
    self.gradient = activation.gradient

    # Padding
    self.pad = pad
    self.pad_left, self.pad_right, self.pad_bottom, self.pad_top = (0, 0, 0, 0)

    # Output, Delta and Updates
    self.weights_update = None
    self.bias_update = None
    self.optimizer = None

    if input_shape is not None:
      super(Convolutional_layer, self).__init__(input_shape=input_shape)
      self._build()

  def _build(self):
    '''
    Init layer weights and biases and set the correct layer out_shapes.

    Returns
    -------
      self
    '''
    _, w, h, c = self.input_shape

    if self.weights is None:
      # He et al. initialization: zero-mean gaussian with std = sqrt(2 / fan_in).
      # (fixes the original call np.random.normal(loc=scale, scale=1.), which
      # used the He factor as the MEAN with unit variance, defeating its purpose)
      scale = np.sqrt(2. / (self.size[0] * self.size[1] * c))
      self.weights = np.random.normal(loc=0., scale=scale,
                                      size=(self.size[0], self.size[1], c, self.channels_out))

    if self.bias is None:
      self.bias = np.zeros(shape=(self.channels_out, ), dtype=float)

    if self.pad:
      self._evaluate_padding()

    # output spatial dimensions of a strided convolution
    self.out_w = 1 + (w + self.pad_top + self.pad_bottom - self.size[0]) // self.stride[0]
    self.out_h = 1 + (h + self.pad_left + self.pad_right - self.size[1]) // self.stride[1]

    return self

  def __str__(self):
    '''
    Printer: darknet-style one-line summary of the layer shapes and BFLOPs.
    '''
    batch, out_w, out_h, out_c = self.out_shape
    _, w, h, c = self.input_shape
    return 'conv   {0:>4d} {1:d} x {2:d} / {3:d}  {4:>4d} x{5:>4d} x{6:>4d} x{7:>4d}   ->  {4:>4d} x{8:>4d} x{9:>4d} x{10:>4d}  {11:>5.3f} BFLOPs'.format(
           out_c, self.size[0], self.size[1], self.stride[0],
           batch, w, h, c,
           out_w, out_h, out_c,
           (2. * self.weights.size * out_h * out_w) * 1e-9)

  def __call__(self, previous_layer):
    # link this layer to the previous one (Network model composition) and
    # build weights/out-shapes from the inherited input_shape
    super(Convolutional_layer, self).__call__(previous_layer)
    self._build()
    return self

  @property
  def out_shape(self):
    '''
    Get the output shape as (batch, out_w, out_h, out_channels)
    '''
    return (self.input_shape[0], self.out_w, self.out_h, self.channels_out)
[docs] def load_weights(self, chunck_weights, pos=0): ''' Load weights from full array of model weights Parameters ---------- chunck_weights : array-like model weights and bias pos : int (default=0) Current position of the array Returns ---------- pos : int Updated stream position. ''' c = self.input_shape[-1] self.bias = chunck_weights[pos : pos + self.channels_out] pos += self.channels_out self.weights = chunck_weights[pos : pos + self.weights.size] self.weights = self.weights.reshape(self.size[0], self.size[1], c, self.channels_out) pos += self.weights.size return pos
[docs] def save_weights(self): ''' Return the biases and weights in a single ravel fmt to save in binary file ''' return np.concatenate([self.bias.ravel(), self.weights.ravel()], axis=0).tolist()
  def _asStride(self, arr, back=False):
    '''
    _asStride returns a view of the input array such that
    a kernel of size = (kx, ky) is slided over the image
    with stride = (st1, st2)

    Parameters
    ----------
      arr : array-like
        input batch of images to be convoluted with shape = (b, w, h, c)

      back : bool (default=False)
        Define whether the function is called from forward or backward functions.

    Returns
    -------
      subs : array-view
        View of the input array with shape (batch, out_w, out_h, kx, ky, out_c)

    References
    ----------
      - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
      - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
    '''
    # byte-strides of the input along (batch, width, height, channel)
    B, s0, s1, c1 = arr.strides
    b, m1, n1, c = arr.shape
    m2, n2 = self.size
    st1, st2 = self.stride

    # in the backward pass the delta has already been dilated/padded to
    # account for the stride, so the kernel is slided with unit steps
    if back:
      st1 = st2 = 1

    # NOTE(review): this also OVERWRITES self.out_w / self.out_h set by _build
    # (intentional for the back=True call; the forward call recomputes the
    # same values)
    self.out_w = 1 + (m1 - m2) // st1
    self.out_h = 1 + (n1 - n2) // st2

    # Shape of the final view
    view_shape = (b, self.out_w, self.out_h, m2, n2, c)

    # strides of the final view: moving one output position jumps a full
    # stride in the underlying buffer; the kernel axes reuse the raw strides
    strides = (B, st1 * s0, st2 * s1, s0, s1, c1)

    # zero-copy: subs aliases arr's memory, nothing is duplicated
    subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)

    return subs

  def _dilate_pad(self, arr):
    '''
    Dilate input array for backward pass

    reference:
    https://mc.ai/backpropagation-for-convolution-with-strides/

    Parameters
    ----------
      arr : array-like
        input array to be dilated and padded with shape (b, out_w, out_h, out_c)

    Returns
    -------
      dilated : array-like
        The dilated array
    '''
    b, ow, oh, oc = self.out_shape
    b, w, h, c = self.input_shape
    kx, ky = self.size
    sx, sy = self.stride

    # number of zeros interleaved between consecutive delta values
    dx, dy = sx - 1, sy - 1

    # scatter the delta values on a stride-spaced zero grid (dilation)
    final_shape_dilation = (b, ow * sx - dx, oh * sy - dy, oc)
    dilated = np.zeros(shape=final_shape_dilation)
    dilated[:, ::sx, ::sy, :] = arr

    # compensate the padding applied (or not) in the forward pass, so that
    # the full convolution with the rotated kernel recovers the input size
    input_pad_w = (self.pad_top + self.pad_bottom)
    input_pad_h = (self.pad_left + self.pad_right)

    # rows/columns cut away by the forward crop when the stride does not
    # divide the input exactly
    pad_width = (w - kx + input_pad_w) % sx
    pad_height = (h - ky + input_pad_h) % sy

    # "full" convolution padding (k - 1), corrected by the forward SAME padding
    pad_H_l = ky - self.pad_left - 1
    pad_H_r = ky - self.pad_right - 1 + pad_height
    pad_W_t = kx - self.pad_top - 1
    pad_W_b = kx - self.pad_bottom - 1 + pad_width

    dilated = np.pad(dilated, pad_width=((0, 0), (pad_W_t, pad_W_b), (pad_H_l, pad_H_r), (0, 0)),
                     mode='constant', constant_values=(0., 0.))

    return dilated

  def _evaluate_padding(self):
    '''
    Compute padding dimensions following keras SAME padding.

    See also:
    https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
    '''
    _, w, h, _ = self.input_shape

    # Compute how many Raws are needed to pad the image in the 'w' axis
    if (w % self.stride[0] == 0):
      pad_w = max(self.size[0] - self.stride[0], 0)
    else:
      pad_w = max(self.size[0] - (w % self.stride[0]), 0)

    # Compute how many Columns are needed to pad the image in 'h' axis
    if (h % self.stride[1] == 0):
      pad_h = max(self.size[1] - self.stride[1], 0)
    else:
      pad_h = max(self.size[1] - (h % self.stride[1]), 0)

    # Number of raws/columns to be added for every directons
    self.pad_top = pad_w >> 1  # bit shift, integer division by two
    self.pad_bottom = pad_w - self.pad_top
    self.pad_left = pad_h >> 1
    self.pad_right = pad_h - self.pad_left

  def _pad(self, inpt):
    '''
    Pad every image in a batch with zeros, following keras SAME padding.

    Parameters
    ----------
      inpt : array-like
        input images to pad in the format (batch, in_w, in_h, in_c).

    Returns
    -------
      padded : array-like
        Padded input array, following keras SAME padding format.
    '''
    # return the zeros-padded image, in the same format as inpt
    # (batch, in_w + pad_w, in_h + pad_h, in_c)
    return np.pad(inpt, pad_width=((0, 0), (self.pad_top, self.pad_bottom), (self.pad_left, self.pad_right), (0, 0)),
                  mode='constant', constant_values=(0., 0.))
[docs] def forward(self, inpt, copy=False): ''' Forward function of the Convolutional Layer: it convolves an image with 'channels_out' filters with dimension (kx, ky, channels_in). In doing so, it creates a view of the image with shape (batch, out_w, out_h, in_c, kx, ky) in order to perform a single matrix multiplication with the reshaped filters array, which shape is (in_c * kx * ky, out_c). Parameters ---------- inpt : array-like Input batch of images in format (batch, in_w, in_h, in _c) copy : bool (default=False) If False the activation function modifies its input, if True make a copy instead Returns ------- self ''' self._check_dims(shape=self.input_shape, arr=inpt, func='Forward') kx, ky = self.size sx, sy = self.stride _, w, h, _ = self.input_shape inpt = inpt.astype('float64') # Padding if self.pad: mat_pad = self._pad(inpt) else: # If no pad, every image in the batch is cut mat_pad = inpt[:, : (w - kx) // sx * sx + kx, : (h - ky) // sy * sy + ky, ...] # Create the view of the array with shape (batch, out_w ,out_h, kx, ky, in_c) self.view = self._asStride(mat_pad) # the choice of numpy.einsum is due to reshape of self.view being a copy z = np.einsum('lmnijk, ijko -> lmno', self.view, self.weights, optimize=True) + self.bias # (batch, out_w, out_h, out_c) self.output = self.activation(z, copy=copy) self.delta = np.zeros(shape=self.out_shape, dtype=float) return self
  def backward(self, delta, copy=False):
    '''
    Backward function of the Convolutional layer.
    Source: https://arxiv.org/abs/1603.07285

    Parameters
    ----------
      delta : array-like
        delta array of shape (batch, w, h, c). Global delta to be backpropagated.

      copy : bool (default=False)
        States if the activation function have to return a copy of the input or not.

    Returns
    -------
      self
    '''
    check_is_fitted(self, 'delta')
    self._check_dims(shape=self.input_shape, arr=delta, func='Backward')
    # in-place assignment: the caller's buffer receives the backpropagated
    # delta (delta is rebound locally below, so [:] keeps the aliasing)
    delta[:] = delta.astype('float64')

    # chain rule through the activation: delta *= f'(output)
    self.delta *= self.gradient(self.output, copy=copy)

    # weights gradient: contraction of the forward strided view with delta
    self.weights_update = np.einsum('ijklmn, ijko -> lmno', self.view, self.delta, optimize=True)
    self.bias_update = self.delta.sum(axis=(0, 1, 2))  # shape = (channels_out)

    # Rotated weights, as theory suggest
    w_rot = np.rot90(self.weights, 2, axes=(0, 1))

    # Pad and dilate the delta array, then stride it and convolve
    # (full convolution with the 180deg-rotated kernel, unit stride)
    self.delta = self._dilate_pad(self.delta)
    delta_view = self._asStride(self.delta, back=True)
    delta[:] = np.einsum('ijklmn, lmon -> ijko', delta_view, w_rot, optimize=True)

    return self
[docs] def update(self): ''' Update function for the convolution layer. Optimizer must be assigned externally as an optimizer object. Returns ------- self ''' check_is_fitted(self, 'delta') self.bias, self.weights = self.optimizer.update(params=[self.bias, self.weights], gradients=[self.bias_update, self.weights_update] ) return self
if __name__ == '__main__':

  import os

  from PIL import Image
  import pylab as plt

  from NumPyNet import activations

  def img_2_float(im):
    # rescale pixel values into [0, 1]
    return ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)

  def float_2_img(im):
    # rescale values into [0, 255] uint8 for visualization
    return ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)

  filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
  inpt = np.asarray(Image.open(filename), dtype=float)
  inpt.setflags(write=1)
  inpt = img_2_float(inpt)

  # Relu activation constrain
  inpt = inpt * 2 - 1

  inpt = np.expand_dims(inpt, axis=0)  # shape from (w, h, c) to (1, w, h, c)

  channels_out = 10
  size = (3, 3)
  stride = (1, 1)
  pad = False

  layer_activation = activations.Relu()

  np.random.seed(123)

  b, w, h, c = inpt.shape
  filters = np.random.uniform(-1., 1., size=(size[0], size[1], c, channels_out))
  bias = np.zeros(shape=(channels_out,))

  layer = Convolutional_layer(input_shape=inpt.shape,
                              filters=channels_out,
                              weights=filters,
                              bias=bias,
                              activation=layer_activation,
                              size=size,
                              stride=stride,
                              pad=pad)

  # FORWARD

  layer.forward(inpt)
  forward_out = layer.output.copy()

  # printing after the forward pass so all the attributes are loaded
  print(layer)

  # BACKWARD

  layer.delta = np.ones(layer.out_shape, dtype=float)
  delta = np.zeros(shape=inpt.shape, dtype=float)
  layer.backward(delta)

  # Visualization

  fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
  fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)

  fig.suptitle(('Convolutional Layer\n activation: {}, '
                'size: {}, stride: {}, '
                'output channels: {}').format(layer_activation.name, size, stride, channels_out))

  ax1.axis('off')
  ax1.imshow(float_2_img(inpt[0]))
  ax1.set_title('Original image')

  # here every filter effect on the image can be shown
  ax2.axis('off')
  ax2.imshow(float_2_img(forward_out[0, :, :, 1]))
  ax2.set_title('Forward')

  ax3.axis('off')
  ax3.imshow(float_2_img(delta[0]))
  ax3.set_title('Backward')

  fig.tight_layout()
  plt.show()