# Source code for layers.convolutional_layer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

from NumPyNet.activations import Activations
from NumPyNet.utils import _check_activation
from NumPyNet.utils import check_is_fitted
from NumPyNet.layers.base import BaseLayer
from NumPyNet.exception import LayerError

import numpy as np

__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']


class Convolutional_layer(BaseLayer):

  '''
  Convolutional Layer

  Parameters
  ----------
    filters : int
      Number of filters to be slided over the input, and also the number
      of channels of the output (channels_out)

    size : tuple
      Size of the kernel of shape (kx, ky).

    stride : tuple (default=None)
      Step of the kernel, with shape (st1, st2).
      If None, stride is assigned size values.

    input_shape : tuple (default=None)
      Shape of the input in the format (batch, w, h, c),
      None is used when the layer is part of a Network model.

    weights : array-like (default=None)
      Filters of the convolutional layer, with shape (kx, ky, channels_in, filters).
      If None, random weights are initialized

    bias : array-like (default=None)
      Bias of the convolutional layer.
      If None, bias init is random with shape (filters, )

    pad : bool (default=False)
      If False the image is cutted along the last raws and columns,
      if True the input is padded following keras SAME padding

    activation : str or Activation object
      Activation function of the layer.

  Example
  -------
  >>> import numpy as np
  >>> from NumPyNet import activations
  >>>
  >>> np.random.seed(123)
  >>>
  >>> inpt = np.random.uniform(low=-1., high=1., size=(1, 100, 100, 3))
  >>> b, w, h, c = inpt.shape
  >>>
  >>> channels_out = 10
  >>> size = (3, 3)
  >>> stride = (1, 1)
  >>>
  >>> filters = np.random.uniform(-1., 1., size=(size[0], size[1], c, channels_out))
  >>> bias = np.zeros(shape=(channels_out,))
  >>>
  >>> layer = Convolutional_layer(input_shape=inpt.shape,
  ...                             filters=channels_out,
  ...                             weights=filters,
  ...                             bias=bias,
  ...                             activation=activations.Relu(),
  ...                             size=size,
  ...                             stride=stride,
  ...                             pad=False)
  >>>
  >>> # FORWARD
  >>> layer.forward(inpt)
  >>> forward_out = layer.output.copy()
  >>>
  >>> # BACKWARD
  >>> layer.delta = np.ones(layer.out_shape, dtype=float)
  >>> delta = np.zeros(shape=inpt.shape, dtype=float)
  >>> layer.backward(delta)

  References
  ----------
    - https://arxiv.org/abs/1603.07285
    - https://cs231n.github.io/convolutional-networks/
    - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
    - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
    - https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
  '''

  def __init__(self, filters, size, stride=None, input_shape=None,
               weights=None, bias=None, pad=False,
               activation=Activations, **kwargs):

    if isinstance(filters, int) and filters > 0:
      self.channels_out = filters
    else:
      raise ValueError('Parameter "filters" must be an integer and > 0')

    # normalize size and stride to 2D tuples (a scalar means a square kernel/step)
    self.size = size
    if not hasattr(self.size, '__iter__'):
      self.size = (int(self.size), int(self.size))

    if not stride:
      self.stride = size
    else:
      self.stride = stride

    if not hasattr(self.stride, '__iter__'):
      self.stride = (int(self.stride), int(self.stride))

    # NOTE: validate the dimensionality BEFORE indexing element [1], so that a
    # wrong-length tuple raises the intended LayerError instead of an IndexError
    if len(self.size) != 2 or len(self.stride) != 2:
      raise LayerError('Convolutional layer. Incompatible stride/size dimensions. They must be a 1D-2D tuple of values')

    if self.size[0] <= 0. or self.size[1] <= 0.:
      raise LayerError('Convolutional layer. Incompatible size values. They must be both > 0')

    if self.stride[0] <= 0. or self.stride[1] <= 0.:
      raise LayerError('Convolutional layer. Incompatible stride values. They must be both > 0')

    # Weights and bias
    self.weights = weights
    self.bias = bias

    # Activation function
    activation = _check_activation(self, activation)
    self.activation = activation.activate
    self.gradient = activation.gradient

    # Padding
    self.pad = pad
    self.pad_left, self.pad_right, self.pad_bottom, self.pad_top = (0, 0, 0, 0)

    # Output, Delta and Updates
    self.weights_update = None
    self.bias_update = None
    self.optimizer = None

    if input_shape is not None:
      super(Convolutional_layer, self).__init__(input_shape=input_shape)
      self._build()

  def _build(self):
    '''
    Init layer weights and biases and set the correct layer out_shapes.

    Returns
    -------
      self
    '''
    _, w, h, c = self.input_shape

    if self.weights is None:
      # He et al. initialization: zero-mean gaussian with std = sqrt(2 / fan_in).
      # (fixes the original call np.random.normal(loc=scale, scale=1.), which
      # used the He factor as the MEAN with unit variance, defeating its purpose)
      scale = np.sqrt(2. / (self.size[0] * self.size[1] * c))
      self.weights = np.random.normal(loc=0., scale=scale,
                                      size=(self.size[0], self.size[1], c, self.channels_out))

    if self.bias is None:
      self.bias = np.zeros(shape=(self.channels_out, ), dtype=float)

    if self.pad:
      self._evaluate_padding()

    # output spatial dimensions of a strided convolution
    self.out_w = 1 + (w + self.pad_top + self.pad_bottom - self.size[0]) // self.stride[0]
    self.out_h = 1 + (h + self.pad_left + self.pad_right - self.size[1]) // self.stride[1]

    return self

  def __str__(self):
    '''
    Printer: darknet-style one-line summary of the layer shapes and BFLOPs.
    '''
    batch, out_w, out_h, out_c = self.out_shape
    _, w, h, c = self.input_shape
    return 'conv   {0:>4d} {1:d} x {2:d} / {3:d}  {4:>4d} x{5:>4d} x{6:>4d} x{7:>4d}   ->  {4:>4d} x{8:>4d} x{9:>4d} x{10:>4d}  {11:>5.3f} BFLOPs'.format(
           out_c, self.size[0], self.size[1], self.stride[0],
           batch, w, h, c,
           out_w, out_h, out_c,
           (2. * self.weights.size * out_h * out_w) * 1e-9)

  def __call__(self, previous_layer):
    # link this layer to the previous one (Network model composition) and
    # build weights/out-shapes from the inherited input_shape
    super(Convolutional_layer, self).__call__(previous_layer)
    self._build()
    return self

  @property
  def out_shape(self):
    '''
    Get the output shape as (batch, out_w, out_h, out_channels)
    '''
    return (self.input_shape[0], self.out_w, self.out_h, self.channels_out)
[docs] def load_weights(self, chunck_weights, pos=0): ''' Load weights from full array of model weights Parameters ---------- chunck_weights : array-like model weights and bias pos : int (default=0) Current position of the array Returns ---------- pos : int Updated stream position. ''' c = self.input_shape[-1] self.bias = chunck_weights[pos : pos + self.channels_out] pos += self.channels_out self.weights = chunck_weights[pos : pos + self.weights.size] self.weights = self.weights.reshape(self.size[0], self.size[1], c, self.channels_out) pos += self.weights.size return pos
[docs] def save_weights(self): ''' Return the biases and weights in a single ravel fmt to save in binary file ''' return np.concatenate([self.bias.ravel(), self.weights.ravel()], axis=0).tolist()
  def _asStride(self, arr, back=False):
    '''
    _asStride returns a view of the input array such that
    a kernel of size = (kx, ky) is slided over the image
    with stride = (st1, st2)

    Parameters
    ----------
      arr : array-like
        input batch of images to be convoluted with shape = (b, w, h, c)

      back : bool (default=False)
        Define whether the function is called from forward or backward functions.

    Returns
    -------
      subs : array-view
        View of the input array with shape (batch, out_w, out_h, kx, ky, out_c)

    References
    ----------
      - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
      - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
    '''
    # byte-strides of the input along (batch, width, height, channel)
    B, s0, s1, c1 = arr.strides
    b, m1, n1, c = arr.shape
    m2, n2 = self.size
    st1, st2 = self.stride

    # in the backward pass the delta has already been dilated/padded to
    # account for the stride, so the kernel is slided with unit steps
    if back:
      st1 = st2 = 1

    # NOTE(review): this also OVERWRITES self.out_w / self.out_h set by _build
    # (intentional for the back=True call; the forward call recomputes the
    # same values)
    self.out_w = 1 + (m1 - m2) // st1
    self.out_h = 1 + (n1 - n2) // st2

    # Shape of the final view
    view_shape = (b, self.out_w, self.out_h, m2, n2, c)

    # strides of the final view: moving one output position jumps a full
    # stride in the underlying buffer; the kernel axes reuse the raw strides
    strides = (B, st1 * s0, st2 * s1, s0, s1, c1)

    # zero-copy: subs aliases arr's memory, nothing is duplicated
    subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)

    return subs

  def _dilate_pad(self, arr):
    '''
    Dilate input array for backward pass

    reference:
    https://mc.ai/backpropagation-for-convolution-with-strides/

    Parameters
    ----------
      arr : array-like
        input array to be dilated and padded with shape (b, out_w, out_h, out_c)

    Returns
    -------
      dilated : array-like
        The dilated array
    '''
    b, ow, oh, oc = self.out_shape
    b, w, h, c = self.input_shape
    kx, ky = self.size
    sx, sy = self.stride

    # number of zeros interleaved between consecutive delta values
    dx, dy = sx - 1, sy - 1

    # scatter the delta values on a stride-spaced zero grid (dilation)
    final_shape_dilation = (b, ow * sx - dx, oh * sy - dy, oc)
    dilated = np.zeros(shape=final_shape_dilation)
    dilated[:, ::sx, ::sy, :] = arr

    # compensate the padding applied (or not) in the forward pass, so that
    # the full convolution with the rotated kernel recovers the input size
    input_pad_w = (self.pad_top + self.pad_bottom)
    input_pad_h = (self.pad_left + self.pad_right)

    # rows/columns cut away by the forward crop when the stride does not
    # divide the input exactly
    pad_width = (w - kx + input_pad_w) % sx
    pad_height = (h - ky + input_pad_h) % sy

    # "full" convolution padding (k - 1), corrected by the forward SAME padding
    pad_H_l = ky - self.pad_left - 1
    pad_H_r = ky - self.pad_right - 1 + pad_height
    pad_W_t = kx - self.pad_top - 1
    pad_W_b = kx - self.pad_bottom - 1 + pad_width

    dilated = np.pad(dilated, pad_width=((0, 0), (pad_W_t, pad_W_b), (pad_H_l, pad_H_r), (0, 0)),
                     mode='constant', constant_values=(0., 0.))

    return dilated

  def _evaluate_padding(self):
    '''
    Compute padding dimensions following keras SAME padding.

    See also:
    https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
    '''
    _, w, h, _ = self.input_shape

    # Compute how many Raws are needed to pad the image in the 'w' axis
    if (w % self.stride[0] == 0):
      pad_w = max(self.size[0] - self.stride[0], 0)
    else:
      pad_w = max(self.size[0] - (w % self.stride[0]), 0)

    # Compute how many Columns are needed to pad the image in 'h' axis
    if (h % self.stride[1] == 0):
      pad_h = max(self.size[1] - self.stride[1], 0)
    else:
      pad_h = max(self.size[1] - (h % self.stride[1]), 0)

    # Number of raws/columns to be added for every directons
    self.pad_top = pad_w >> 1  # bit shift, integer division by two
    self.pad_bottom = pad_w - self.pad_top
    self.pad_left = pad_h >> 1
    self.pad_right = pad_h - self.pad_left

  def _pad(self, inpt):
    '''
    Pad every image in a batch with zeros, following keras SAME padding.

    Parameters
    ----------
      inpt : array-like
        input images to pad in the format (batch, in_w, in_h, in_c).

    Returns
    -------
      padded : array-like
        Padded input array, following keras SAME padding format.
    '''
    # return the zeros-padded image, in the same format as inpt
    # (batch, in_w + pad_w, in_h + pad_h, in_c)
    return np.pad(inpt, pad_width=((0, 0), (self.pad_top, self.pad_bottom), (self.pad_left, self.pad_right), (0, 0)),
                  mode='constant', constant_values=(0., 0.))
[docs] def forward(self, inpt, copy=False): ''' Forward function of the Convolutional Layer: it convolves an image with 'channels_out' filters with dimension (kx, ky, channels_in). In doing so, it creates a view of the image with shape (batch, out_w, out_h, in_c, kx, ky) in order to perform a single matrix multiplication with the reshaped filters array, which shape is (in_c * kx * ky, out_c). Parameters ---------- inpt : array-like Input batch of images in format (batch, in_w, in_h, in _c) copy : bool (default=False) If False the activation function modifies its input, if True make a copy instead Returns ------- self ''' self._check_dims(shape=self.input_shape, arr=inpt, func='Forward') kx, ky = self.size sx, sy = self.stride _, w, h, _ = self.input_shape inpt = inpt.astype('float64') # Padding if self.pad: mat_pad = self._pad(inpt) else: # If no pad, every image in the batch is cut mat_pad = inpt[:, : (w - kx) // sx * sx + kx, : (h - ky) // sy * sy + ky, ...] # Create the view of the array with shape (batch, out_w ,out_h, kx, ky, in_c) self.view = self._asStride(mat_pad) # the choice of numpy.einsum is due to reshape of self.view being a copy z = np.einsum('lmnijk, ijko -> lmno', self.view, self.weights, optimize=True) + self.bias # (batch, out_w, out_h, out_c) self.output = self.activation(z, copy=copy) self.delta = np.zeros(shape=self.out_shape, dtype=float) return self
  def backward(self, delta, copy=False):
    '''
    Backward function of the Convolutional layer.
    Source: https://arxiv.org/abs/1603.07285

    Parameters
    ----------
      delta : array-like
        delta array of shape (batch, w, h, c). Global delta to be backpropagated.

      copy : bool (default=False)
        States if the activation function have to return a copy of the input or not.

    Returns
    -------
      self
    '''
    check_is_fitted(self, 'delta')
    self._check_dims(shape=self.input_shape, arr=delta, func='Backward')
    # in-place assignment: the caller's buffer receives the backpropagated
    # delta (delta is rebound locally below, so [:] keeps the aliasing)
    delta[:] = delta.astype('float64')

    # chain rule through the activation: delta *= f'(output)
    self.delta *= self.gradient(self.output, copy=copy)

    # weights gradient: contraction of the forward strided view with delta
    self.weights_update = np.einsum('ijklmn, ijko -> lmno', self.view, self.delta, optimize=True)
    self.bias_update = self.delta.sum(axis=(0, 1, 2))  # shape = (channels_out)

    # Rotated weights, as theory suggest
    w_rot = np.rot90(self.weights, 2, axes=(0, 1))

    # Pad and dilate the delta array, then stride it and convolve
    # (full convolution with the 180deg-rotated kernel, unit stride)
    self.delta = self._dilate_pad(self.delta)
    delta_view = self._asStride(self.delta, back=True)
    delta[:] = np.einsum('ijklmn, lmon -> ijko', delta_view, w_rot, optimize=True)

    return self
[docs] def update(self): ''' Update function for the convolution layer. Optimizer must be assigned externally as an optimizer object. Returns ------- self ''' check_is_fitted(self, 'delta') self.bias, self.weights = self.optimizer.update(params=[self.bias, self.weights], gradients=[self.bias_update, self.weights_update] ) return self
if __name__ == '__main__':

  import os

  from PIL import Image
  import pylab as plt

  from NumPyNet import activations

  def img_2_float(im):
    # rescale pixel values into [0, 1]
    return ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)

  def float_2_img(im):
    # rescale values into [0, 255] uint8 for visualization
    return ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)

  filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
  inpt = np.asarray(Image.open(filename), dtype=float)
  inpt.setflags(write=1)
  inpt = img_2_float(inpt)

  # Relu activation constrain
  inpt = inpt * 2 - 1

  inpt = np.expand_dims(inpt, axis=0)  # shape from (w, h, c) to (1, w, h, c)

  channels_out = 10
  size = (3, 3)
  stride = (1, 1)
  pad = False

  layer_activation = activations.Relu()

  np.random.seed(123)

  b, w, h, c = inpt.shape
  filters = np.random.uniform(-1., 1., size=(size[0], size[1], c, channels_out))
  bias = np.zeros(shape=(channels_out,))

  layer = Convolutional_layer(input_shape=inpt.shape,
                              filters=channels_out,
                              weights=filters,
                              bias=bias,
                              activation=layer_activation,
                              size=size,
                              stride=stride,
                              pad=pad)

  # FORWARD

  layer.forward(inpt)
  forward_out = layer.output.copy()

  # printing after the forward pass so all the attributes are loaded
  print(layer)

  # BACKWARD

  layer.delta = np.ones(layer.out_shape, dtype=float)
  delta = np.zeros(shape=inpt.shape, dtype=float)
  layer.backward(delta)

  # Visualization

  fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
  fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)

  fig.suptitle(('Convolutional Layer\n activation: {}, '
                'size: {}, stride: {}, '
                'output channels: {}').format(layer_activation.name, size, stride, channels_out))

  ax1.axis('off')
  ax1.imshow(float_2_img(inpt[0]))
  ax1.set_title('Original image')

  # here every filter effect on the image can be shown
  ax2.axis('off')
  ax2.imshow(float_2_img(forward_out[0, :, :, 1]))
  ax2.set_title('Forward')

  ax3.axis('off')
  ax3.imshow(float_2_img(delta[0]))
  ax3.set_title('Backward')

  fig.tight_layout()
  plt.show()