Source code for layers.avgpool_layer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

import numpy as np
from NumPyNet.exception import LayerError
from NumPyNet.utils import check_is_fitted
from NumPyNet.layers.base import BaseLayer

__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']


class Avgpool_layer(BaseLayer):
    r'''
    Avgpool layer

    Parameters
    ----------
      size : tuple or int
        Size of the kernel to slide over the input image. If a tuple, it must
        contain two integers, (kx, ky). If an int, size = kx = ky.

      stride : tuple or int (default=None)
        Represents the horizontal and vertical stride of the kernel (sx, sy).
        If None or 0, stride is assigned the same values as `size`.

      input_shape : tuple (default=None)
        Input shape of the layer. The default value is used when the layer is
        part of a network.

      pad : bool, (default=False)
        If False the image is cut to fit the size and stride dimensions, if
        True the image is padded following keras SAME padding, see references
        for details.

    Examples
    --------
    >>> import os
    >>> import pylab as plt
    >>> from PIL import Image
    >>>
    >>> img_2_float = lambda im : ((im - im.min()) * (1./(im.max() - im.min()) * 1.)).astype(float)
    >>> float_2_img = lambda im : ((im - im.min()) * (1./(im.max() - im.min()) * 255.)).astype(np.uint8)
    >>>
    >>> filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
    >>> inpt = np.asarray(Image.open(filename), dtype=float)
    >>> inpt.setflags(write=1)
    >>> inpt = img_2_float(inpt)
    >>>
    >>> inpt = np.expand_dims(inpt, axis=0)
    >>> pad = False
    >>>
    >>> size = 3
    >>> stride = 2
    >>>
    >>> # Model initialization
    >>> layer = Avgpool_layer(input_shape=inpt.shape, size=size, stride=stride, pad=pad)
    >>>
    >>> # FORWARD
    >>>
    >>> layer.forward(inpt)
    >>> forward_out = layer.output.copy()
    >>>
    >>> print(layer)
    >>>
    >>> # BACKWARD
    >>>
    >>> delta = np.random.uniform(low=0., high=1.,size=inpt.shape)
    >>> layer.delta = np.ones(layer.out_shape, dtype=float)
    >>> layer.backward(delta)
    >>>
    >>> # Visualizations
    >>>
    >>> fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
    >>> fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
    >>>
    >>> fig.suptitle('Average Pool Layer\n\nsize : {}, stride : {}, padding : {}'.format(size, stride, pad))
    >>>
    >>> ax1.imshow(float_2_img(inpt)[0])
    >>> ax1.set_title('Original image')
    >>> ax1.axis('off')
    >>>
    >>> ax2.imshow(float_2_img(layer.output[0]))
    >>> ax2.set_title('Forward')
    >>> ax2.axis('off')
    >>>
    >>> ax3.imshow(float_2_img(delta[0]))
    >>> ax3.set_title('Backward')
    >>> ax3.axis('off')
    >>>
    >>> fig.tight_layout()
    >>> plt.show()

    .. image:: ../../../NumPyNet/images/average_3-2.png

    .. image:: ../../../NumPyNet/images/average_30-20.png

    References
    ----------
    - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
    - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
    - https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
    '''

    def __init__(self, size, stride=None, pad=False, input_shape=None, **kwargs):
        '''
        Validate kernel size and stride, reset the padding amounts and build
        the layer.

        Raises
        ------
          LayerError
            If size or stride is not a scalar or a pair of values, or if any
            of their components is not strictly positive.
        '''
        self.size = self._as_pair(size)
        # A missing (None) or zero stride means "move by one full kernel"
        self.stride = self._as_pair(stride) if stride else self.size

        # Check the lengths BEFORE indexing, so that a malformed size raises
        # a LayerError instead of a bare IndexError
        if len(self.size) != 2 or len(self.stride) != 2:
            raise LayerError('Avgpool layer. Incompatible stride/size dimensions. They must be a 1D-2D tuple of values')

        if self.size[0] <= 0. or self.size[1] <= 0.:
            raise LayerError('Avgpool layer. Incompatible size dimensions. They must be both > 0')

        # A non-positive stride would only fail later (division by zero in
        # out_shape) or silently produce a meaningless view
        if self.stride[0] <= 0. or self.stride[1] <= 0.:
            raise LayerError('Avgpool layer. Incompatible stride dimensions. They must be both > 0')

        # for padding
        self.pad = pad
        self.pad_left, self.pad_right, self.pad_bottom, self.pad_top = (0, 0, 0, 0)

        super(Avgpool_layer, self).__init__(input_shape=input_shape)
        self._build(input_shape)

    @staticmethod
    def _as_pair(value):
        '''
        Return `value` as a tuple: iterables are converted as they are, a
        scalar `v` becomes `(int(v), int(v))`.
        '''
        if hasattr(value, '__iter__'):
            return tuple(value)
        return (int(value), int(value))

    def _build(self, input_shape=None):
        '''
        Re-evaluate the padding amounts.

        Nothing is done when `input_shape` is None or when padding is
        disabled. The padding itself is computed from `self.input_shape`.
        '''
        if input_shape is not None and self.pad:
            self._evaluate_padding()

    def __str__(self):
        '''
        Printer
        '''
        batch, w, h, c = self.input_shape
        _, out_width, out_height, out_channels = self.out_shape
        return 'avg {} x {} / {} {:>4d} x{:>4d} x{:>4d} x{:>4d} -> {:>4d} x{:>4d} x{:>4d}'.format(
            self.size[0], self.size[1], self.stride[0],
            batch, w, h, c,
            out_width, out_height, out_channels)

    @property
    def out_shape(self):
        '''
        Get the output shape

        Returns
        -------
          out_shape : tuple
            Output shape as (batch, out_width, out_height, out_channels)
        '''
        batch, w, h, c = self.input_shape
        out_height = (h + self.pad_left + self.pad_right - self.size[1]) // self.stride[1] + 1
        out_width = (w + self.pad_top + self.pad_bottom - self.size[0]) // self.stride[0] + 1
        out_channels = c
        return (batch, out_width, out_height, out_channels)

    def _asStride(self, arr):
        '''
        _asStride returns a view of the input array such that a kernel of
        size = (kx, ky) is slid over the image with stride = (st1, st2).

        Parameters
        ----------
          arr : array-like
            Input batch of images with shape = (batch, w, h, c)

        Returns
        -------
          subs : array-view
            View of the input array with shape (batch, out_w, out_h, c, kx, ky).
            No data is copied: overlapping windows share memory, so writes
            through the view modify `arr`.

        References
        ----------
        - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
        - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
        '''
        batch_stride, s0, s1 = arr.strides[:3]
        batch, w, h = arr.shape[:3]
        kx, ky = self.size
        st1, st2 = self.stride

        # Shape of the final view
        view_shape = (batch, 1 + (w - kx) // st1, 1 + (h - ky) // st2) + arr.shape[3:] + (kx, ky)

        # Strides of the final view: moving by one window means moving by
        # `stride` pixels, moving inside a window means moving by one pixel
        strides = (batch_stride, st1 * s0, st2 * s1) + arr.strides[3:] + (s0, s1)

        subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)
        # returns a view with shape = (batch, out_w, out_h, out_c, kx, ky)
        return subs

    def _evaluate_padding(self):
        '''
        Compute padding dimensions, following the keras SAME criterion.

        Returns
        -------
          self

        References
        ----------
        - https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
        '''
        _, w, h, _ = self.input_shape

        # Compute how many rows are needed to pad the image in the 'w' axis
        if w % self.stride[0] == 0:
            pad_w = max(self.size[0] - self.stride[0], 0)
        else:
            pad_w = max(self.size[0] - (w % self.stride[0]), 0)

        # Compute how many columns are needed
        if h % self.stride[1] == 0:
            pad_h = max(self.size[1] - self.stride[1], 0)
        else:
            pad_h = max(self.size[1] - (h % self.stride[1]), 0)

        # Number of rows/columns to be added in every direction; when the
        # total is odd the extra one goes to the bottom/right side
        self.pad_top = pad_w >> 1  # bit shift, integer division by two
        self.pad_bottom = pad_w - self.pad_top
        self.pad_left = pad_h >> 1
        self.pad_right = pad_h - self.pad_left

        return self

    def _pad(self, inpt):
        '''
        Pad every image in a batch with np.nan following keras SAME padding

        Parameters
        ----------
          inpt : array-like
            Input images in the format (batch, width, height, channels).

        Returns
        -------
          pad : array-like
            A padded batch of images, following keras SAME padding.

        References
        ----------
        - https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
        '''
        # return the nan-padded image, in the same format as inpt
        # (batch, width + pad_w, height + pad_h, channels)
        return np.pad(inpt,
                      pad_width=((0, 0),
                                 (self.pad_top, self.pad_bottom),
                                 (self.pad_left, self.pad_right),
                                 (0, 0)),
                      mode='constant', constant_values=(np.nan, np.nan))

    def forward(self, inpt):
        '''
        Forward function of the average pool layer: it slides a kernel of
        size (kx, ky) = size with step (st1, st2) = stride over every image in
        the batch. For every sub-matrix it computes the average value without
        considering NAN values (padding), and passes it to the output.

        Parameters
        ----------
          inpt : array-like
            Input batch of images, with shape (batch, input_w, input_h, input_c).

        Returns
        -------
          self
        '''
        self._check_dims(shape=self.input_shape, arr=inpt, func='Forward')

        kx, ky = self.size
        sx, sy = self.stride
        _, w, h, _ = self.input_shape
        inpt = inpt.astype('float64')

        # Padding
        if self.pad:
            mat_pad = self._pad(inpt)
        else:
            # If padding is False, cut the rows/columns that no window covers
            mat_pad = inpt[:, : (w - kx) // sx * sx + kx, : (h - ky) // sy * sy + ky, ...]

        # 'view' is the strided input image, shape = (batch, out_w, out_h, out_c, kx, ky)
        view = self._asStride(mat_pad)

        # Mean of every sub-matrix, computed without considering the padding (np.nan)
        self.output = np.nanmean(view, axis=(4, 5))
        self.delta = np.zeros(shape=self.out_shape, dtype=float)

        return self

    def backward(self, delta):
        '''
        Backward function of the average pool layer: the function modifies,
        in place, the net delta to be backpropagated.

        Every window adds its own `self.delta` value, divided by the number
        of non-NAN pixels it covers, to each of those pixels.

        Parameters
        ----------
          delta : array-like
            Global delta to be backpropagated. It is checked against the layer
            input shape and it is updated in place.

        Returns
        -------
          self
        '''
        check_is_fitted(self, 'delta')
        self._check_dims(shape=self.input_shape, arr=delta, func='Backward')
        delta[:] = delta.astype('float64')

        # Padding delta for a coherent _asStride dimension. Without padding
        # the view aliases `delta` itself, so it is updated directly.
        if self.pad:
            mat_pad = self._pad(delta)
        else:
            mat_pad = delta

        # The strided view gives access to every pixel covered by every
        # window: it is the 1-1 correspondence between output and input pixels
        net_delta_view = self._asStride(mat_pad)

        # Only non-NAN pixels are counted (keras behaviour), so the
        # normalization differs for windows overlapping the padding.
        # Shape of both arrays: (batch, out_w, out_h, out_c)
        valid = np.count_nonzero(~np.isnan(net_delta_view), axis=(4, 5))
        norm = self.delta * (1. / valid)

        # Window-major views: (out_w, out_h, out_c, batch, kx, ky) for the
        # delta and (out_w, out_h, out_c, batch) for the normalization, so
        # that every image of the batch receives its own gradient
        net_delta_review = np.moveaxis(net_delta_view, source=[1, 2, 3], destination=[0, 1, 2])
        norm_review = np.moveaxis(norm, source=0, destination=-1)

        # Windows are processed one at a time: overlapping windows share
        # memory and a single vectorized update of the same address would not
        # accumulate correctly
        for idx in np.ndindex(*norm_review.shape[:3]):
            net_delta_review[idx] += norm_review[idx][:, None, None]

        if self.pad:
            _, w_pad, h_pad, _ = mat_pad.shape
            # Excluding the padded part of the image
            delta[:] = mat_pad[:,
                               self.pad_top: w_pad - self.pad_bottom,
                               self.pad_left: h_pad - self.pad_right,
                               :]

        return self
if __name__ == '__main__':
    # Demo: average-pool a sample image, run a backward pass and plot the
    # original image, the forward output and the back-propagated delta.

    import os

    import pylab as plt
    from PIL import Image

    def img_2_float(im):
        # Rescale the image values to the [0, 1] range as floats
        return ((im - im.min()) * (1./(im.max() - im.min()) * 1.)).astype(float)

    def float_2_img(im):
        # Rescale the image values to the [0, 255] range as uint8
        return ((im - im.min()) * (1./(im.max() - im.min()) * 255.)).astype(np.uint8)

    filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
    inpt = np.asarray(Image.open(filename), dtype=float)
    inpt.setflags(write=1)
    inpt = img_2_float(inpt)

    # Add the batch axis: (w, h, c) -> (1, w, h, c)
    inpt = np.expand_dims(inpt, axis=0)

    pad = False

    size = 3
    stride = 2

    # Model initialization
    # NOTE: the keyword is `pad`; an unknown keyword such as `padding` would
    # be swallowed by **kwargs and silently ignored
    layer = Avgpool_layer(input_shape=inpt.shape, size=size, stride=stride, pad=pad)

    # FORWARD

    layer.forward(inpt)

    print(layer)

    # BACKWARD

    delta = np.random.uniform(low=0., high=1., size=inpt.shape)
    layer.delta = np.ones(layer.out_shape, dtype=float)
    layer.backward(delta)

    # Visualizations

    fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
    fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)

    fig.suptitle('Average Pool Layer\n\nsize : {}, stride : {}, padding : {}'.format(size, stride, pad))

    ax1.imshow(float_2_img(inpt)[0])
    ax1.set_title('Original image')
    ax1.axis('off')

    ax2.imshow(float_2_img(layer.output[0]))
    ax2.set_title('Forward')
    ax2.axis('off')

    ax3.imshow(float_2_img(delta[0]))
    ax3.set_title('Backward')
    ax3.axis('off')

    fig.tight_layout()
    plt.show()