Source code for layers.maxpool_layer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

import numpy as np
from NumPyNet.exception import LayerError
from NumPyNet.utils import check_is_fitted
from NumPyNet.layers.base import BaseLayer

__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']


class Maxpool_layer(BaseLayer):
    '''
    Maxpool layer

    Slides a kernel over every image of the input batch and keeps the
    maximum value of each window.

    Parameters
    ----------
      size : tuple or int
        Size of the kernel to slide over the input image. If a tuple, it must
        contain two integers, (kx, ky). If an int, size = kx = ky.

      stride : tuple or int (default = None)
        Represents the horizontal and vertical stride of the kernel (sx, sy).
        If None or 0, stride is assigned the same values as `size`.

      input_shape : tuple (default = None)
        Input shape of the layer. The default value is used when the layer is
        part of a network.

      pad : bool, (default = False)
        If False the image is cut to fit the size and stride dimensions, if
        True the image is padded following keras SAME padding, see references
        for details.

      **kwargs :
        Accepted for interface compatibility; not used by this layer.

    Examples
    --------
    >>> import os
    >>>
    >>> import pylab as plt
    >>> from PIL import Image
    >>>
    >>> img_2_float = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)
    >>> float_2_img = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)
    >>>
    >>> filename = os.path.join(os.path.dirname('__file__'), '..', '..', 'data', 'dog.jpg')
    >>> inpt = np.asarray(Image.open(filename), dtype=float)
    >>> inpt.setflags(write=1)
    >>> inpt = img_2_float(inpt)
    >>>
    >>> inpt = np.expand_dims(inpt, axis=0)  # Add the batch shape.
    >>> b, w, h, c = inpt.shape
    >>>
    >>> size = (3, 3)
    >>> stride = (2, 2)
    >>> pad = False
    >>>
    >>> layer = Maxpool_layer(input_shape=inpt.shape, size=size, stride=stride, pad=pad)
    >>>
    >>> # FORWARD
    >>>
    >>> layer.forward(inpt)
    >>>
    >>> forward_out = layer.output
    >>>
    >>> print(layer)
    >>>
    >>> # BACKWARD
    >>>
    >>> delta = np.zeros(inpt.shape, dtype=float)
    >>> layer.delta = np.ones(layer.out_shape, dtype=float)
    >>> layer.backward(delta)
    >>>
    >>> # Visualizations
    >>>
    >>> fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
    >>> fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
    >>> fig.suptitle('MaxPool Layer\\nsize : {}, stride : {}, padding : {} '.format(size, stride, pad))
    >>>
    >>> ax1.imshow(float_2_img(inpt[0]))
    >>> ax1.set_title('Original Image')
    >>> ax1.axis('off')
    >>>
    >>> ax2.imshow(float_2_img(forward_out[0]))
    >>> ax2.set_title('Forward')
    >>> ax2.axis('off')
    >>>
    >>> ax3.imshow(float_2_img(delta[0]))
    >>> ax3.set_title('Backward')
    >>> ax3.axis('off')
    >>>
    >>> fig.tight_layout()
    >>> plt.show()

    .. image:: ../../../NumPyNet/images/maxpool_3-2.png

    .. image:: ../../../NumPyNet/images/maxpool_30-20.png

    Reference
    ---------
    - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
    - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
    '''

    def __init__(self, size, stride=None, pad=False, input_shape=None, **kwargs):

        self.size = size

        if not hasattr(self.size, '__iter__'):
            self.size = (int(self.size), int(self.size))

        # `None` and `0` both mean "reuse the kernel size as stride".
        if not stride:
            self.stride = self.size
        else:
            self.stride = stride

        if not hasattr(self.stride, '__iter__'):
            self.stride = (int(self.stride), int(self.stride))

        # Check the lengths first, so that a malformed size/stride is reported
        # as a LayerError instead of an IndexError on the checks below.
        if len(self.size) != 2 or len(self.stride) != 2:
            raise LayerError('Maxpool layer. Incompatible stride/size dimensions. They must be a 1D-2D tuple of values')

        if self.size[0] <= 0. or self.size[1] <= 0.:
            raise LayerError('Maxpool layer. Incompatible size dimensions. They must be both > 0')

        # A non-positive stride would only fail later as a ZeroDivisionError
        # (or produce meaningless shapes) inside out_shape / _asStride.
        if self.stride[0] <= 0. or self.stride[1] <= 0.:
            raise LayerError('Maxpool layer. Incompatible stride dimensions. They must be both > 0')

        # for padding
        self.pad = pad
        self.pad_left, self.pad_right, self.pad_bottom, self.pad_top = (0, 0, 0, 0)

        super(Maxpool_layer, self).__init__(input_shape=input_shape)
        self._build(input_shape)

    def _build(self, input_shape=None):
        '''
        Evaluate the padding amounts once the input shape is known.

        Nothing is done when `input_shape` is None or padding is disabled.
        '''
        if input_shape is not None:
            if self.pad:
                self._evaluate_padding()

    def __str__(self):
        batch, w, h, c = self.input_shape
        batch, out_width, out_height, out_channels = self.out_shape
        return 'max {} x {} / {} {:>4d} x{:>4d} x{:>4d} x{:>4d} -> {:>4d} x{:>4d} x{:>4d} x{:>4d}'.format(
            self.size[0], self.size[1], self.stride[0],
            batch, w, h, c,
            batch, out_width, out_height, out_channels)

    @property
    def out_shape(self):
        '''
        Output shape of the layer as (batch, out_width, out_height, channels),
        computed from the input shape, the padding, the kernel size and the stride.
        '''
        batch, w, h, c = self.input_shape
        out_height = (h + self.pad_left + self.pad_right - self.size[1]) // self.stride[1] + 1
        out_width = (w + self.pad_top + self.pad_bottom - self.size[0]) // self.stride[0] + 1
        out_channels = c
        return (batch, out_width, out_height, out_channels)

    def _asStride(self, arr):
        '''
        _asStride returns a view of the input array such that a kernel of
        size = (kx, ky) is slided over the image with stride = (st1, st2)

        better reference here :
        https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html

        see also:
        https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy

        Parameters
        ----------
          arr : array-like
            Input batch of images to be pooled, with shape = (b, w, h, c)

        Returns
        -------
          subs : array-view
            View of the input array with shape (batch, out_w, out_h, c, kx, ky).
            The windows share memory with `arr` (and with each other whenever
            the stride is smaller than the kernel size).
        '''
        batch_stride, s0, s1, s3 = arr.strides
        batch, w, h, c = arr.shape
        kx, ky = self.size
        st1, st2 = self.stride

        out_w = 1 + (w - kx) // st1
        out_h = 1 + (h - ky) // st2

        # Shape of the final view
        view_shape = (batch, out_w, out_h, c) + (kx, ky)

        # strides of the final view
        strides = (batch_stride, s0 * st1, s1 * st2, s3) + (s0, s1)

        subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)

        return subs

    def _evaluate_padding(self):
        '''
        Compute padding dimensions, following keras VALID and SAME criteria.

        See:
        https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
        '''
        _, w, h, c = self.input_shape

        # Compute how many rows are needed to pad the image in the 'w' axis
        if (w % self.stride[0] == 0):
            pad_w = max(self.size[0] - self.stride[0], 0)
        else:
            pad_w = max(self.size[0] - (w % self.stride[0]), 0)

        # Compute how many columns are needed
        if (h % self.stride[1] == 0):
            pad_h = max(self.size[1] - self.stride[1], 0)
        else:
            pad_h = max(self.size[1] - (h % self.stride[1]), 0)

        # Number of rows/columns to be added for every direction
        self.pad_top = pad_w >> 1  # bit shift, integer division by two
        self.pad_bottom = pad_w - self.pad_top
        self.pad_left = pad_h >> 1
        self.pad_right = pad_h - self.pad_left

    def _pad(self, inpt):
        '''
        Pad every image in a batch with np.nan following keras SAME padding

        See also:
        https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave

        Parameters
        ----------
          inpt : array-like
            Input images in the format (batch, width, height, channels).

        Returns
        -------
          array-like
            A padded batch of images, following keras SAME padding.
        '''
        # return the nan-padded image, in the same format as inpt
        # (batch, width + pad_w, height + pad_h, channels)
        return np.pad(inpt,
                      pad_width=((0, 0),
                                 (self.pad_top, self.pad_bottom),
                                 (self.pad_left, self.pad_right),
                                 (0, 0)),
                      mode='constant', constant_values=(np.nan, np.nan))

    def forward(self, inpt):
        '''
        Forward function of the maxpool layer: It slides a kernel over every
        input image and return the maximum value of every sub-window.
        The function _asStride returns a view of the input array with shape
        (batch, out_w, out_h, c, kx, ky), where, for every image in the batch
        we have: out_w * out_h * c sub matrices kx * ky, containing pixel values.

        After the call, `self.output` holds the pooled values, `self.indexes`
        holds a pair of flat arrays with the (row, column) position of the
        maximum inside every window, and `self.delta` is reset to zeros.

        Parameters
        ----------
          inpt : array-like
            Input batch of images, with shape (batch, input_w, input_h, input_c).

        Returns
        -------
          self
        '''
        self._check_dims(shape=self.input_shape, arr=inpt, func='Forward')

        kx, ky = self.size
        st1, st2 = self.stride
        _, w, h, _ = self.input_shape

        inpt = inpt.astype('float64')

        if self.pad:
            mat_pad = self._pad(inpt)
        else:
            # If no padding, cut the last rows/columns in every image in the batch
            mat_pad = inpt[:, : (w - kx) // st1 * st1 + kx, : (h - ky) // st2 * st2 + ky, ...]

        # Return a strided view of the input array,
        # shape: (batch, 1+(w-kx)//st1, 1+(h-ky)//st2, c, kx, ky)
        view = self._asStride(mat_pad)

        # final shape (batch, out_w, out_h, c); NaN padding is ignored by nanmax
        self.output = np.nanmax(view, axis=(4, 5))

        # New shape for view, to retrieve indexes
        new_shape = view.shape[:4] + (kx * ky, )

        flat_argmax = np.nanargmax(view.reshape(new_shape), axis=4)

        # The shape is passed positionally: the keyword was renamed from `dims`
        # to `shape` in NumPy 1.16, the positional form works with both.
        self.indexes = np.unravel_index(flat_argmax.ravel(), (kx, ky))

        self.delta = np.zeros(shape=self.out_shape, dtype=float)

        return self

    def backward(self, delta):
        '''
        Backward function of maxpool layer: it accesses every position where in
        the input image there's a chosen maximum and adds the correspondent
        self.delta value.
        Since we work with a 'view' of delta, the same pixel may appear more
        than one time, and an atomic access to its value is needed to correctly
        modify it.

        Parameters
        ----------
          delta : array-like
            Global delta to be backpropagated; it is updated in place. It is
            checked against the layer input shape.

        Returns
        -------
          self
        '''
        check_is_fitted(self, 'delta')
        self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

        # Padding delta in order to create another view. Without padding the
        # view is built directly on `delta`, so the updates land in place.
        if self.pad:
            mat_pad = self._pad(delta)
        else:
            mat_pad = delta

        # Create a view of mat_pad, following the padding true or false
        net_delta_view = self._asStride(mat_pad)

        b, w, h, c = self.output.shape

        # Those indexes are useful to access 'atomically' (one at a time) every
        # element in net_delta_view: overlapping windows alias the same memory,
        # so the accumulation must be sequential.
        # np.ravel walks self.delta in C order, the same order used by
        # np.ndindex, whatever the memory layout of self.delta is.
        positions = zip(np.ndindex(b, w, h, c),
                        self.indexes[0], self.indexes[1],
                        np.ravel(self.delta))

        for (i, j, k, ch), m, o, grad in positions:
            net_delta_view[i, j, k, ch, m, o] += grad

        # Here delta is correctly modified: with padding the result lives in a
        # padded copy, whose inner region must be copied back.
        if self.pad:
            _, w_pad, h_pad, _ = mat_pad.shape
            delta[:] = mat_pad[:,
                               self.pad_top: w_pad - self.pad_bottom,
                               self.pad_left: h_pad - self.pad_right,
                               :]

        return self


if __name__ == '__main__':
    # Demo: run the maxpool layer forward and backward on a sample image and
    # show the input, the pooled output and the back-propagated delta.

    import os

    import pylab as plt
    from PIL import Image

    # Rescale an image to [0, 1] floats / to [0, 255] unsigned integers.
    img_2_float = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)
    float_2_img = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)

    # Resolve the sample image relative to this file (not to the current
    # working directory), so the demo runs from anywhere.
    filename = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'data', 'dog.jpg')
    inpt = np.asarray(Image.open(filename), dtype=float)
    inpt.setflags(write=1)
    inpt = img_2_float(inpt)

    inpt = np.expand_dims(inpt, axis=0)  # Add the batch shape.

    size = (3, 3)
    stride = (2, 2)
    pad = False

    # The constructor parameter is `pad`; a `padding` keyword would be
    # silently swallowed by **kwargs.
    layer = Maxpool_layer(input_shape=inpt.shape, size=size, stride=stride, pad=pad)

    # FORWARD

    layer.forward(inpt)

    forward_out = layer.output

    print(layer)

    # BACKWARD

    delta = np.zeros(inpt.shape, dtype=float)
    layer.delta = np.ones(layer.out_shape, dtype=float)
    layer.backward(delta)

    # Visualizations

    fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
    fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
    fig.suptitle('MaxPool Layer\nsize : {}, stride : {}, padding : {} '.format(size, stride, pad))

    ax1.imshow(float_2_img(inpt[0]))
    ax1.set_title('Original Image')
    ax1.axis('off')

    ax2.imshow(float_2_img(forward_out[0]))
    ax2.set_title('Forward')
    ax2.axis('off')

    ax3.imshow(float_2_img(delta[0]))
    ax3.set_title('Backward')
    ax3.axis('off')

    fig.tight_layout()
    plt.show()