# Source code for layers.maxpool_layer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

import numpy as np
from NumPyNet.exception import LayerError
from NumPyNet.utils import check_is_fitted
from NumPyNet.layers.base import BaseLayer

__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']


class Maxpool_layer(BaseLayer):
  '''
  Maxpool layer

  Parameters
  ----------
    size : tuple or int
      Size of the kernel to slide over the input image. If a tuple, it must contain two integers, (kx, ky).
      If an int, size = kx = ky.

    stride  : tuple or int (default = None)
      Represents the horizontal and vertical stride of the kernel (sx, sy).
      If None or 0, stride is assigned the same values as `size`.

    input_shape : tuple (default = None)
      Input shape of the layer. The default value is used when the layer is part of a network.

    pad : bool, (default = False)
      If False the image is cut to fit the size and stride dimensions, if True the
      image is padded following keras SAME padding, see references for details.

    **kwargs :
      Extra keyword arguments are accepted and ignored.

  Examples
  --------
  >>> import os
  >>>
  >>> import pylab as plt
  >>> from PIL import Image
  >>>
  >>> img_2_float = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)
  >>> float_2_img = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)
  >>>
  >>> filename = os.path.join(os.path.dirname('__file__'), '..', '..', 'data', 'dog.jpg')
  >>> inpt = np.asarray(Image.open(filename), dtype=float)
  >>> inpt.setflags(write=1)
  >>> inpt = img_2_float(inpt)
  >>>
  >>> inpt = np.expand_dims(inpt, axis=0)  # Add the batch shape.
  >>> b, w, h, c = inpt.shape
  >>>
  >>> size = (3, 3)
  >>> stride = (2, 2)
  >>> pad = False
  >>>
  >>> layer = Maxpool_layer(input_shape=inpt.shape, size=size, stride=stride, pad=pad)
  >>>
  >>> # FORWARD
  >>>
  >>> layer.forward(inpt)
  >>>
  >>> forward_out = layer.output
  >>>
  >>> print(layer)
  >>>
  >>> # BACKWARD
  >>>
  >>> delta = np.zeros(inpt.shape, dtype=float)
  >>> layer.delta = np.ones(layer.out_shape, dtype=float)
  >>> layer.backward(delta)
  >>>
  >>> # Visualizations
  >>>
  >>> fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
  >>> fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
  >>> fig.suptitle('MaxPool Layer\\nsize : {}, stride : {}, padding : {} '.format(size, stride, pad))
  >>>
  >>> ax1.imshow(float_2_img(inpt[0]))
  >>> ax1.set_title('Original Image')
  >>> ax1.axis('off')
  >>>
  >>> ax2.imshow(float_2_img(forward_out[0]))
  >>> ax2.set_title('Forward')
  >>> ax2.axis('off')
  >>>
  >>> ax3.imshow(float_2_img(delta[0]))
  >>> ax3.set_title('Backward')
  >>> ax3.axis('off')
  >>>
  >>> fig.tight_layout()
  >>> plt.show()

  .. image:: ../../../NumPyNet/images/maxpool_3-2.png
  .. image:: ../../../NumPyNet/images/maxpool_30-20.png

  Reference
  ---------
  - https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
  - https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
  - https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
  '''

  def __init__(self, size, stride=None, pad=False, input_shape=None, **kwargs):

    # A scalar size describes a square kernel
    if not hasattr(size, '__iter__'):
      size = (int(size), int(size))

    # None or 0 means that the stride follows the kernel size
    if not stride:
      stride = size
    elif not hasattr(stride, '__iter__'):
      stride = (int(stride), int(stride))

    # The lengths are checked BEFORE indexing, so that a wrong-length
    # argument raises a LayerError and not an IndexError
    if len(size) != 2 or len(stride) != 2:
      raise LayerError('Maxpool layer. Incompatible stride/size dimensions. They must be a 1D-2D tuple of values')

    if size[0] <= 0. or size[1] <= 0.:
      raise LayerError('Maxpool layer. Incompatible size dimensions. They must be both > 0')

    # A null stride component would raise a ZeroDivisionError in out_shape
    if stride[0] <= 0. or stride[1] <= 0.:
      raise LayerError('Maxpool layer. Incompatible stride dimensions. They must be both > 0')

    self.size = size
    self.stride = stride

    # for padding
    self.pad = pad
    self.pad_left, self.pad_right, self.pad_bottom, self.pad_top = (0, 0, 0, 0)

    super(Maxpool_layer, self).__init__(input_shape=input_shape)
    self._build(input_shape)

  def _build(self, input_shape=None):
    '''
    Evaluate the padding amounts when an input shape is given and `pad` is set.

    Parameters
    ----------
      input_shape : tuple (default = None)
        Input shape of the layer. It is only tested against None: the padding
        itself is computed from `self.input_shape`.
    '''
    if input_shape is not None:

      if self.pad:
        self._evaluate_padding()

  def __str__(self):
    '''
    One-line summary of the layer: kernel size, stride and input -> output shapes.
    Only the first component of the stride is printed.
    '''
    batch, w, h, c = self.input_shape
    batch, out_width, out_height, out_channels = self.out_shape
    return 'max         {} x {} / {}  {:>4d} x{:>4d} x{:>4d} x{:>4d}   ->  {:>4d} x{:>4d} x{:>4d} x{:>4d}'.format(
           self.size[0], self.size[1], self.stride[0],
           batch, w, h, c,
           batch, out_width, out_height, out_channels)

  @property
  def out_shape(self):
    '''
    Output shape of the layer as (batch, out_width, out_height, out_channels),
    computed from the input shape, the padding amounts, the kernel size and the stride.
    '''
    batch, w, h, c = self.input_shape
    out_height = (h + self.pad_left + self.pad_right - self.size[1]) // self.stride[1] + 1
    out_width = (w + self.pad_top + self.pad_bottom - self.size[0]) // self.stride[0] + 1
    out_channels = c
    return (batch, out_width, out_height, out_channels)

  def _asStride(self, arr):
    '''
    _asStride returns a view of the input array such that a kernel of size = (kx,ky)
    is slided over the image with stride = (st1, st2)

    better reference here :
    https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html

    see also:
    https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy

    Parameters
    ----------
      arr : array-like
        Input batch of images to be pooled, with shape = (b, w, h, c)

    Returns
    -------
      subs : array-view
        View of the input array with shape (batch, out_w, out_h, c, kx, ky)
    '''

    batch_stride, s0, s1, s3 = arr.strides
    batch, w, h, c = arr.shape
    kx, ky = self.size
    st1, st2 = self.stride

    out_w = 1 + (w - kx) // st1
    out_h = 1 + (h - ky) // st2

    # Shape of the final view: the two kernel axes are appended AFTER the channels
    view_shape = (batch, out_w, out_h, c) + (kx, ky)

    # strides of the final view: a step along out_w/out_h moves by a whole stride,
    # a step along kx/ky moves by a single pixel
    strides = (batch_stride, s0 * st1, s1 * st2, s3) + (s0, s1)

    subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)
    return subs

  def _evaluate_padding(self):
    '''
    Compute padding dimensions, following keras VALID and SAME criteria. See:
    https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave

    The results are stored in pad_top, pad_bottom, pad_left and pad_right.
    '''
    _, w, h, c = self.input_shape

    # Compute how many rows are needed to pad the image in the 'w' axis
    if (w % self.stride[0] == 0):
      pad_w = max(self.size[0] - self.stride[0], 0)
    else:
      pad_w = max(self.size[0] - (w % self.stride[0]), 0)

    # Compute how many columns are needed to pad the image in the 'h' axis
    if (h % self.stride[1] == 0):
      pad_h = max(self.size[1] - self.stride[1], 0)
    else:
      pad_h = max(self.size[1] - (h % self.stride[1]), 0)

    # Number of rows/columns to be added for every direction:
    # when the total is odd, the extra one goes to bottom/right
    self.pad_top = pad_w >> 1  # bit shift, integer division by two
    self.pad_bottom = pad_w - self.pad_top
    self.pad_left = pad_h >> 1
    self.pad_right = pad_h - self.pad_left

  def _pad(self, inpt):
    '''
    Pad every image in a batch with np.nan following keras SAME padding
    See also:
      https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave

    Parameters
    ----------
      inpt : array-like
        Input images in the format (batch, width, height, channels).

    Returns
    -------
      array-like
        A padded batch of images, following keras SAME padding.
    '''

    # return the nan-padded image, in the same format as inpt (batch, width + pad_w, height + pad_h, channels)
    return np.pad(inpt, pad_width=((0, 0), (self.pad_top, self.pad_bottom), (self.pad_left, self.pad_right), (0, 0)),
                  mode='constant', constant_values=(np.nan, np.nan))

  def forward(self, inpt):
    '''
    Forward function of the maxpool layer: It slides a kernel over every input image and return
    the maximum value of every sub-window.
    the function _asStride returns a view of the input array with shape
    (batch, out_w, out_h , c, kx, ky), where, for every image in the batch we have:
    out_w * out_h * c sub matrixes kx * ky, containing pixel values.

    The position of every maximum inside its window is stored in `indexes`
    (used by the backward function) and `delta` is reset to zeros.

    Parameters
    ----------
      inpt : array-like
        Input batch of images, with shape (batch, input_w, input_h, input_c).

    Returns
    -------
      self
    '''

    self._check_dims(shape=self.input_shape, arr=inpt, func='Forward')

    kx, ky = self.size
    st1, st2 = self.stride
    _, w, h, _ = self.input_shape
    inpt = inpt.astype('float64')

    if self.pad:
      mat_pad = self._pad(inpt)
    else:
      # If no padding, cut the last rows/columns in every image in the batch
      mat_pad = inpt[:, : (w - kx) // st1 * st1 + kx, : (h - ky) // st2 * st2 + ky, ...]

    # Return a strided view of the input array, shape: (batch, 1+(w-kx)//st1,1+(h-ky)//st2 ,c, kx, ky)
    view = self._asStride(mat_pad)

    # final shape (batch, out_w, out_h, c)
    # the nan-aware functions skip the nan values introduced by the padding
    self.output = np.nanmax(view, axis=(4, 5))

    # New shape for view, to retrieve indexes
    new_shape = view.shape[:4] + (kx * ky, )

    flat_indexes = np.nanargmax(view.reshape(new_shape), axis=4)

    # The shape is passed positionally: the keyword name changed between Numpy
    # versions (dims -> shape), while the positional form works with all of them
    self.indexes = np.unravel_index(flat_indexes.ravel(), (kx, ky))

    self.delta = np.zeros(shape=self.out_shape, dtype=float)

    return self

  def backward(self, delta):
    '''
    Backward function of maxpool layer: it access every position where in the input image
    there's a chosen maximum and add the correspondent self.delta value.
    Since the sub-windows may overlap, the same pixel may be selected more than one time,
    and an unbuffered accumulation (np.add.at) is needed to correctly modify it.

    Parameters
    ----------
      delta : array-like
        Global delta to be backpropagated. It is validated against `input_shape`
        and it is modified in place. A float array is expected.

    Returns
    ----------
      self
    '''

    check_is_fitted(self, 'delta')
    self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

    st1, st2 = self.stride

    # Padding delta to have the same geometry used in the forward function.
    # Note that _pad returns a copy, while without padding we work directly on delta
    if self.pad:
      mat_pad = self._pad(delta)
    else:
      mat_pad = delta

    b, out_w, out_h, c = self.output.shape

    # Coordinates of every output element, flattened in C order:
    # the same order used for the indexes stored by the forward function
    batch_idx, w_idx, h_idx, c_idx = np.indices((b, out_w, out_h, c)).reshape(4, -1)

    # Position of every maximum in the (padded) image:
    # origin of its window plus the offset inside the window
    rows = w_idx * st1 + self.indexes[0]
    cols = h_idx * st2 + self.indexes[1]

    # np.add.at accumulates also on repeated positions, which a plain
    # fancy-indexing assignment would overwrite
    np.add.at(mat_pad, (batch_idx, rows, cols, c_idx), np.ravel(self.delta))

    # Without padding delta is already modified; with padding the result
    # is copied back, discarding the padded borders
    if self.pad:
      _, w_pad, h_pad, _ = mat_pad.shape
      delta[:] = mat_pad[:, self.pad_top: w_pad - self.pad_bottom, self.pad_left: h_pad - self.pad_right, :]

    return self


if __name__ == '__main__':

  # Demo: apply the maxpool layer to a test image and show the input,
  # the forward output and the backpropagated delta side by side.

  import os

  import pylab as plt
  from PIL import Image

  def img_2_float(im):
    # Rescale the image values to the [0, 1] range, as float
    return ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)

  def float_2_img(im):
    # Rescale the image values to the [0, 255] range, as uint8
    return ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)

  # The path is resolved from the location of this file, not from the current directory
  filename = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'data', 'dog.jpg')
  inpt = np.asarray(Image.open(filename), dtype=float)
  inpt.setflags(write=1)
  inpt = img_2_float(inpt)

  inpt = np.expand_dims(inpt, axis=0)  # Add the batch shape.
  b, w, h, c = inpt.shape

  size = (3, 3)
  stride = (2, 2)
  pad = False

  # The keyword is 'pad': any other name is swallowed by **kwargs and ignored
  layer = Maxpool_layer(input_shape=inpt.shape, size=size, stride=stride, pad=pad)

  # FORWARD

  layer.forward(inpt)

  forward_out = layer.output

  print(layer)

  # BACKWARD

  delta = np.zeros(inpt.shape, dtype=float)
  layer.delta = np.ones(layer.out_shape, dtype=float)
  layer.backward(delta)

  # Visualizations

  fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
  fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
  fig.suptitle('MaxPool Layer\nsize : {}, stride : {}, padding : {} '.format(size, stride, pad))

  ax1.imshow(float_2_img(inpt[0]))
  ax1.set_title('Original Image')
  ax1.axis('off')

  ax2.imshow(float_2_img(forward_out[0]))
  ax2.set_title('Forward')
  ax2.axis('off')

  ax3.imshow(float_2_img(delta[0]))
  ax3.set_title('Backward')
  ax3.axis('off')

  fig.tight_layout()
  plt.show()