#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
import numpy as np
from NumPyNet.exception import LayerError
from NumPyNet.utils import check_is_fitted
from NumPyNet.layers.base import BaseLayer
__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']
[docs]class Avgpool_layer(BaseLayer):
'''
Avgpool layer
Parameters
----------
size : tuple or int
Size of the kernel to slide over the input image. If a tuple, it must contains two integers, (kx, ky).
If a int, size = kx = ky.
stride : tuple or int (default=None)
Represents the horizontal and vertical stride of the kernel (sx, sy).
If None or 0, stride is assigned the same values as `size`.
input_shape : tuple (default=None)
Input shape of the layer. The default value is used when the layer is part of a network.
pad : bool, (default=False)
If False the image is cut to fit the size and stride dimensions, if True the
image is padded following keras SAME padding, see references for details.
Examples
--------
>>> import os
>>> import pylab as plt
>>> from PIL import Image
>>>
>>> img_2_float = lambda im : ((im - im.min()) * (1./(im.max() - im.min()) * 1.)).astype(float)
>>> float_2_img = lambda im : ((im - im.min()) * (1./(im.max() - im.min()) * 255.)).astype(np.uint8)
>>>
>>> filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
>>> inpt = np.asarray(Image.open(filename), dtype=float)
>>> inpt.setflags(write=1)
>>> inpt = img_2_float(inpt)
>>>
>>> inpt = np.expand_dims(inpt, axis=0)
>>> pad = False
>>>
>>> size = 3
>>> stride = 2
>>>
>>> # Model initialization
>>> layer = Avgpool_layer(input_shape=inpt.shape, size=size, stride=stride, padding=pad)
>>>
>>> # FORWARD
>>>
>>> layer.forward(inpt)
>>> forward_out = layer.output.copy()
>>>
>>> print(layer)
>>>
>>> # BACKWARD
>>>
>>> delta = np.random.uniform(low=0., high=1.,size=inpt.shape)
>>> layer.delta = np.ones(layer.out_shape, dtype=float)
>>> layer.backward(delta)
>>>
>>> # Visualizations
>>>
>>> fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
>>> fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
>>>
>>> fig.suptitle('Average Pool Layer\n\nsize : {}, stride : {}, padding : {}'.format(size, stride, pad))
>>>
>>> ax1.imshow(float_2_img(inpt)[0])
>>> ax1.set_title('Original image')
>>> ax1.axis('off')
>>>
>>> ax2.imshow(float_2_img(layer.output[0]))
>>> ax2.set_title('Forward')
>>> ax2.axis('off')
>>>
>>> ax3.imshow(float_2_img(delta[0]))
>>> ax3.set_title('Backward')
>>> ax3.axis('off')
>>>
>>> fig.tight_layout()
>>> plt.show()
.. image:: ../../../NumPyNet/images/average_3-2.png
.. image:: ../../../NumPyNet/images/average_30-20.png
References
----------
- https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
- https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
- https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
'''
def __init__(self, size, stride=None, pad=False, input_shape=None, **kwargs):
self.size = size
if not hasattr(self.size, '__iter__'):
self.size = (int(self.size), int(self.size))
if self.size[0] <= 0. or self.size[1] <= 0.:
raise LayerError('Avgpool layer. Incompatible size dimensions. They must be both > 0')
if not stride:
self.stride = size
else:
self.stride = stride
if not hasattr(self.stride, '__iter__'):
self.stride = (int(self.stride), int(self.stride))
if len(self.size) != 2 or len(self.stride) != 2:
raise LayerError('Avgpool layer. Incompatible stride/size dimensions. They must be a 1D-2D tuple of values')
# for padding
self.pad = pad
self.pad_left, self.pad_right, self.pad_bottom, self.pad_top = (0, 0, 0, 0)
super(Avgpool_layer, self).__init__(input_shape=input_shape)
self._build(input_shape)
def _build(self, input_shape=None):
'''
Set the input shape and re-evaluate padding
'''
if input_shape is not None:
if self.pad:
self._evaluate_padding()
def __str__(self):
'''
Printer
'''
batch, w, h, c = self.input_shape
_, out_width, out_height, out_channels = self.out_shape
return 'avg {} x {} / {} {:>4d} x{:>4d} x{:>4d} x{:>4d} -> {:>4d} x{:>4d} x{:>4d}'.format(
self.size[0], self.size[1], self.stride[0],
batch, w, h, c,
out_width, out_height, out_channels)
@property
def out_shape(self):
'''
Get the output shape
Returns
-------
out_shape : tuple
Output shape as (batch, out_width, out_height, out_channels)
'''
batch, w, h, c = self.input_shape
out_height = (h + self.pad_left + self.pad_right - self.size[1]) // self.stride[1] + 1
out_width = (w + self.pad_top + self.pad_bottom - self.size[0]) // self.stride[0] + 1
out_channels = c
return (batch, out_width, out_height, out_channels)
def _asStride(self, arr):
'''
_asStride returns a view of the input array such that a kernel of size = (kx,ky)
is slided over the image with stride = (st1, st2)
Parameters
----------
<<<<<<< HEAD
inpt : array-like
Input batch of images to be stride with shape = (batch, w, h, c)
References
----------
- https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.stride_tricks.as_strided.html
- https://stackoverflow.com/questions/42463172/how-to-perform-max-mean-pooling-on-a-2d-array-using-numpy
=======
arr : array-like
Input batch of images to be convoluted with shape = (b, w, h, c)
Returns
-------
subs : array-view
View of the input array with shape (batch, out_w, out_h, kx, ky, out_c)
>>>>>>> bc4153f58a054d4e60bb14c993bf61a058458e8b
'''
batch_stride, s0, s1 = arr.strides[:3]
batch, w, h = arr.shape[:3]
kx, ky = self.size
st1, st2 = self.stride
# Shape of the final view
view_shape = (batch, 1 + (w - kx) // st1, 1 + (h - ky) // st2) + arr.shape[3:] + (kx, ky)
# strides of the final view
strides = (batch_stride, st1 * s0, st2 * s1) + arr.strides[3:] + (s0, s1)
subs = np.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)
# returns a view with shape = (batch, out_w, out_h, out_c, kx, ky)
return subs
def _evaluate_padding(self):
'''
Compute padding dimensions, following keras VALID and SAME criteria.
References
----------
- https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
'''
_, w, h, _ = self.input_shape
# Compute how many raws are needed to pad the image in the 'w' axis
if (w % self.stride[0] == 0):
pad_w = max(self.size[0] - self.stride[0], 0)
else:
pad_w = max(self.size[0] - (w % self.stride[0]), 0)
# Compute how many Columns are needed
if (h % self.stride[1] == 0):
pad_h = max(self.size[1] - self.stride[1], 0)
else:
pad_h = max(self.size[1] - (h % self.stride[1]), 0)
# Number of raws/columns to be added for every directons
self.pad_top = pad_w >> 1 # bit shift, integer division by two
self.pad_bottom = pad_w - self.pad_top
self.pad_left = pad_h >> 1
self.pad_right = pad_h - self.pad_left
return self
def _pad(self, inpt):
'''
Padd every image in a batch with np.nan following keras SAME padding
Parameters
----------
inpt : array-like
Input images in the format (batch, width, height, channels).
Returns
-------
<<<<<<< HEAD
pad : array-like
A padded batch of images, following keras SAME padding.
References
- https://stackoverflow.com/questions/53819528/how-does-tf-keras-layers-conv2d-with-padding-same-and-strides-1-behave
----------
=======
array-like
A padded batch of images, following keras SAME padding.
>>>>>>> bc4153f58a054d4e60bb14c993bf61a058458e8b
'''
# return the nan-padded image, in the same format as inpt (batch, width + pad_w, height + pad_h, channels)
return np.pad(inpt, pad_width=((0, 0), (self.pad_top, self.pad_bottom), (self.pad_left, self.pad_right), (0, 0)),
mode='constant', constant_values=(np.nan, np.nan))
[docs] def forward(self, inpt):
'''
Forward function of the average pool layer: it slide a kernel of size (kx,ky) = size
and with step (st1, st2) = strides over every image in the batch. For every sub-matrix
it computes the average value without considering NAN value (padding), and passes it
to the output.
Parameters
----------
inpt : array-like
<<<<<<< HEAD
Input batch of image, with the shape (batch, input_w, input_h, input_c).
=======
Input batch of images, with shape (batch, input_w, input_h, input_c).
>>>>>>> bc4153f58a054d4e60bb14c993bf61a058458e8b
Returns
-------
self
'''
self._check_dims(shape=self.input_shape, arr=inpt, func='Forward')
kx, ky = self.size
sx, sy = self.stride
_, w, h, _ = self.input_shape
inpt = inpt.astype('float64')
# Padding
if self.pad:
mat_pad = self._pad(inpt)
else:
# If padding false, it cuts images' raws/columns
mat_pad = inpt[:, : (w - kx) // sx * sx + kx, : (h - ky) // sy * sy + ky, ...]
# 'view' is the strided input image, shape = (batch, out_w, out_h, out_c, kx, ky)
view = self._asStride(mat_pad)
# Mean of every sub matrix, computed without considering the padd(np.nan)
self.output = np.nanmean(view, axis=(4, 5))
self.delta = np.zeros(shape=self.out_shape, dtype=float)
return self
[docs] def backward(self, delta):
'''
<<<<<<< HEAD
Backward function of the average_pool layer: the function modifies the net delta
=======
Backward function of the averagepool layer: the function modifies the net delta
>>>>>>> bc4153f58a054d4e60bb14c993bf61a058458e8b
to be backpropagated.
Parameters
----------
delta : array-like
Global delta to be backpropagated with shape (batch, out_w, out_h, out_c).
Returns
----------
self
'''
check_is_fitted(self, 'delta')
self._check_dims(shape=self.input_shape, arr=delta, func='Backward')
delta[:] = delta.astype('float64')
# kx, ky = self.size
# Padding delta for a coherent _asStrided dimension
if self.pad:
mat_pad = self._pad(delta)
else:
mat_pad = delta
# _asStrid of padded delta let me access every pixel of the memory in the order I want.
# This is used to create a 1-1 correspondence between output and input pixels.
net_delta_view = self._asStride(mat_pad)
# norm = 1./(kx*ky) # needs to count only no nan values for keras
_, w, h, c = self.output.shape
# The indexes are necessary to access every pixel value one at a time, since
# modifing the same memory address more times at once doesn't produce the correct result
# norm = 1. / (kx*ky)
norm = self.delta * (1. / np.count_nonzero(~np.isnan(net_delta_view), axis=(4, 5)))
net_delta_review = np.moveaxis(net_delta_view, source=[1, 2, 3], destination=[0, 1, 2])
for (i, j, k), n in zip(np.ndindex(w, h, c), np.nditer(norm)):
net_delta_review[i, j, k, ...] += n
# net_delta_view *= norm
# Here delta is updated correctly
if self.pad:
_, w_pad, h_pad, _ = mat_pad.shape
# Excluding the padded part of the image
delta[:] = mat_pad[:, self.pad_top: w_pad - self.pad_bottom, self.pad_left: h_pad - self.pad_right, :]
else:
delta[:] = mat_pad
return self
if __name__ == '__main__':
import os
import pylab as plt
from PIL import Image
img_2_float = lambda im : ((im - im.min()) * (1./(im.max() - im.min()) * 1.)).astype(float)
float_2_img = lambda im : ((im - im.min()) * (1./(im.max() - im.min()) * 255.)).astype(np.uint8)
filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
inpt = np.asarray(Image.open(filename), dtype=float)
inpt.setflags(write=1)
inpt = img_2_float(inpt)
inpt = np.expand_dims(inpt, axis=0)
pad = False
size = 3
stride = 2
# Model initialization
layer = Avgpool_layer(input_shape=inpt.shape, size=size, stride=stride, padding=pad)
# FORWARD
layer.forward(inpt)
forward_out = layer.output.copy()
print(layer)
# BACKWARD
delta = np.random.uniform(low=0., high=1., size=inpt.shape)
layer.delta = np.ones(layer.out_shape, dtype=float)
layer.backward(delta)
# Visualizations
fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)
fig.suptitle('Average Pool Layer\n\nsize : {}, stride : {}, padding : {}'.format(size, stride, pad))
ax1.imshow(float_2_img(inpt)[0])
ax1.set_title('Original image')
ax1.axis('off')
ax2.imshow(float_2_img(layer.output[0]))
ax2.set_title('Forward')
ax2.axis('off')
ax3.imshow(float_2_img(delta[0]))
ax3.set_title('Backward')
ax3.axis('off')
fig.tight_layout()
plt.show()