Source code for layers.cost_layer

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import division
from __future__ import print_function

import functools

import numpy as np
from NumPyNet.utils import check_is_fitted
from NumPyNet.utils import cost_type
from NumPyNet.utils import _check_cost
from NumPyNet.layers.base import BaseLayer

__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']


class Cost_layer(BaseLayer):
    '''
    Cost layer

    Compute the cost of the output based on the selected cost function.

    Parameters
    ----------
      cost_type : cost_type or str
        Cost function to be applied to the layer, from the enum cost_type.

      input_shape : tuple (default=None)
        Shape of the input in the format (batch, w, h, c), None is used when
        the layer is part of a Network model.

      scale : float (default=1.)
        Factor applied to the layer delta when it is accumulated by `backward`.

      ratio : float (default=0.)
        When non-zero, `_ratio` is invoked at the end of `forward`.

      noobject_scale : float (default=1.)
        Factor applied to loss and delta where truth is zero; only used by the
        `seg` cost type and only when different from 1.

      threshold : float (default=0.)
        When non-zero, delta entries whose magnitude is below
        |threshold / loss.size| are set to zero.

      smoothing : float (default=0.)
        When non-zero, truth is replaced by
        truth * (1 - smoothing) + smoothing / prod(loss.shape[1:]).

      **kwargs
        Accepted for call compatibility; not used by this layer.

    Example
    -------
    >>> import os
    >>>
    >>> import numpy as np
    >>> import pylab as plt
    >>> from PIL import Image
    >>>
    >>> img_2_float = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)
    >>> float_2_img = lambda im : ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)
    >>>
    >>> filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')
    >>> inpt = np.asarray(Image.open(filename), dtype=float)
    >>> inpt.setflags(write=1)
    >>> inpt = img_2_float(inpt)
    >>>
    >>> # batch == 1
    >>> inpt = np.expand_dims(inpt, axis=0)
    >>>
    >>> cost = cost_type.mse
    >>> scale = 1.
    >>> ratio = 0.
    >>> noobject_scale = 1.
    >>> threshold = 0.
    >>> smoothing = 0.
    >>>
    >>> truth = np.random.uniform(low=0., high=1., size=inpt.shape)
    >>>
    >>> layer = Cost_layer(input_shape=inpt.shape,
    ...                    cost_type=cost, scale=scale,
    ...                    ratio=ratio,
    ...                    noobject_scale=noobject_scale,
    ...                    threshold=threshold,
    ...                    smoothing=smoothing,
    ...                    trainable=True)
    >>> print(layer)
    >>>
    >>> layer.forward(inpt, truth)
    >>> forward_out = layer.output
    >>>
    >>> print('Cost: {:.3f}'.format(layer.cost))

    References
    ----------
      - TODO
    '''

    # Sentinel truth value marking the entries ignored by the `masked` cost.
    SECRET_NUM = 12345

    def __init__(self, cost_type, input_shape=None, scale=1., ratio=0., noobject_scale=1., threshold=0., smoothing=0., **kwargs):

        self.cost = 0.
        self.cost_type = _check_cost(self, cost_type)
        self.scale = scale
        self.ratio = ratio
        self.noobject_scale = noobject_scale
        self.threshold = threshold
        self.smoothing = smoothing

        # Need an empty initialization to work out _smooth_l1 and _wgan,
        # which fill `self.loss` in place instead of rebinding it.
        super(Cost_layer, self).__init__(input_shape=input_shape)
        self.loss = np.empty(shape=self.out_shape)

    def __str__(self):
        '''
        PRINTER
        '''
        return 'cost                   {0:>4d} x{1:>4d} x{2:>4d} x{3:>4d}   ->  {0:>4d} x{1:>4d} x{2:>4d} x{3:>4d}'.format(*self.out_shape)

    def forward(self, inpt, truth=None):
        '''
        Forward function for the cost layer. Using the chosen
        cost function, computes output, delta and cost.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like (default=None)
            truth values, it must have the same dimension as inpt.
            When None, only `output` is set: `delta` is left at zero and
            `cost` keeps its previous value.

        Returns
        -------
          self
        '''

        self._check_dims(shape=self.input_shape, arr=inpt, func='Forward')

        # Zero-initialised so that a backward pass run without truth adds
        # nothing instead of uninitialised memory.
        self.delta = np.zeros(shape=self.out_shape)
        # For a NumPy array this slice is a view of `inpt`, not a copy.
        self.output = inpt[:]

        if truth is not None:

            if self.smoothing:
                truth = self._smoothing(truth)  # smoothing is applied on truth

            if self.cost_type == cost_type.smooth:
                self._smooth_l1(inpt, truth)    # smooth l1
            elif self.cost_type == cost_type.mae:
                self._l1(inpt, truth)           # l1 for mean absolute error
            elif self.cost_type == cost_type.wgan:
                self._wgan(inpt, truth)         # wgan
            elif self.cost_type == cost_type.hellinger:
                self._hellinger(inpt, truth)    # hellinger distance
            elif self.cost_type == cost_type.hinge:
                self._hinge(inpt, truth)        # hinge loss
            elif self.cost_type == cost_type.logcosh:
                self._logcosh(inpt, truth)      # log-cosh
            else:
                self._l2(inpt, truth)           # l2 for mse and every other type

            # seg: rescale the background only if noobject_scale is not 1.
            if self.cost_type == cost_type.seg and self.noobject_scale != 1.:
                self._seg(truth)

            # masked: l2 with the sentinel truth entries ignored
            if self.cost_type == cost_type.masked:
                self._masked(inpt, truth)

            if self.ratio:
                self._ratio(truth)

            if self.threshold:
                self._threshold()

            norm = 1. / self.delta.size         # normalization of delta!
            self.delta *= norm

            self.cost = np.mean(self.loss)      # compute the cost

        return self

    def backward(self, delta):
        '''
        Backward function of the cost_layer, it updates the delta
        variable to be backpropagated. `self.delta` is updated inside the cost function.

        Parameters
        ----------
          delta : array-like
            delta array of shape (batch, w, h, c). Global delta to be backpropagated.
            It is updated in place.

        Returns
        -------
          self
        '''

        check_is_fitted(self, 'delta')
        self._check_dims(shape=self.input_shape, arr=delta, func='Backward')

        delta[:] += self.scale * self.delta

        return self

    def _smoothing(self, truth):
        '''
        _smoothing function

        Parameters
        ----------
          truth : array-like
            truth values, it must have the same dimension as inpt.

        Returns
        -------
          array-like
            truth * (1 - smoothing) + smoothing / prod(loss.shape[1:])
        '''

        scale = 1. - self.smoothing
        bias = self.smoothing / np.prod(self.loss.shape[1:])

        return truth * scale + bias

    def _smooth_l1(self, inpt, truth):
        '''
        _smooth_l1 cost function

        With diff = inpt - truth, the loss is diff**2 where |diff| < 1 and
        2 * |diff| - 1 elsewhere; delta is diff and sign(diff) respectively.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        diff = inpt - truth
        abs_diff = np.abs(diff)

        small = abs_diff < 1.
        large = ~small

        self.loss[small] = diff[small] * diff[small]
        self.delta[small] = diff[small]

        self.loss[large] = 2. * abs_diff[large] - 1.
        # Same sign as the quadratic branch, so delta is continuous at
        # |diff| == 1 and points the same way as in the other cost functions.
        self.delta[large] = np.sign(diff[large])

    def _l1(self, inpt, truth):
        '''
        cost function for the l1 norm of the ouput.
        It computes the absolute difference between truth and inpt and
        updates loss and delta. Called for mae cost_type.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        diff = truth - inpt

        self.loss = np.abs(diff)
        self.delta = -np.sign(diff)

    def _wgan(self, inpt, truth):
        '''
        wgan cost function: where truth is not 0, the loss is the inverted input.
        Input is forwarded as it is otherwise. Delta is the sign of truth.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        nonzero = truth != 0
        zero = ~nonzero

        self.loss[nonzero] = -inpt[nonzero]
        self.loss[zero] = inpt[zero]

        self.delta = np.sign(truth)

    def _l2(self, inpt, truth):
        '''
        Cost function for the l2 norm.
        It computes the square difference (truth - inpt)**2
        and modifies loss and delta. Called for mse cost_type.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        diff = truth - inpt

        self.loss = diff * diff
        self.delta = -2. * diff

    def _hinge(self, inpt, truth):
        '''
        Cost function for the Hinge loss.
        The gradient is computed as the smoothed version of Rennie and Srebro

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        margin = truth * inpt
        self.loss = np.maximum(0, 1. - margin)

        # The three regions are selected once, on the untouched margin.
        # Delta is a separate array: writing through an alias of `margin`
        # would change the values the later selections are computed from.
        below = margin <= 0.
        inside = (margin > 0.) & (margin <= 1.)
        above = margin >= 1.

        delta = np.array(margin, dtype=float)  # copy; NaN entries stay NaN

        # NOTE(review): these expressions are kept from the previous code; they
        # look like piecewise loss values rather than a derivative, and the
        # middle branch uses 1 - z**2 - confirm the intended formula.
        delta[below] = .5 - margin[below]
        delta[inside] = .5 * (1. - margin[inside]**2)
        delta[above] = 0.

        self.delta = delta

    def _hellinger(self, inpt, truth):
        '''
        cost function for the Hellinger distance.
        It computes the square difference (sqrt(truth) - sqrt(inpt))**2
        and modifies loss and delta. Called for hellinger cost_type.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        diff = np.sqrt(truth) - np.sqrt(inpt)

        self.loss = diff * diff
        self.delta = -diff / np.sqrt(2 * inpt)

    def _logcosh(self, inpt, truth):
        '''
        Cost function for the Log-Cosh.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        diff = truth - inpt

        self.loss = np.log(np.cosh(diff))
        self.delta = np.tanh(-diff)

    def _seg(self, truth):
        '''
        _seg function, where truth is zero, scale loss and delta for noobject_scale

        Parameters
        ----------
          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        background = truth == 0.

        self.loss[background] *= self.noobject_scale
        self.delta[background] *= self.noobject_scale

    def _masked(self, inpt, truth):
        '''
        _masked function: ignore the entries whose truth is equal to SECRET_NUM.
        used to ignore certain classes

        The matching entries of loss, delta and `inpt` are set to zero;
        `inpt` is modified in place.

        Parameters
        ----------
          inpt : array-like
            Input batch of images in format (batch, in_w, in_h, in _c).

          truth : array-like
            truth values, it must have the same dimension as `inpt`
        '''

        ignored = truth == self.SECRET_NUM

        # This runs after the l2 cost: without clearing loss and delta the
        # sentinel would be treated as a real target and dominate both.
        self.loss[ignored] = 0.
        self.delta[ignored] = 0.

        inpt[ignored] = 0.

    def _ratio(self, truth):
        '''
        _ratio function: called if self.ratio is not zero.

        The magnitude cut-off is currently disabled (it is fixed to zero), so
        no delta entry is modified.

        Parameters
        ----------
          truth : array-like
            truth values, it must have the same dimension as `inpt`. Not used.
        '''

        # Delta must keep its element order: every entry belongs to the input
        # value at the same position, so it is never sorted or reshaped here.
        # NOTE(review): the cut-off was presumably meant to be the magnitude
        # found at position (1 - ratio) * delta.size of the sorted |delta|,
        # e.g. np.sort(np.abs(self.delta), axis=None)[index] - confirm before
        # enabling it.
        thr = 0.

        self.delta[(self.delta * self.delta) < thr] = 0.

    def _threshold(self):
        '''
        _threshold function: set a global threshold to delta.
        Entries with delta**2 < (threshold / loss.size)**2 are set to zero.
        '''

        scale = self.threshold / self.loss.size
        scale *= scale

        self.delta[(self.delta * self.delta) < scale] = 0.

if __name__ == '__main__':

    # Demo: run the layer forward and backward on a sample image and plot
    # the input, the forwarded output and the resulting delta.

    import os

    import pylab as plt
    from PIL import Image

    def img_2_float(im):
        # Rescale the array to the [0, 1] range as float.
        return ((im - im.min()) * (1. / (im.max() - im.min()) * 1.)).astype(float)

    def float_2_img(im):
        # Rescale the array to the [0, 255] range as unsigned 8-bit integers.
        return ((im - im.min()) * (1. / (im.max() - im.min()) * 255.)).astype(np.uint8)

    filename = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'dog.jpg')

    # The context manager closes the image file once the pixels are copied.
    with Image.open(filename) as image:
        inpt = np.asarray(image, dtype=float)

    inpt.setflags(write=1)
    inpt = img_2_float(inpt)

    # batch == 1
    inpt = np.expand_dims(inpt, axis=0)

    # Bound to its own name: at module level, rebinding `cost_type` would
    # replace the imported enum that Cost_layer.forward looks up when called.
    cost = cost_type.mse
    scale = 1.
    ratio = 0.
    noobject_scale = 1.
    threshold = 0.
    smoothing = 0.

    truth = np.random.uniform(low=0., high=1., size=inpt.shape)

    layer = Cost_layer(input_shape=inpt.shape,
                       cost_type=cost, scale=scale,
                       ratio=ratio,
                       noobject_scale=noobject_scale,
                       threshold=threshold,
                       smoothing=smoothing,
                       trainable=True)
    print(layer)

    layer.forward(inpt, truth)
    forward_out = layer.output

    print('Cost: {:.3f}'.format(layer.cost))

    # backward accumulates into this array in place
    delta = np.zeros(shape=inpt.shape, dtype=float)
    layer.backward(delta)

    fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, figsize=(10, 5))
    fig.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.15)

    fig.suptitle('Cost Layer:\n{0}'.format(cost))

    ax1.imshow(float_2_img(inpt[0]))
    ax1.axis('off')
    ax1.set_title('Original Image')

    ax2.imshow(float_2_img(forward_out[0]))
    ax2.axis('off')
    ax2.set_title('Forward Image')

    ax3.imshow(float_2_img(delta[0]))
    ax3.axis('off')
    ax3.set_title('Delta Image')

    fig.tight_layout()
    plt.show()