#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
import os
import cv2
import numpy as np
from NumPyNet.image_utils import image_utils
from NumPyNet.image_utils import normalization
# Package metadata: maintainers and their contact addresses (listed in the same order)
__author__ = ['Mattia Ceccarelli', 'Nico Curti']
__email__ = ['mattia.ceccarelli3@studio.unibo.it', 'nico.curti2@unibo.it']
class Image(object):
    '''
    Image container backed by a numpy array.

    The pixel buffer lives in ``self._data``. Images read through ``load`` or
    ``from_frame`` are converted from the OpenCV representation to float64 RGB
    values in [0., 1.], laid out as (height, width, channels).
    Most of the methods modify the image in place and return ``self`` so that
    calls can be chained.

    Parameters
    ----------
      filename : str (default=None)
        Image filename. If given, the file is read immediately through
        ``load``; otherwise the image starts empty (``_data`` is None).
    '''

    def __init__(self, filename=None):
        # always define the attribute, so an empty image is a well-formed object
        self._data = None

        if filename is not None:
            self.load(filename)

    @property
    def shape(self):
        '''
        Get the image dimensions
        '''
        return self._data.shape

    @property
    def width(self):
        '''
        Get the image width (second axis of the (height, width, channels) buffer)
        '''
        return self._data.shape[1]

    @property
    def height(self):
        '''
        Get the image height (first axis of the (height, width, channels) buffer)
        '''
        return self._data.shape[0]

    @property
    def channels(self):
        '''
        Get the image number of channels
        '''
        return self._data.shape[2]

    def __array__(self, dtype=None, copy=None):
        '''
        Compatibility with numpy array.
        In this way np.array(Image_object) is a valid 3D array and you can
        also simply call plt.imshow(Image_object) without other intermediate steps.

        Parameters
        ----------
          dtype : data-type (default=None)
            Requested data type of the returned array

          copy : bool (default=None)
            If True a copy of the buffer is returned

        Returns
        -------
          data : array-like
            Image data as numpy array
        '''
        data = np.asarray(self._data, dtype=dtype)

        if copy:
            data = data.copy()

        return data

    def add_single_batch(self):
        '''
        Add batch dimension for testing layer

        Returns
        -------
          self
        '''
        self._data = np.expand_dims(self._data, axis=0)
        return self

    def remove_single_batch(self):
        '''
        Remove batch dimension for testing layer

        Returns
        -------
          self
        '''
        self._data = np.squeeze(self._data, axis=0)
        return self

    def _image2cv(self, img):
        '''
        Convert image from image-fmt to opencv fmt

        Parameters
        ----------
          img : array-like
            Input image to convert

        Returns
        -------
          cv_img : array-like
            Image as uint8 nd-array

        Notes
        -----
        .. note::
          The channels are automatically converted from
          RGB 2 BGR for OpenCV compatibility
        '''
        # constrain to [0, 1] and normalize between [0, 255];
        # np.clip allocates a new array so the input buffer is left untouched
        scaled = np.clip(img, 0., 1.) * 255.
        # rgb 2 bgr
        return scaled[..., ::-1].astype('uint8')

    def _cv2image(self, img):
        '''
        Convert image from opencv-fmt to image-fmt

        Parameters
        ----------
          img : array-like
            Input image to convert

        Returns
        -------
          Image_img : array-like
            Image as float [0., 1.] nd-array

        Notes
        -----
        .. note::
          The channels are automatically converted from
          BGR 2 RGB for Image compatibility
        '''
        # bgr 2 rgb; the reversed view is copied into a C-contiguous float64
        # buffer because the OpenCV drawing functions write in place
        rgb = np.ascontiguousarray(img[..., ::-1], dtype='float64')
        # normalize between [0, 1]
        rgb *= 1. / 255.
        return rgb

    def _get_color(self, x, max):
        '''
        Get the color associated to the position x in the range [0, max] as
        linear interpolation between the two nearest entries of the box palette.

        Parameters
        ----------
          x : int
            Position of the color

          max : int
            Upper bound of the positions

        Returns
        -------
          color : tuple
            Interpolated components in (blue, green, red) order

        Notes
        -----
        .. note::
          It assumes that image_utils.num_box_colors is an integer and that
          image_utils.colors is a sequence of (r, g, b) triplets with at least
          num_box_colors entries (image_utils is not defined in this file).
        '''
        ratio = (x / max) * (image_utils.num_box_colors - 1)
        # the palette must be indexed with integers
        low, high = int(np.floor(ratio)), int(np.ceil(ratio))
        ratio -= low

        r_low, g_low, b_low = image_utils.colors[low]
        r_high, g_high, b_high = image_utils.colors[high]

        return ((1. - ratio) * b_low + ratio * b_high,
                (1. - ratio) * g_low + ratio * g_high,
                (1. - ratio) * r_low + ratio * r_high)

    def get(self):
        '''
        Return the data object as a numpy array

        Returns
        -------
          data : array-like
            Image data as numpy array
        '''
        return self._data

    def load(self, filename):
        '''
        Read Image from file

        Parameters
        ----------
          filename : str
            Image filename path

        Returns
        -------
          self

        Raises
        ------
          IOError
            If the file does not exist or it cannot be decoded as an image
        '''
        if not os.path.isfile(filename):
            raise IOError('Could not open or find the data file. Given: {}'.format(filename))

        # read image from file
        img = cv2.imread(filename, cv2.IMREAD_COLOR)

        # imread does not raise on unreadable files: it just returns None
        if img is None:
            raise IOError('Could not decode the image file. Given: {}'.format(filename))

        self._data = self._cv2image(img)
        return self

    def standardize(self, means, process=normalization.normalize):
        '''
        Remove or add train mean-image from current image

        Parameters
        ----------
          means : array_like
            Array of means to apply to the image

          process : normalization (default = normalize)
            Switch between normalization (means are subtracted) and
            denormalization (means are added back)

        Returns
        -------
          self

        Raises
        ------
          ValueError
            If the given process is neither normalize nor denormalize
        '''
        if process is normalization.normalize:
            self._data -= means

        elif process is normalization.denormalize:
            self._data += means

        else:
            raise ValueError('Standardize error. Unknown procedure given: {}'.format(process))

        return self

    def rescale(self, var, process=normalization.normalize):
        '''
        Divide or multiply by train variance-image

        Parameters
        ----------
          var : array_like
            Array of variances to apply to the image

          process : normalization (default = normalize)
            Switch between normalization (division by var) and
            denormalization (multiplication by var)

        Returns
        -------
          self

        Raises
        ------
          ValueError
            If the given process is neither normalize nor denormalize
        '''
        if process is normalization.normalize:
            self._data *= 1. / var

        elif process is normalization.denormalize:
            self._data *= var

        else:
            raise ValueError('Rescale error. Unknown procedure given: {}'.format(process))

        return self

    def scale(self, scaling, process=normalization.normalize):
        '''
        Scale image values

        Parameters
        ----------
          scaling : float
            Scale factor to apply to the image

          process : normalization (default = normalize)
            Switch between normalization (multiplication by scaling) and
            denormalization (division by scaling)

        Returns
        -------
          self

        Raises
        ------
          ValueError
            If the given process is neither normalize nor denormalize
        '''
        if process is normalization.normalize:
            self._data *= scaling

        elif process is normalization.denormalize:
            self._data *= 1. / scaling

        else:
            raise ValueError('Scale error. Unknown procedure given: {}'.format(process))

        return self

    def scale_between(self, minimum=0., maximum=1.):
        '''
        Rescale image value between min and max.
        The current values are expected to be already in [0, 1].

        Parameters
        ----------
          minimum : float (default = 0.)
            Min value

          maximum : float (default = 1.)
            Max value

        Returns
        -------
          self
        '''
        self._data = self._data * (maximum - minimum) + minimum
        return self

    def mean_std_norm(self):
        '''
        Normalize the current image as

        .. code-block:: python

          image = (image - mean) / variance

        Returns
        -------
          self

        Notes
        -----
        .. note::
          Mean and variance are evaluated over the whole buffer (all the
          channels together) and the divisor is the variance, not the
          standard deviation.
        '''
        mean = np.mean(self._data)
        inv_var = 1. / np.var(self._data)
        self._data = (self._data - mean) * inv_var
        return self

    def flip(self, axis=-1):
        '''
        Flip the image according to the OpenCV flip code

        Parameters
        ----------
          axis : int (default=-1)
            OpenCV flip code: 0 flips around the x-axis (up-down),
            a positive value flips around the y-axis (left-right),
            a negative value flips around both axes.

        Returns
        -------
          self
        '''
        # cv2.flip returns the flipped array: it does not work in place
        self._data = cv2.flip(self._data, axis)
        return self

    def transpose(self):
        '''
        Transpose width and height

        Returns
        -------
          self
        '''
        self._data = self._data.transpose(1, 0, 2)
        return self

    def rotate(self, angle):
        '''
        Rotate the image according to the given angle in degree fmt.

        Parameters
        ----------
          angle : float
            Angle in degree fmt (positive values rotate counter-clockwise)

        Returns
        -------
          self

        Note
        ----
        .. note::
          This rotation preserves the original size so some original parts can be removed
          from the rotated image.
          See 'rotate_bound' for a conservative rotation.

        References
        ----------
        https://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/
        '''
        h, w = self._data.shape[:2]
        center = (w // 2, h // 2)

        rotation_matrix = cv2.getRotationMatrix2D(center=center, angle=angle, scale=1.0)
        self._data = cv2.warpAffine(self._data, M=rotation_matrix, dsize=(w, h))
        return self

    def rotate_bound(self, angle):
        '''
        Rotate the image according to the given angle in degree fmt.

        Parameters
        ----------
          angle : float
            Angle in degree fmt (positive values rotate counter-clockwise)

        Returns
        -------
          self

        Note
        ----
        .. note::
          This rotation preserves the original image, so the output can be greater than the
          original size.
          See 'rotate' for a rotation which preserves the size.

        References
        ----------
        https://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/
        '''
        # grab the dimensions of the image and then determine the center
        h, w = self._data.shape[:2]
        cX, cY = w // 2, h // 2

        # grab the rotation matrix, then grab the sine and cosine
        # (i.e., the rotation components of the matrix)
        M = cv2.getRotationMatrix2D(center=(cX, cY), angle=angle, scale=1.0)
        cos = np.abs(M[0, 0])
        sin = np.abs(M[0, 1])

        # compute the new bounding dimensions of the image
        nW = int((h * sin) + (w * cos))
        nH = int((h * cos) + (w * sin))

        # adjust the rotation matrix to take into account translation
        M[0, 2] += (nW / 2) - cX
        M[1, 2] += (nH / 2) - cY

        # perform the actual rotation
        self._data = cv2.warpAffine(self._data, M=M, dsize=(nW, nH))
        return self

    def crop(self, dsize, size):
        '''
        Crop the image according to the given dimensions [dsize[0] : dsize[0] + size[0], dsize[1] : dsize[1] + size[1]]

        Parameters
        ----------
          dsize : 2D iterable
            Starting indexes of the crop along the first and the second axis of the buffer

          size : 2D iterable
            Extension of the crop along the first and the second axis of the buffer

        Returns
        -------
          self

        Notes
        -----
        .. note::
          The slices are applied in array order, i.e. the first element of each
          pair refers to the rows and the second one to the columns of the
          (height, width, channels) buffer.
        '''
        dx, dy = dsize
        sx, sy = size

        self._data = self._data[dx: dx + sx, dy: dy + sy]
        return self

    def rgb2rgba(self):
        '''
        Add alpha channel to the original image

        Returns
        -------
          self

        Raises
        ------
          ValueError
            If the image is not a 3-channel image

        Notes
        -----
        .. note::
          The alpha channel is set to fully opaque, i.e. 1 for floating point
          images and the maximum representable value for integer images
          (the same convention of OpenCV). The channel is appended with numpy
          since cv2.cvtColor does not support float64 buffers.
        '''
        data = self._data

        if data.ndim != 3 or data.shape[2] != 3:
            raise ValueError('rgb2rgba error. Expected a 3-channel image. Given shape: {}'.format(data.shape))

        if np.issubdtype(data.dtype, np.integer):
            opaque = np.iinfo(data.dtype).max
        else:
            opaque = 1.

        alpha = np.full(data.shape[:2] + (1, ), opaque, dtype=data.dtype)
        self._data = np.concatenate((data, alpha), axis=2)
        return self

    def show(self, window_name, ms=0, fullscreen=None):
        '''
        show the image

        Parameters
        ----------
          window_name : str
            Name of the plot

          ms : int (default=0)
            Milliseconds to wait (0 waits until a key is pressed)

          fullscreen : bool (default=None)
            If it evaluates to True the window is switched to fullscreen mode

        Returns
        -------
          check : bool
            True if everything is ok
        '''
        img = self._image2cv(self._data)

        # show image
        if ms == 0:
            print('Press ESC to close the window', flush=True)

        cv2.imshow(window_name, img)

        if fullscreen:
            cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

        cv2.waitKey(ms)
        return True

    def save(self, filename):
        '''
        save the image as PNG file

        Parameters
        ----------
          filename : str
            Output filename of the image without extension
            (the '.png' extension is always appended)

        Returns
        -------
          check : bool
            True if the image has been written, False otherwise
        '''
        img = self._image2cv(self._data)
        # imwrite reports a failure through its return value
        return bool(cv2.imwrite(filename + '.png', img))

    def from_numpy_matrix(self, array):
        '''
        Use numpy array as the image

        Parameters
        ----------
          array : array_like
            buffer of the input image. It is stored as it is (no copy and no
            conversion): the other methods expect a (height, width, channel)
            layout.

        Returns
        -------
          self
        '''
        self._data = array
        return self

    def from_frame(self, array):
        '''
        Use opencv frame array as the image

        Parameters
        ----------
          array : array_like
            OpenCV frame (BGR channels, values in [0, 255])

        Returns
        -------
          self
        '''
        self._data = self._cv2image(array)
        return self

    def resize(self, dsize=None, scale_factor=(None, None)):
        '''
        Resize the image according to the new shape given

        Parameters
        ----------
          dsize : 2D iterable (default=None)
            Destination size of the image as (width, height)

          scale_factor : 2D iterable (default=(None, None))
            width scale factor, height scale factor.
            They are used only if dsize is None.

        Returns
        -------
          res : Image
            Resized Image (the current image is not modified)

        Raises
        ------
          ValueError
            If neither dsize nor both the scale factors are given

        Notes
        -----
        .. note::
          The resize is performed using the LANCZOS interpolation.
        '''
        fx, fy = scale_factor

        if dsize is None and (fx is None or fy is None):
            raise ValueError('Resize error. Either dsize or both the scale factors must be given')

        resized = Image()
        # a null scale factor tells OpenCV to derive it from dsize
        resized._data = cv2.resize(self._data, dsize=dsize,
                                   fx=0. if fx is None else fx,
                                   fy=0. if fy is None else fy,
                                   interpolation=cv2.INTER_LANCZOS4)
        return resized

    def letterbox(self, net_dim):
        '''
        resize image with unchanged aspect ratio using padding

        Parameters
        ----------
          net_dim : 2D iterable
            width and height outputs

        Returns
        -------
          resized : Image
            Resized Image (the current image is not modified)
        '''
        resized = Image()

        img_h, img_w, _ = self._data.shape
        net_w, net_h = net_dim

        # largest scale which fits both the output dimensions
        mins = min(net_w / img_w, net_h / img_h)
        new_w, new_h = int(img_w * mins), int(img_h * mins)

        resized_image = cv2.resize(self._data, (new_w, new_h),
                                   interpolation=cv2.INTER_LANCZOS4)

        # split the missing pixels between the two sides of each axis
        delta_w, delta_h = net_w - new_w, net_h - new_h
        top, left = delta_h // 2, delta_w // 2
        bottom, right = delta_h - top, delta_w - left

        # gray padding
        resized._data = cv2.copyMakeBorder(resized_image, top, bottom, left, right,
                                           cv2.BORDER_CONSTANT,
                                           value=(0.5, 0.5, 0.5))
        return resized

    def draw_detections(self, dets, thresh, names):
        '''
        Draw the detections into the current image

        Parameters
        ----------
          dets : Detection list
            List of pre-computed detection objects. Each object is expected to
            provide the 'prob' array and the 'bbox' object (x, y, w, h members).

          thresh : float
            Probability threshold to filter the boxes

          names : iterable
            List of object names as strings

        Returns
        -------
          self

        Notes
        -----
        .. note::
          The box coordinates are multiplied by the image dimensions, so they
          are presumably expressed as fractions of the image size.
        '''
        # OpenCV draws in place, so it needs a C-contiguous buffer
        self._data = np.ascontiguousarray(self._data)

        img_w, img_h = self.width, self.height
        thickness = 1 if img_h < 167 else int(img_h * 6e-3)
        font = cv2.FONT_HERSHEY_COMPLEX_SMALL

        for d in dets:

            prob = np.asarray(d.prob)
            index = np.where(prob > thresh)[0]

            # the size must be checked: the truth value of an index array is
            # ambiguous and a single match at position 0 evaluates to False
            if index.size == 0:
                continue

            # plain loop over the indexes, so names can be also a list
            found = [names[i] for i in index]

            labels = ', '.join(found)
            perf = '\n'.join(['{0}: {1:.3f}%'.format(name, p * 100.)
                              for name, p in zip(found, prob[index])
                              ])
            print(perf)

            num_obj = len(found)
            offset = num_obj * 123457 % len(names)
            r, g, b = self._get_color(offset, len(names))

            half_w, half_h = d.bbox.w * .5, d.bbox.h * .5

            # OpenCV requires integer pixel coordinates
            left = int(np.clip((d.bbox.x - half_w) * img_w, 0, img_w))
            right = int(np.clip((d.bbox.x + half_w) * img_w, 0, img_w))
            top = int(np.clip((d.bbox.y - half_h) * img_h, 0, img_h))
            bottom = int(np.clip((d.bbox.y + half_h) * img_h, 0, img_h))

            # detection box
            cv2.rectangle(img=self._data, pt1=(left, top), pt2=(right, bottom),
                          color=(r, g, b), thickness=thickness)

            # get string text size
            (label_w, label_h), baseline = cv2.getTextSize(labels, fontFace=font,
                                                           fontScale=1, thickness=1)

            # box around the string
            cv2.rectangle(img=self._data, pt1=(left, top - label_h - baseline),
                          pt2=(left + label_w, top), color=(r, g, b),
                          thickness=cv2.FILLED)

            # label string
            cv2.putText(img=self._data, text=labels,
                        org=(left, top + baseline - label_h), fontFace=font,
                        fontScale=1, color=(0, 0, 0), thickness=1, lineType=1)

        return self
if __name__ == '__main__':

    # Manual visual check: letterbox the sample picture shipped in the data
    # directory to 416x416 and display it until a key is pressed.
    sample_path = os.path.join(os.path.dirname(__file__), '..', 'data', 'dog.jpg')
    window = 'Test'

    boxed = Image(sample_path).letterbox(net_dim=(416, 416))

    cv2.namedWindow(window, cv2.WINDOW_NORMAL)
    boxed.show(window)
    cv2.destroyAllWindows()