# Source code for mxnet.gluon.data.vision.transforms
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# coding: utf-8
# pylint: disable= arguments-differ
"Image transforms."
from ...block import Block, HybridBlock
from ...nn import Sequential, HybridSequential
from .... import image
from ....base import numeric_types
from ....util import is_np_array
class Compose(Sequential):
    """Sequentially composes multiple transforms.

    Consecutive `HybridBlock` transforms are grouped: a run of two or more
    is wrapped in a single hybridized `HybridSequential`, while a run of
    exactly one is added as-is. Non-hybrid transforms are added directly.

    Parameters
    ----------
    transforms : list of transform Blocks.
        The list of transforms to be composed. Any iterable is accepted;
        the argument itself is never modified.

    Inputs:
        - **data**: input tensor with shape of the first transform Block requires.

    Outputs:
        - **out**: output tensor with shape of the last transform Block produces.

    Examples
    --------
    >>> transformer = transforms.Compose([transforms.Resize(300),
    ...                                   transforms.CenterCrop(256),
    ...                                   transforms.ToTensor()])
    >>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    <NDArray 3x256x256 @cpu(0)>
    """
    def __init__(self, transforms):
        super(Compose, self).__init__()
        # Run of consecutive HybridBlocks that has not been added yet.
        pending = []
        # Work on a copy with a trailing ``None`` sentinel so the last run of
        # hybrid blocks is flushed without mutating the caller's sequence.
        for transform in list(transforms) + [None]:
            if isinstance(transform, HybridBlock):
                pending.append(transform)
                continue
            # A non-hybrid entry (or the sentinel) ends the current run.
            if len(pending) == 1:
                self.add(pending[0])
            elif len(pending) > 1:
                fused = HybridSequential()
                for block in pending:
                    fused.add(block)
                fused.hybridize()
                self.add(fused)
            pending = []
            if transform is not None:
                self.add(transform)
class Cast(HybridBlock):
    """Cast the input to a specific data type.

    Parameters
    ----------
    dtype : str, default 'float32'
        The target data type, in string or `numpy.dtype`.

    Inputs:
        - **data**: input tensor with arbitrary shape and dtype.

    Outputs:
        - **out**: output tensor with the same shape as `data` and data type as dtype.
    """
    def __init__(self, dtype='float32'):
        super(Cast, self).__init__()
        # Kept exactly as supplied (string or numpy dtype).
        self._dtype = dtype
class ToTensor(HybridBlock):
    """Converts an image NDArray or batch of image NDArray to a tensor NDArray.

    An image NDArray of shape (H x W x C) in the range [0, 255] becomes a
    float32 tensor NDArray of shape (C x H x W) in the range [0, 1].

    For batch input, a batch image NDArray of shape (N x H x W x C) in the
    range [0, 255] becomes a float32 tensor NDArray of shape (N x C x H x W).

    Inputs:
        - **data**: input tensor with (H x W x C) or (N x H x W x C) shape and uint8 type.

    Outputs:
        - **out**: output tensor with (C x H x W) or (N x C x H x W) shape and float32 type.

    Examples
    --------
    >>> transformer = vision.transforms.ToTensor()
    >>> image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    [[[ 0.85490197  0.72156864]
      [ 0.09019608  0.74117649]
      [ 0.61960787  0.92941177]
      [ 0.96470588  0.1882353 ]]
     [[ 0.6156863   0.73725492]
      [ 0.46666667  0.98039216]
      [ 0.44705883  0.45490196]
      [ 0.01960784  0.8509804 ]]
     [[ 0.39607844  0.03137255]
      [ 0.72156864  0.52941179]
      [ 0.16470589  0.7647059 ]
      [ 0.05490196  0.70588237]]]
    <NDArray 3x4x2 @cpu(0)>
    """
    def __init__(self):
        # No configuration of its own; only the base block is initialised.
        super(ToTensor, self).__init__()
class Normalize(HybridBlock):
    """Normalize a tensor of shape (C x H x W) or (N x C x H x W) with mean and
    standard deviation.

    Given mean `(m1, ..., mn)` and std `(s1, ..., sn)` for `n` channels,
    this transform normalizes each channel of the input tensor with::

        output[i] = (input[i] - mi) / si

    If mean or std is scalar, the same value will be applied to all channels.

    Parameters
    ----------
    mean : float or tuple of floats
        The mean values.
    std : float or tuple of floats
        The standard deviation values.

    Inputs:
        - **data**: input tensor with (C x H x W) or (N x C x H x W) shape.

    Outputs:
        - **out**: output tensor with the shape as `data`.

    Examples
    --------
    >>> transformer = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))
    >>> image = mx.nd.random.uniform(0, 1, (3, 4, 2))
    >>> transformer(image)
    [[[ 0.18293785  0.19761486]
      [ 0.23839645  0.28142193]
      [ 0.20092112  0.28598186]
      [ 0.18162774  0.28241724]]
     [[-0.2881726  -0.18821815]
      [-0.17705294 -0.30780914]
      [-0.2812064  -0.3512327 ]
      [-0.05411351 -0.4716435 ]]
     [[-1.0363373  -1.7273437 ]
      [-1.6165586  -1.5223348 ]
      [-1.208275   -1.1878313 ]
      [-1.4711051  -1.5200229 ]]]
    <NDArray 3x4x2 @cpu(0)>
    """
    def __init__(self, mean=0.0, std=1.0):
        super(Normalize, self).__init__()
        self._mean = mean
        self._std = std

    def hybrid_forward(self, F, x):
        """Apply `image.normalize` to `x` with the stored mean and std."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.normalize(x, self._mean, self._std)
class RandomResizedCrop(Block):
    """Crop the input image with random scale and aspect ratio.

    Makes a crop of the original image with random size (default: 0.08
    to 1.0 of the original image size) and random aspect ratio (default:
    3/4 to 4/3), then resize it to the specified size.

    Parameters
    ----------
    size : int or tuple of (W, H)
        Size of the final output. A single number is used for both sides.
    scale : tuple of two floats
        If scale is `(min_area, max_area)`, the cropped image's area will
        range from min_area to max_area of the original image's area
    ratio : tuple of two floats
        Range of aspect ratio of the cropped image before resizing.
    interpolation : int
        Interpolation method for resizing. By default uses bilinear
        interpolation. See OpenCV's resize function for available choices.

    Inputs:
        - **data**: input tensor with (Hi x Wi x C) shape.

    Outputs:
        - **out**: output tensor with (H x W x C) shape.
    """
    def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
                 interpolation=1):
        super(RandomResizedCrop, self).__init__()
        # A scalar size is expanded to a (size, size) pair.
        target = (size, size) if isinstance(size, numeric_types) else size
        self._args = (target, scale, ratio, interpolation)
class CropResize(HybridBlock):
    r"""Crop the input image and optionally resize it.

    Makes a crop of the original image then optionally resize it to the specified size.

    Parameters
    ----------
    x : int
        Left boundary of the cropping area
    y : int
        Top boundary of the cropping area
    width : int
        Width of the cropping area
    height : int
        Height of the cropping area
    size : int or tuple of (w, h)
        Optional, resize to new size after cropping
    interpolation : int, optional
        Interpolation method for resizing. By default uses bilinear
        interpolation. See OpenCV's resize function for available choices.
        https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize
        Note that the Resize on gpu use contrib.bilinearResize2D operator
        which only support bilinear interpolation(1). The result would be slightly
        different on gpu compared to cpu. OpenCV tend to align center while bilinearResize2D
        use algorithm which aligns corner.

    Inputs:
        - **data**: input tensor with (H x W x C) or (N x H x W x C) shape.

    Outputs:
        - **out**: output tensor with (H x W x C) or (N x H x W x C) shape.

    Examples
    --------
    >>> transformer = vision.transforms.CropResize(x=0, y=0, width=100, height=100)
    >>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    <NDArray 100x100x3 @cpu(0)>
    >>> image = mx.nd.random.uniform(0, 255, (3, 224, 224, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    <NDArray 3x100x100x3 @cpu(0)>
    >>> transformer = vision.transforms.CropResize(x=0, y=0, width=100, height=100, size=(50, 50), interpolation=1)
    >>> transformer(image)
    <NDArray 3x50x50x3 @cpu(0)>
    """
    def __init__(self, x, y, width, height, size=None, interpolation=None):
        super(CropResize, self).__init__()
        self._x = x
        self._y = y
        self._width = width
        self._height = height
        self._size = size
        self._interpolation = interpolation

    def hybrid_forward(self, F, x):
        """Crop `x` to the configured window, then resize if `size` was given."""
        # Dispatch to the numpy-extension operators when numpy semantics are
        # active, matching the other transforms in this module.
        if is_np_array():
            F = F.npx
        out = F.image.crop(x, self._x, self._y, self._width, self._height)
        # Resizing is skipped when no (truthy) target size was configured.
        if self._size:
            out = F.image.resize(out, self._size, False, self._interpolation)
        return out
class CenterCrop(Block):
    """Crops the image `src` to the given `size` by trimming on all four
    sides and preserving the center of the image. Upsamples if `src` is
    smaller than `size`.

    Parameters
    ----------
    size : int or tuple of (W, H)
        Size of output image. A single number is used for both sides.
    interpolation : int
        Interpolation method for resizing. By default uses bilinear
        interpolation. See OpenCV's resize function for available choices.

    Inputs:
        - **data**: input tensor with (Hi x Wi x C) shape.

    Outputs:
        - **out**: output tensor with (H x W x C) shape.

    Examples
    --------
    >>> transformer = vision.transforms.CenterCrop(size=(1000, 500))
    >>> image = mx.nd.random.uniform(0, 255, (2321, 3482, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    <NDArray 500x1000x3 @cpu(0)>
    """
    def __init__(self, size, interpolation=1):
        super(CenterCrop, self).__init__()
        # A scalar size is expanded to a (size, size) pair.
        target = (size, size) if isinstance(size, numeric_types) else size
        self._args = (target, interpolation)
class Resize(HybridBlock):
    """Resize an image or a batch of image NDArray to the given size.
    Should be applied before `mxnet.gluon.data.vision.transforms.ToTensor`.

    Parameters
    ----------
    size : int or tuple of (W, H)
        Size of output image.
    keep_ratio : bool
        Whether to resize the short edge or both edges to `size`,
        if size is given as an integer.
    interpolation : int
        Interpolation method for resizing. By default uses bilinear
        interpolation. See OpenCV's resize function for available choices.
        Note that the Resize on gpu use contrib.bilinearResize2D operator
        which only support bilinear interpolation(1). The result would be slightly
        different on gpu compared to cpu. OpenCV tend to align center while bilinearResize2D
        use algorithm which aligns corner.

    Inputs:
        - **data**: input tensor with (H x W x C) or (N x H x W x C) shape.

    Outputs:
        - **out**: output tensor with (H x W x C) or (N x H x W x C) shape.

    Examples
    --------
    >>> transformer = vision.transforms.Resize(size=(1000, 500))
    >>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    <NDArray 500x1000x3 @cpu(0)>
    >>> image = mx.nd.random.uniform(0, 255, (3, 224, 224, 3)).astype(dtype=np.uint8)
    >>> transformer(image)
    <NDArray 3x500x1000x3 @cpu(0)>
    """
    def __init__(self, size, keep_ratio=False, interpolation=1):
        super(Resize, self).__init__()
        self._size = size
        self._keep = keep_ratio
        self._interpolation = interpolation

    def hybrid_forward(self, F, x):
        """Apply `image.resize` to `x` with the stored settings."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.resize(x, self._size, self._keep, self._interpolation)
class RandomFlipLeftRight(HybridBlock):
    """Randomly flip the input image left to right with a probability
    of 0.5.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self):
        super(RandomFlipLeftRight, self).__init__()

    def hybrid_forward(self, F, x):
        """Apply `image.random_flip_left_right` to `x`."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_flip_left_right(x)
class RandomFlipTopBottom(HybridBlock):
    """Randomly flip the input image top to bottom with a probability
    of 0.5.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self):
        super(RandomFlipTopBottom, self).__init__()

    def hybrid_forward(self, F, x):
        """Apply `image.random_flip_top_bottom` to `x`."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_flip_top_bottom(x)
class RandomBrightness(HybridBlock):
    """Randomly jitters image brightness with a factor
    chosen from `[max(0, 1 - brightness), 1 + brightness]`.

    Parameters
    ----------
    brightness: float
        How much to jitter brightness. brightness factor is randomly
        chosen from `[max(0, 1 - brightness), 1 + brightness]`.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self, brightness):
        super(RandomBrightness, self).__init__()
        # Lower bound is clamped at zero.
        lower = max(0, 1-brightness)
        upper = 1+brightness
        self._args = (lower, upper)

    def hybrid_forward(self, F, x):
        """Apply `image.random_brightness` to `x` with the stored bounds."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_brightness(x, *self._args)
class RandomContrast(HybridBlock):
    """Randomly jitters image contrast with a factor
    chosen from `[max(0, 1 - contrast), 1 + contrast]`.

    Parameters
    ----------
    contrast: float
        How much to jitter contrast. contrast factor is randomly
        chosen from `[max(0, 1 - contrast), 1 + contrast]`.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self, contrast):
        super(RandomContrast, self).__init__()
        # Lower bound is clamped at zero.
        lower = max(0, 1-contrast)
        upper = 1+contrast
        self._args = (lower, upper)

    def hybrid_forward(self, F, x):
        """Apply `image.random_contrast` to `x` with the stored bounds."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_contrast(x, *self._args)
class RandomSaturation(HybridBlock):
    """Randomly jitters image saturation with a factor
    chosen from `[max(0, 1 - saturation), 1 + saturation]`.

    Parameters
    ----------
    saturation: float
        How much to jitter saturation. saturation factor is randomly
        chosen from `[max(0, 1 - saturation), 1 + saturation]`.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self, saturation):
        super(RandomSaturation, self).__init__()
        # Lower bound is clamped at zero.
        lower = max(0, 1-saturation)
        upper = 1+saturation
        self._args = (lower, upper)

    def hybrid_forward(self, F, x):
        """Apply `image.random_saturation` to `x` with the stored bounds."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_saturation(x, *self._args)
class RandomHue(HybridBlock):
    """Randomly jitters image hue with a factor
    chosen from `[max(0, 1 - hue), 1 + hue]`.

    Parameters
    ----------
    hue: float
        How much to jitter hue. hue factor is randomly
        chosen from `[max(0, 1 - hue), 1 + hue]`.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self, hue):
        super(RandomHue, self).__init__()
        # Lower bound is clamped at zero.
        lower = max(0, 1-hue)
        upper = 1+hue
        self._args = (lower, upper)

    def hybrid_forward(self, F, x):
        """Apply `image.random_hue` to `x` with the stored bounds."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_hue(x, *self._args)
class RandomColorJitter(HybridBlock):
    """Randomly jitters the brightness, contrast, saturation, and hue
    of an image.

    Parameters
    ----------
    brightness : float
        How much to jitter brightness. brightness factor is randomly
        chosen from `[max(0, 1 - brightness), 1 + brightness]`.
    contrast : float
        How much to jitter contrast. contrast factor is randomly
        chosen from `[max(0, 1 - contrast), 1 + contrast]`.
    saturation : float
        How much to jitter saturation. saturation factor is randomly
        chosen from `[max(0, 1 - saturation), 1 + saturation]`.
    hue : float
        How much to jitter hue. hue factor is randomly
        chosen from `[max(0, 1 - hue), 1 + hue]`.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        super(RandomColorJitter, self).__init__()
        # The raw jitter amounts are stored unchanged and passed to the operator.
        self._args = (brightness, contrast, saturation, hue)

    def hybrid_forward(self, F, x):
        """Apply `image.random_color_jitter` to `x` with the stored amounts."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_color_jitter(x, *self._args)
class RandomLighting(HybridBlock):
    """Add AlexNet-style PCA-based noise to an image.

    Parameters
    ----------
    alpha : float
        Intensity of the image.

    Inputs:
        - **data**: input tensor with (H x W x C) shape.

    Outputs:
        - **out**: output tensor with same shape as `data`.
    """
    def __init__(self, alpha):
        super(RandomLighting, self).__init__()
        self._alpha = alpha

    def hybrid_forward(self, F, x):
        """Apply `image.random_lighting` to `x` with the stored `alpha`."""
        # Use the numpy-extension operators when numpy semantics are active.
        ops = F.npx if is_np_array() else F
        return ops.image.random_lighting(x, self._alpha)