Source code for mxnet.image.image

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# pylint: disable=no-member, too-many-lines, redefined-builtin, protected-access, unused-import, invalid-name
# pylint: disable=too-many-arguments, too-many-locals, no-name-in-module, too-many-branches, too-many-statements
"""Read individual image files and perform augmentations."""

from __future__ import absolute_import, print_function

import sys
import os
import random
import logging
import json
import warnings
import numpy as np


try:
    import cv2
except ImportError:
    cv2 = None

from ..base import numeric_types
from .. import ndarray as nd
from ..ndarray import _internal
from .. import io
from .. import recordio


def imread(filename, *args, **kwargs):
    """Read and decode an image to an NDArray.

    .. note:: `imread` uses OpenCV (not the CV2 Python library).
       MXNet must have been built with USE_OPENCV=1 for `imdecode` to work.

    Parameters
    ----------
    filename : str
        Name of the image file to be loaded.
    flag : {0, 1}, default 1
        1 for three channel color output. 0 for grayscale output.
    to_rgb : bool, default True
        True for RGB formatted output (MXNet default).
        False for BGR formatted output (OpenCV default).
    out : NDArray, optional
        Output buffer. Use `None` for automatic allocation.

    Returns
    -------
    NDArray
        An `NDArray` containing the image.

    Example
    -------
    >>> mx.img.imread("flower.jpg")
    

    Set `flag` parameter to 0 to get grayscale output

    >>> mx.img.imread("flower.jpg", flag=0)
    

    Set `to_rgb` parameter to 0 to get output in OpenCV format (BGR)

    >>> mx.img.imread("flower.jpg", to_rgb=0)
    
    """
    return _internal._cvimread(filename, *args, **kwargs)


def imresize(src, w, h, *args, **kwargs):
    r"""Resize image with OpenCV.

    .. note:: `imresize` uses OpenCV (not the CV2 Python library). MXNet must have been built
       with USE_OPENCV=1 for `imresize` to work.

    Parameters
    ----------
    src : NDArray
        source image
    w : int, required
        Width of resized image.
    h : int, required
        Height of resized image.
    interp : int, optional, default=1
        Interpolation method (default=cv2.INTER_LINEAR).
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Bicubic interpolation over 4x4 pixel neighborhood.
        3: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method metioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    
    >>> new_image = mx.img.resize(image, 240, 360)
    >>> new_image
    
    """
    return _internal._cvimresize(src, w, h, *args, **kwargs)


def imdecode(buf, *args, **kwargs):
    """Decode an image to an NDArray.

    .. note:: `imdecode` uses OpenCV (not the CV2 Python library).
       MXNet must have been built with USE_OPENCV=1 for `imdecode` to work.

    Parameters
    ----------
    buf : str/bytes/bytearray or numpy.ndarray
        Binary image data as string or numpy ndarray.
    flag : int, optional, default=1
        1 for three channel color output. 0 for grayscale output.
    to_rgb : int, optional, default=1
        1 for RGB formatted output (MXNet default). 0 for BGR formatted output (OpenCV default).
    out : NDArray, optional
        Output buffer. Use `None` for automatic allocation.

    Returns
    -------
    NDArray
        An `NDArray` containing the image.

    Example
    -------
    >>> with open("flower.jpg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    

    Set `flag` parameter to 0 to get grayscale output

    >>> with open("flower.jpg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image, flag=0)
    >>> image
    

    Set `to_rgb` parameter to 0 to get output in OpenCV format (BGR)

    >>> with open("flower.jpg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image, to_rgb=0)
    >>> image
    
    """
    if not isinstance(buf, nd.NDArray):
        if sys.version_info[0] == 3 and not isinstance(buf, (bytes, bytearray, np.ndarray)):
            raise ValueError('buf must be of type bytes, bytearray or numpy.ndarray,'
                             'if you would like to input type str, please convert to bytes')
        buf = nd.array(np.frombuffer(buf, dtype=np.uint8), dtype=np.uint8)

    return _internal._cvimdecode(buf, *args, **kwargs)


def scale_down(src_size, size):
    """Scales down crop size if it's larger than image size.

    If width/height of the crop is larger than the width/height of the image,
    sets the width/height to the width/height of the image.

    Parameters
    ----------
    src_size : tuple of int
        Size of the image in (width, height) format.
    size : tuple of int
        Size of the crop in (width, height) format.

    Returns
    -------
    tuple of int
        A tuple containing the scaled crop size in (width, height) format.

    Example
    --------
    >>> src_size = (640,480)
    >>> size = (720,120)
    >>> new_size = mx.img.scale_down(src_size, size)
    >>> new_size
    (640,106)
    """
    w, h = size
    sw, sh = src_size
    if sh < h:
        w, h = float(w * sh) / h, sh
    if sw < w:
        w, h = sw, float(h * sw) / w
    return int(w), int(h)


def copyMakeBorder(src, top, bot, left, right, *args, **kwargs):
    """Pad image border with OpenCV.

    Parameters
    ----------
    src : NDArray
        source image
    top : int, required
        Top margin.
    bot : int, required
        Bottom margin.
    left : int, required
        Left margin.
    right : int, required
        Right margin.
    type : int, optional, default='0'
        Filling type (default=cv2.BORDER_CONSTANT).
        0 - cv2.BORDER_CONSTANT - Adds a constant colored border.
        1 - cv2.BORDER_REFLECT - Border will be mirror reflection of the
        border elements, like this : fedcba|abcdefgh|hgfedcb
        2 - cv2.BORDER_REFLECT_101 or cv.BORDER_DEFAULT - Same as above,
        but with a slight change, like this : gfedcb|abcdefgh|gfedcba
        3 - cv2.BORDER_REPLICATE - Last element is replicated throughout,
        like this: aaaaaa|abcdefgh|hhhhhhh
        4 - cv2.BORDER_WRAP - it will look like this : cdefgh|abcdefgh|abcdefg
    value : double, optional, default=0
        (Deprecated! Use ``values`` instead.) Fill with single value.
    values : tuple of , optional, default=[]
        Fill with value(RGB[A] or gray), up to 4 channels.

    out : NDArray, optional
        The output NDArray to hold the result.

    Returns
    -------
    out : NDArray or list of NDArrays
        The output of this function.

    Example
    --------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    
    >>> new_image = mx_border = mx.image.copyMakeBorder(mx_img, 1, 2, 3, 4, type=0)
    >>> new_image
    
    """
    return _internal._cvcopyMakeBorder(src, top, bot, left, right, *args, **kwargs)


def _get_interp_method(interp, sizes=()):
    """Get the interpolation method for resize functions.
    The major purpose of this function is to wrap a random interp method selection
    and a auto-estimation method.

    Parameters
    ----------
    interp : int
        interpolation method for all resizing operations

        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method metioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.
    sizes : tuple of int
        (old_height, old_width, new_height, new_width), if None provided, auto(9)
        will return Area(2) anyway.

    Returns
    -------
    int
        interp method from 0 to 4
    """
    if interp == 9:
        if sizes:
            assert len(sizes) == 4
            oh, ow, nh, nw = sizes
            if nh > oh and nw > ow:
                return 2
            elif nh < oh and nw < ow:
                return 3
            else:
                return 1
        else:
            return 2
    if interp == 10:
        return random.randint(0, 4)
    if interp not in (0, 1, 2, 3, 4):
        raise ValueError('Unknown interp method %d' % interp)
    return interp


def resize_short(src, size, interp=2):
    """Resizes shorter edge to size.

    .. note:: `resize_short` uses OpenCV (not the CV2 Python library).
       MXNet must have been built with OpenCV for `resize_short` to work.

    Resizes the original image by setting the shorter edge to size
    and setting the longer edge accordingly.
    Resizing function is called from OpenCV.

    Parameters
    ----------
    src : NDArray
        The original image.
    size : int
        The length to be set for the shorter edge.
    interp : int, optional, default=2
        Interpolation method used for resizing the image.
        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method metioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).
        More details can be found in the documentation of OpenCV, please refer to
        http://docs.opencv.org/master/da/d54/group__imgproc__transform.html.

    Returns
    -------
    NDArray
        An 'NDArray' containing the resized image.

    Example
    -------
    >>> with open("flower.jpeg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.img.imdecode(str_image)
    >>> image
    
    >>> size = 640
    >>> new_image = mx.img.resize_short(image, size)
    >>> new_image
    
    """
    h, w, _ = src.shape
    if h > w:
        new_h, new_w = size * h // w, size
    else:
        new_h, new_w = size, size * w // h
    return imresize(src, new_w, new_h, interp=_get_interp_method(interp, (h, w, new_h, new_w)))


def fixed_crop(src, x0, y0, w, h, size=None, interp=2):
    """Crop src at fixed location, and (optionally) resize it to size.

    Parameters
    ----------
    src : NDArray
        Input image
    x0 : int
        Left boundary of the cropping area
    y0 : int
        Top boundary of the cropping area
    w : int
        Width of the cropping area
    h : int
        Height of the cropping area
    size : tuple of (w, h)
        Optional, resize to new size after cropping
    interp : int, optional, default=2
        Interpolation method. See resize_short for details.

    Returns
    -------
    NDArray
        An `NDArray` containing the cropped image.
    """
    out = nd.slice(src, begin=(y0, x0, 0), end=(y0 + h, x0 + w, int(src.shape[2])))
    if size is not None and (w, h) != size:
        sizes = (h, w, size[1], size[0])
        out = imresize(out, *size, interp=_get_interp_method(interp, sizes))
    return out


def random_crop(src, size, interp=2):
    """Randomly crop `src` with `size` (width, height).
    Upsample result if `src` is smaller than `size`.

    Parameters
    ----------
    src: Source image `NDArray`
    size: Size of the crop formatted as (width, height). If the `size` is larger
           than the image, then the source image is upsampled to `size` and returned.
    interp: int, optional, default=2
        Interpolation method. See resize_short for details.
    Returns
    -------
    NDArray
        An `NDArray` containing the cropped image.
    Tuple
        A tuple (x, y, width, height) where (x, y) is top-left position of the crop in the
        original image and (width, height) are the dimensions of the cropped image.

    Example
    -------
    >>> im = mx.nd.array(cv2.imread("flower.jpg"))
    >>> cropped_im, rect  = mx.image.random_crop(im, (100, 100))
    >>> print cropped_im
    
    >>> print rect
    (20, 21, 100, 100)
    """

    h, w, _ = src.shape
    new_w, new_h = scale_down((w, h), size)

    x0 = random.randint(0, w - new_w)
    y0 = random.randint(0, h - new_h)

    out = fixed_crop(src, x0, y0, new_w, new_h, size, interp)
    return out, (x0, y0, new_w, new_h)


def center_crop(src, size, interp=2):
    """Crops the image `src` to the given `size` by trimming on all four
    sides and preserving the center of the image. Upsamples if `src` is smaller
    than `size`.

    .. note:: This requires MXNet to be compiled with USE_OPENCV.

    Parameters
    ----------
    src : NDArray
        Binary source image data.
    size : list or tuple of int
        The desired output image size.
    interp : int, optional, default=2
        Interpolation method. See resize_short for details.

    Returns
    -------
    NDArray
        The cropped image.
    Tuple
        (x, y, width, height) where x, y are the positions of the crop in the
        original image and width, height the dimensions of the crop.

    Example
    -------
    >>> with open("flower.jpg", 'rb') as fp:
    ...     str_image = fp.read()
    ...
    >>> image = mx.image.imdecode(str_image)
    >>> image
    
    >>> cropped_image, (x, y, width, height) = mx.image.center_crop(image, (1000, 500))
    >>> cropped_image
    
    >>> x, y, width, height
    (1241, 910, 1000, 500)
    """

    h, w, _ = src.shape
    new_w, new_h = scale_down((w, h), size)

    x0 = int((w - new_w) / 2)
    y0 = int((h - new_h) / 2)

    out = fixed_crop(src, x0, y0, new_w, new_h, size, interp)
    return out, (x0, y0, new_w, new_h)


def color_normalize(src, mean, std=None):
    """Normalize src with mean and std.

    Parameters
    ----------
    src : NDArray
        Input image
    mean : NDArray
        RGB mean to be subtracted
    std : NDArray
        RGB standard deviation to be divided

    Returns
    -------
    NDArray
        An `NDArray` containing the normalized image.
    """
    if mean is not None:
        src -= mean
    if std is not None:
        src /= std
    return src


def random_size_crop(src, size, area, ratio, interp=2, **kwargs):
    """Randomly crop src with size. Randomize area and aspect ratio.

    Parameters
    ----------
    src : NDArray
        Input image
    size : tuple of (int, int)
        Size of the crop formatted as (width, height).
    area : float in (0, 1] or tuple of (float, float)
        If tuple, minimum area and maximum area to be maintained after cropping
        If float, minimum area to be maintained after cropping, maximum area is set to 1.0
    ratio : tuple of (float, float)
        Aspect ratio range as (min_aspect_ratio, max_aspect_ratio)
    interp: int, optional, default=2
        Interpolation method. See resize_short for details.
    Returns
    -------
    NDArray
        An `NDArray` containing the cropped image.
    Tuple
        A tuple (x, y, width, height) where (x, y) is top-left position of the crop in the
        original image and (width, height) are the dimensions of the cropped image.

    """
    h, w, _ = src.shape
    src_area = h * w

    if 'min_area' in kwargs:
        warnings.warn('`min_area` is deprecated. Please use `area` instead.',
                      DeprecationWarning)
        area = kwargs.pop('min_area')
    assert not kwargs, "unexpected keyword arguments for `random_size_crop`."

    if isinstance(area, numeric_types):
        area = (area, 1.0)
    for _ in range(10):
        target_area = random.uniform(area[0], area[1]) * src_area
        log_ratio = (np.log(ratio[0]), np.log(ratio[1]))
        new_ratio = np.exp(random.uniform(*log_ratio))

        new_w = int(round(np.sqrt(target_area * new_ratio)))
        new_h = int(round(np.sqrt(target_area / new_ratio)))

        if new_w <= w and new_h <= h:
            x0 = random.randint(0, w - new_w)
            y0 = random.randint(0, h - new_h)

            out = fixed_crop(src, x0, y0, new_w, new_h, size, interp)
            return out, (x0, y0, new_w, new_h)

    # fall back to center_crop
    return center_crop(src, size, interp)


[docs]class Augmenter(object):
    """Image Augmenter base class"""
    def __init__(self, **kwargs):
        self._kwargs = kwargs
        for k, v in self._kwargs.items():
            if isinstance(v, nd.NDArray):
                v = v.asnumpy()
            if isinstance(v, np.ndarray):
                v = v.tolist()
                self._kwargs[k] = v

[docs]    def dumps(self):
        """Saves the Augmenter to string

        Returns
        -------
        str
            JSON formatted string that describes the Augmenter.
        """
        return json.dumps([self.__class__.__name__.lower(), self._kwargs])

    def __call__(self, src):
        """Abstract implementation body"""
        raise NotImplementedError("Must override implementation.")


[docs]class SequentialAug(Augmenter):
    """Composing a sequential augmenter list.

    Parameters
    ----------
    ts : list of augmenters
        A series of augmenters to be applied in sequential order.
    """
    def __init__(self, ts):
        super(SequentialAug, self).__init__()
        self.ts = ts

    def dumps(self):
        """Override the default to avoid duplicate dump."""
        return [self.__class__.__name__.lower(), [x.dumps() for x in self.ts]]

    def __call__(self, src):
        """Augmenter body"""
        for aug in self.ts:
            src = aug(src)
        return src


[docs]class ResizeAug(Augmenter):
    """Make resize shorter edge to size augmenter.

    Parameters
    ----------
    size : int
        The length to be set for the shorter edge.
    interp : int, optional, default=2
        Interpolation method. See resize_short for details.
    """
    def __init__(self, size, interp=2):
        super(ResizeAug, self).__init__(size=size, interp=interp)
        self.size = size
        self.interp = interp

    def __call__(self, src):
        """Augmenter body"""
        return resize_short(src, self.size, self.interp)


[docs]class ForceResizeAug(Augmenter):
    """Force resize to size regardless of aspect ratio

    Parameters
    ----------
    size : tuple of (int, int)
        The desired size as in (width, height)
    interp : int, optional, default=2
        Interpolation method. See resize_short for details.
    """
    def __init__(self, size, interp=2):
        super(ForceResizeAug, self).__init__(size=size, interp=interp)
        self.size = size
        self.interp = interp

    def __call__(self, src):
        """Augmenter body"""
        sizes = (src.shape[0], src.shape[1], self.size[1], self.size[0])
        return imresize(src, *self.size, interp=_get_interp_method(self.interp, sizes))


[docs]class RandomCropAug(Augmenter):
    """Make random crop augmenter

    Parameters
    ----------
    size : int
        The length to be set for the shorter edge.
    interp : int, optional, default=2
        Interpolation method. See resize_short for details.
    """
    def __init__(self, size, interp=2):
        super(RandomCropAug, self).__init__(size=size, interp=interp)
        self.size = size
        self.interp = interp

    def __call__(self, src):
        """Augmenter body"""
        return random_crop(src, self.size, self.interp)[0]


[docs]class RandomSizedCropAug(Augmenter):
    """Make random crop with random resizing and random aspect ratio jitter augmenter.

    Parameters
    ----------
    size : tuple of (int, int)
        Size of the crop formatted as (width, height).
    area : float in (0, 1] or tuple of (float, float)
        If tuple, minimum area and maximum area to be maintained after cropping
        If float, minimum area to be maintained after cropping, maximum area is set to 1.0
    ratio : tuple of (float, float)
        Aspect ratio range as (min_aspect_ratio, max_aspect_ratio)
    interp: int, optional, default=2
        Interpolation method. See resize_short for details.
    """
    def __init__(self, size, area, ratio, interp=2, **kwargs):
        super(RandomSizedCropAug, self).__init__(size=size, area=area,
                                                 ratio=ratio, interp=interp)
        self.size = size
        if 'min_area' in kwargs:
            warnings.warn('`min_area` is deprecated. Please use `area` instead.',
                          DeprecationWarning)
            self.area = kwargs.pop('min_area')
        else:
            self.area = area
        self.ratio = ratio
        self.interp = interp
        assert not kwargs, "unexpected keyword arguments for `RandomSizedCropAug`."

    def __call__(self, src):
        """Augmenter body"""
        return random_size_crop(src, self.size, self.area, self.ratio, self.interp)[0]


[docs]class CenterCropAug(Augmenter):
    """Make center crop augmenter.

    Parameters
    ----------
    size : list or tuple of int
        The desired output image size.
    interp : int, optional, default=2
        Interpolation method. See resize_short for details.
    """
    def __init__(self, size, interp=2):
        super(CenterCropAug, self).__init__(size=size, interp=interp)
        self.size = size
        self.interp = interp

    def __call__(self, src):
        """Augmenter body"""
        return center_crop(src, self.size, self.interp)[0]


[docs]class RandomOrderAug(Augmenter):
    """Apply list of augmenters in random order

    Parameters
    ----------
    ts : list of augmenters
        A series of augmenters to be applied in random order
    """
    def __init__(self, ts):
        super(RandomOrderAug, self).__init__()
        self.ts = ts

    def dumps(self):
        """Override the default to avoid duplicate dump."""
        return [self.__class__.__name__.lower(), [x.dumps() for x in self.ts]]

    def __call__(self, src):
        """Augmenter body"""
        random.shuffle(self.ts)
        for t in self.ts:
            src = t(src)
        return src


[docs]class BrightnessJitterAug(Augmenter):
    """Random brightness jitter augmentation.

    Parameters
    ----------
    brightness : float
        The brightness jitter ratio range, [0, 1]
    """
    def __init__(self, brightness):
        super(BrightnessJitterAug, self).__init__(brightness=brightness)
        self.brightness = brightness

    def __call__(self, src):
        """Augmenter body"""
        alpha = 1.0 + random.uniform(-self.brightness, self.brightness)
        src *= alpha
        return src


[docs]class ContrastJitterAug(Augmenter):
    """Random contrast jitter augmentation.

    Parameters
    ----------
    contrast : float
        The contrast jitter ratio range, [0, 1]
    """
    def __init__(self, contrast):
        super(ContrastJitterAug, self).__init__(contrast=contrast)
        self.contrast = contrast
        self.coef = nd.array([[[0.299, 0.587, 0.114]]])

    def __call__(self, src):
        """Augmenter body"""
        alpha = 1.0 + random.uniform(-self.contrast, self.contrast)
        gray = src * self.coef
        gray = (3.0 * (1.0 - alpha) / gray.size) * nd.sum(gray)
        src *= alpha
        src += gray
        return src


[docs]class SaturationJitterAug(Augmenter):
    """Random saturation jitter augmentation.

    Parameters
    ----------
    saturation : float
        The saturation jitter ratio range, [0, 1]
    """
    def __init__(self, saturation):
        super(SaturationJitterAug, self).__init__(saturation=saturation)
        self.saturation = saturation
        self.coef = nd.array([[[0.299, 0.587, 0.114]]])

    def __call__(self, src):
        """Augmenter body"""
        alpha = 1.0 + random.uniform(-self.saturation, self.saturation)
        gray = src * self.coef
        gray = nd.sum(gray, axis=2, keepdims=True)
        gray *= (1.0 - alpha)
        src *= alpha
        src += gray
        return src


[docs]class HueJitterAug(Augmenter):
    """Random hue jitter augmentation.

    Parameters
    ----------
    hue : float
        The hue jitter ratio range, [0, 1]
    """
    def __init__(self, hue):
        super(HueJitterAug, self).__init__(hue=hue)
        self.hue = hue
        self.tyiq = np.array([[0.299, 0.587, 0.114],
                              [0.596, -0.274, -0.321],
                              [0.211, -0.523, 0.311]])
        self.ityiq = np.array([[1.0, 0.956, 0.621],
                               [1.0, -0.272, -0.647],
                               [1.0, -1.107, 1.705]])

    def __call__(self, src):
        """Augmenter body.
        Using approximate linear transfomation described in:
        https://beesbuzz.biz/code/hsv_color_transforms.php
        """
        alpha = random.uniform(-self.hue, self.hue)
        u = np.cos(alpha * np.pi)
        w = np.sin(alpha * np.pi)
        bt = np.array([[1.0, 0.0, 0.0],
                       [0.0, u, -w],
                       [0.0, w, u]])
        t = np.dot(np.dot(self.ityiq, bt), self.tyiq).T
        src = nd.dot(src, nd.array(t))
        return src


[docs]class ColorJitterAug(RandomOrderAug):
    """Apply random brightness, contrast and saturation jitter in random order.

    Parameters
    ----------
    brightness : float
        The brightness jitter ratio range, [0, 1]
    contrast : float
        The contrast jitter ratio range, [0, 1]
    saturation : float
        The saturation jitter ratio range, [0, 1]
    """
    def __init__(self, brightness, contrast, saturation):
        ts = []
        if brightness > 0:
            ts.append(BrightnessJitterAug(brightness))
        if contrast > 0:
            ts.append(ContrastJitterAug(contrast))
        if saturation > 0:
            ts.append(SaturationJitterAug(saturation))
        super(ColorJitterAug, self).__init__(ts)


[docs]class LightingAug(Augmenter):
    """Add PCA based noise.

    Parameters
    ----------
    alphastd : float
        Noise level
    eigval : 3x1 np.array
        Eigen values
    eigvec : 3x3 np.array
        Eigen vectors
    """
    def __init__(self, alphastd, eigval, eigvec):
        super(LightingAug, self).__init__(alphastd=alphastd, eigval=eigval, eigvec=eigvec)
        self.alphastd = alphastd
        self.eigval = eigval
        self.eigvec = eigvec

    def __call__(self, src):
        """Augmenter body"""
        alpha = np.random.normal(0, self.alphastd, size=(3,))
        rgb = np.dot(self.eigvec * alpha, self.eigval)
        src += nd.array(rgb)
        return src


[docs]class ColorNormalizeAug(Augmenter):
    """Mean and std normalization.

    Parameters
    ----------
    mean : NDArray
        RGB mean to be subtracted
    std : NDArray
        RGB standard deviation to be divided
    """
    def __init__(self, mean, std):
        super(ColorNormalizeAug, self).__init__(mean=mean, std=std)
        self.mean = mean if mean is None or isinstance(mean, nd.NDArray) else nd.array(mean)
        self.std = std if std is None or isinstance(std, nd.NDArray) else nd.array(std)

    def __call__(self, src):
        """Augmenter body"""
        return color_normalize(src, self.mean, self.std)


[docs]class RandomGrayAug(Augmenter):
    """Randomly convert to gray image.

    Parameters
    ----------
    p : float
        Probability to convert to grayscale
    """
    def __init__(self, p):
        super(RandomGrayAug, self).__init__(p=p)
        self.p = p
        self.mat = nd.array([[0.21, 0.21, 0.21],
                             [0.72, 0.72, 0.72],
                             [0.07, 0.07, 0.07]])

    def __call__(self, src):
        """Augmenter body"""
        if random.random() < self.p:
            src = nd.dot(src, self.mat)
        return src


[docs]class HorizontalFlipAug(Augmenter):
    """Random horizontal flip.

    Parameters
    ----------
    p : float
        Probability to flip image horizontally
    """
    def __init__(self, p):
        super(HorizontalFlipAug, self).__init__(p=p)
        self.p = p

    def __call__(self, src):
        """Augmenter body"""
        if random.random() < self.p:
            src = nd.flip(src, axis=1)
        return src


[docs]class CastAug(Augmenter):
    """Cast to float32"""
    def __init__(self, typ='float32'):
        super(CastAug, self).__init__(type=typ)
        self.typ = typ

    def __call__(self, src):
        """Augmenter body"""
        src = src.astype(self.typ)
        return src


def CreateAugmenter(data_shape, resize=0, rand_crop=False, rand_resize=False, rand_mirror=False,
                    mean=None, std=None, brightness=0, contrast=0, saturation=0, hue=0,
                    pca_noise=0, rand_gray=0, inter_method=2):
    """Creates an augmenter list.

    Parameters
    ----------
    data_shape : tuple of int
        Shape for output data
    resize : int
        Resize shorter edge if larger than 0 at the begining
    rand_crop : bool
        Whether to enable random cropping other than center crop
    rand_resize : bool
        Whether to enable random sized cropping, require rand_crop to be enabled
    rand_gray : float
        [0, 1], probability to convert to grayscale for all channels, the number
        of channels will not be reduced to 1
    rand_mirror : bool
        Whether to apply horizontal flip to image with probability 0.5
    mean : np.ndarray or None
        Mean pixel values for [r, g, b]
    std : np.ndarray or None
        Standard deviations for [r, g, b]
    brightness : float
        Brightness jittering range (percent)
    contrast : float
        Contrast jittering range (percent)
    saturation : float
        Saturation jittering range (percent)
    hue : float
        Hue jittering range (percent)
    pca_noise : float
        Pca noise level (percent)
    inter_method : int, default=2(Area-based)
        Interpolation method for all resizing operations

        Possible values:
        0: Nearest Neighbors Interpolation.
        1: Bilinear interpolation.
        2: Area-based (resampling using pixel area relation). It may be a
        preferred method for image decimation, as it gives moire-free
        results. But when the image is zoomed, it is similar to the Nearest
        Neighbors method. (used by default).
        3: Bicubic interpolation over 4x4 pixel neighborhood.
        4: Lanczos interpolation over 8x8 pixel neighborhood.
        9: Cubic for enlarge, area for shrink, bilinear for others
        10: Random select from interpolation method metioned above.
        Note:
        When shrinking an image, it will generally look best with AREA-based
        interpolation, whereas, when enlarging an image, it will generally look best
        with Bicubic (slow) or Bilinear (faster but still looks OK).

    Examples
    --------
    >>> # An example of creating multiple augmenters
    >>> augs = mx.image.CreateAugmenter(data_shape=(3, 300, 300), rand_mirror=True,
    ...    mean=True, brightness=0.125, contrast=0.125, rand_gray=0.05,
    ...    saturation=0.125, pca_noise=0.05, inter_method=10)
    >>> # dump the details
    >>> for aug in augs:
    ...    aug.dumps()
    """
    auglist = []

    if resize > 0:
        auglist.append(ResizeAug(resize, inter_method))

    crop_size = (data_shape[2], data_shape[1])
    if rand_resize:
        assert rand_crop
        auglist.append(RandomSizedCropAug(crop_size, 0.08, (3.0 / 4.0, 4.0 / 3.0), inter_method))
    elif rand_crop:
        auglist.append(RandomCropAug(crop_size, inter_method))
    else:
        auglist.append(CenterCropAug(crop_size, inter_method))

    if rand_mirror:
        auglist.append(HorizontalFlipAug(0.5))

    auglist.append(CastAug())

    if brightness or contrast or saturation:
        auglist.append(ColorJitterAug(brightness, contrast, saturation))

    if hue:
        auglist.append(HueJitterAug(hue))

    if pca_noise > 0:
        eigval = np.array([55.46, 4.794, 1.148])
        eigvec = np.array([[-0.5675, 0.7192, 0.4009],
                           [-0.5808, -0.0045, -0.8140],
                           [-0.5836, -0.6948, 0.4203]])
        auglist.append(LightingAug(pca_noise, eigval, eigvec))

    if rand_gray > 0:
        auglist.append(RandomGrayAug(rand_gray))

    if mean is True:
        mean = nd.array([123.68, 116.28, 103.53])
    elif mean is not None:
        assert isinstance(mean, (np.ndarray, nd.NDArray)) and mean.shape[0] in [1, 3]

    if std is True:
        std = nd.array([58.395, 57.12, 57.375])
    elif std is not None:
        assert isinstance(std, (np.ndarray, nd.NDArray)) and std.shape[0] in [1, 3]

    if mean is not None or std is not None:
        auglist.append(ColorNormalizeAug(mean, std))

    return auglist


[docs]class ImageIter(io.DataIter):
    """Image data iterator with a large number of augmentation choices.
    This iterator supports reading from both .rec files and raw image files.

    To load input images from .rec files, use `path_imgrec` parameter and to load from raw image
    files, use `path_imglist` and `path_root` parameters.

    To use data partition (for distributed training) or shuffling, specify `path_imgidx` parameter.

    Parameters
    ----------
    batch_size : int
        Number of examples per batch.
    data_shape : tuple
        Data shape in (channels, height, width) format.
        For now, only RGB image with 3 channels is supported.
    label_width : int, optional
        Number of labels per example. The default label width is 1.
    path_imgrec : str
        Path to image record file (.rec).
        Created with tools/im2rec.py or bin/im2rec.
    path_imglist : str
        Path to image list (.lst).
        Created with tools/im2rec.py or with custom script.
        Format: Tab separated record of index, one or more labels and relative_path_from_root.
    imglist: list
        A list of images with the label(s).
        Each item is a list [imagelabel: float or list of float, imgpath].
    path_root : str
        Root folder of image files.
    path_imgidx : str
        Path to image index file. Needed for partition and shuffling when using .rec source.
    shuffle : bool
        Whether to shuffle all images at the start of each iteration or not.
        Can be slow for HDD.
    part_index : int
        Partition index.
    num_parts : int
        Total number of partitions.
    data_name : str
        Data name for provided symbols.
    label_name : str
        Label name for provided symbols.
    dtype : str
        Label data type. Default: float32. Other options: int32, int64, float64
    last_batch_handle : str, optional
        How to handle the last batch.
        This parameter can be 'pad'(default), 'discard' or 'roll_over'.
        If 'pad', the last batch will be padded with data starting from the begining
        If 'discard', the last batch will be discarded
        If 'roll_over', the remaining elements will be rolled over to the next iteration
    kwargs : ...
        More arguments for creating augmenter. See mx.image.CreateAugmenter.
    """

    def __init__(self, batch_size, data_shape, label_width=1,
                 path_imgrec=None, path_imglist=None, path_root=None, path_imgidx=None,
                 shuffle=False, part_index=0, num_parts=1, aug_list=None, imglist=None,
                 data_name='data', label_name='softmax_label', dtype='float32',
                 last_batch_handle='pad', **kwargs):
        super(ImageIter, self).__init__()
        assert path_imgrec or path_imglist or (isinstance(imglist, list))
        assert dtype in ['int32', 'float32', 'int64', 'float64'], dtype + ' label not supported'
        num_threads = os.environ.get('MXNET_CPU_WORKER_NTHREADS', 1)
        logging.info('Using %s threads for decoding...', str(num_threads))
        logging.info('Set enviroment variable MXNET_CPU_WORKER_NTHREADS to a'
                     ' larger number to use more threads.')
        class_name = self.__class__.__name__
        if path_imgrec:
            logging.info('%s: loading recordio %s...',
                         class_name, path_imgrec)
            if path_imgidx:
                self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
                self.imgidx = list(self.imgrec.keys)
            else:
                self.imgrec = recordio.MXRecordIO(path_imgrec, 'r')  # pylint: disable=redefined-variable-type
                self.imgidx = None
        else:
            self.imgrec = None

        if path_imglist:
            logging.info('%s: loading image list %s...', class_name, path_imglist)
            with open(path_imglist) as fin:
                imglist = {}
                imgkeys = []
                for line in iter(fin.readline, ''):
                    line = line.strip().split('\t')
                    label = nd.array(line[1:-1], dtype=dtype)
                    key = int(line[0])
                    imglist[key] = (label, line[-1])
                    imgkeys.append(key)
                self.imglist = imglist
        elif isinstance(imglist, list):
            logging.info('%s: loading image list...', class_name)
            result = {}
            imgkeys = []
            index = 1
            for img in imglist:
                key = str(index)  # pylint: disable=redefined-variable-type
                index += 1
                if len(img) > 2:
                    label = nd.array(img[:-1], dtype=dtype)
                elif isinstance(img[0], numeric_types):
                    label = nd.array([img[0]], dtype=dtype)
                else:
                    label = nd.array(img[0], dtype=dtype)
                result[key] = (label, img[-1])
                imgkeys.append(str(key))
            self.imglist = result
        else:
            self.imglist = None
        self.path_root = path_root

        self.check_data_shape(data_shape)
        self.provide_data = [(data_name, (batch_size,) + data_shape)]
        if label_width > 1:
            self.provide_label = [(label_name, (batch_size, label_width))]
        else:
            self.provide_label = [(label_name, (batch_size,))]
        self.batch_size = batch_size
        self.data_shape = data_shape
        self.label_width = label_width
        self.shuffle = shuffle
        if self.imgrec is None:
            self.seq = imgkeys
        elif shuffle or num_parts > 1 or path_imgidx:
            assert self.imgidx is not None
            self.seq = self.imgidx
        else:
            self.seq = None

        if num_parts > 1:
            assert part_index < num_parts
            N = len(self.seq)
            C = N // num_parts
            self.seq = self.seq[part_index * C:(part_index + 1) * C]
        if aug_list is None:
            self.auglist = CreateAugmenter(data_shape, **kwargs)
        else:
            self.auglist = aug_list
        self.cur = 0
        self._allow_read = True
        self.last_batch_handle = last_batch_handle
        self.num_image = len(self.seq) if self.seq is not None else None
        self._cache_data = None
        self._cache_label = None
        self._cache_idx = None
        self.reset()

[docs]    def reset(self):
        """Resets the iterator to the beginning of the data."""
        if self.seq is not None and self.shuffle:
            random.shuffle(self.seq)
        if self.last_batch_handle != 'roll_over' or \
            self._cache_data is None:
            if self.imgrec is not None:
                self.imgrec.reset()
            self.cur = 0
            if self._allow_read is False:
                self._allow_read = True

[docs]    def hard_reset(self):
        """Resets the iterator and ignore roll over data"""
        if self.seq is not None and self.shuffle:
            random.shuffle(self.seq)
        if self.imgrec is not None:
            self.imgrec.reset()
        self.cur = 0
        self._allow_read = True
        self._cache_data = None
        self._cache_label = None
        self._cache_idx = None

[docs]    def next_sample(self):
        """Helper function for reading in next sample."""
        if self._allow_read is False:
            raise StopIteration
        if self.seq is not None:
            if self.cur < self.num_image:
                idx = self.seq[self.cur]
            else:
                if self.last_batch_handle != 'discard':
                    self.cur = 0
                raise StopIteration
            self.cur += 1
            if self.imgrec is not None:
                s = self.imgrec.read_idx(idx)
                header, img = recordio.unpack(s)
                if self.imglist is None:
                    return header.label, img
                else:
                    return self.imglist[idx][0], img
            else:
                label, fname = self.imglist[idx]
                return label, self.read_image(fname)
        else:
            s = self.imgrec.read()
            if s is None:
                if self.last_batch_handle != 'discard':
                    self.imgrec.reset()
                raise StopIteration
            header, img = recordio.unpack(s)
            return header.label, img

    def _batchify(self, batch_data, batch_label, start=0):
        """Helper function for batchifying data"""
        i = start
        batch_size = self.batch_size
        try:
            while i < batch_size:
                label, s = self.next_sample()
                data = self.imdecode(s)
                try:
                    self.check_valid_image(data)
                except RuntimeError as e:
                    logging.debug('Invalid image, skipping:  %s', str(e))
                    continue
                data = self.augmentation_transform(data)
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                batch_data[i] = self.postprocess_data(data)
                batch_label[i] = label
                i += 1
        except StopIteration:
            if not i:
                raise StopIteration
        return i

[docs]    def next(self):
        """Returns the next batch of data."""
        batch_size = self.batch_size
        c, h, w = self.data_shape
        # if last batch data is rolled over
        if self._cache_data is not None:
            # check both the data and label have values
            assert self._cache_label is not None, "_cache_label didn't have values"
            assert self._cache_idx is not None, "_cache_idx didn't have values"
            batch_data = self._cache_data
            batch_label = self._cache_label
            i = self._cache_idx
            # clear the cache data
        else:
            batch_data = nd.zeros((batch_size, c, h, w))
            batch_label = nd.empty(self.provide_label[0][1])
            i = self._batchify(batch_data, batch_label)
        # calculate the padding
        pad = batch_size - i
        # handle padding for the last batch
        if pad != 0:
            if self.last_batch_handle == 'discard': # pylint: disable=no-else-raise
                raise StopIteration
            # if the option is 'roll_over', throw StopIteration and cache the data
            elif self.last_batch_handle == 'roll_over' and \
                self._cache_data is None:
                self._cache_data = batch_data
                self._cache_label = batch_label
                self._cache_idx = i
                raise StopIteration
            else:
                _ = self._batchify(batch_data, batch_label, i)
                if self.last_batch_handle == 'pad':
                    self._allow_read = False
                else:
                    self._cache_data = None
                    self._cache_label = None
                    self._cache_idx = None

        return io.DataBatch([batch_data], [batch_label], pad=pad)

[docs]    def check_data_shape(self, data_shape):
        """Checks if the input data shape is valid"""
        if not len(data_shape) == 3:
            raise ValueError('data_shape should have length 3, with dimensions CxHxW')
        if not data_shape[0] == 3:
            raise ValueError('This iterator expects inputs to have 3 channels.')

[docs]    def check_valid_image(self, data):
        """Checks if the input data is valid"""
        if len(data[0].shape) == 0:
            raise RuntimeError('Data shape is wrong')

[docs]    def imdecode(self, s):
        """Decodes a string or byte string to an NDArray.
        See mx.img.imdecode for more details."""
        def locate():
            """Locate the image file/index if decode fails."""
            if self.seq is not None:
                idx = self.seq[(self.cur % self.num_image) - 1]
            else:
                idx = (self.cur % self.num_image) - 1
            if self.imglist is not None:
                _, fname = self.imglist[idx]
                msg = "filename: {}".format(fname)
            else:
                msg = "index: {}".format(idx)
            return "Broken image " + msg
        try:
            img = imdecode(s)
        except Exception as e:
            raise RuntimeError("{}, {}".format(locate(), e))
        return img

[docs]    def read_image(self, fname):
        """Reads an input image `fname` and returns the decoded raw bytes.
        Examples
        --------
        >>> dataIter.read_image('Face.jpg') # returns decoded raw bytes.
        """
        with open(os.path.join(self.path_root, fname), 'rb') as fin:
            img = fin.read()
        return img

[docs]    def augmentation_transform(self, data):
        """Transforms input data with specified augmentation."""
        for aug in self.auglist:
            data = aug(data)
        return data

[docs]    def postprocess_data(self, datum):
        """Final postprocessing step before image is loaded into the batch."""
        return nd.transpose(datum, axes=(2, 0, 1))