Source code for mxnet.gluon.nn.basic_layers

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# coding: utf-8
# pylint: disable= arguments-differ
"""Basic neural network layers."""

from ..block import Block, HybridBlock
from ..utils import _indent


class Sequential(Block):
    """Stacks `Block`s sequentially.

    Example::

        net = nn.Sequential()
        # use net's name_scope to give child Blocks appropriate names.
        with net.name_scope():
            net.add(nn.Dense(10, activation='relu'))
            net.add(nn.Dense(20))
    """
    def __init__(self, prefix=None, params=None):
        super(Sequential, self).__init__(prefix=prefix, params=params)

    def add(self, block):
        """Adds block on top of the stack."""
        self.register_child(block)

    def forward(self, x):
        for block in self._children:
            x = block(x)
        return x

    def __repr__(self):
        s = '{name}(\n{modstr}\n)'
        modstr = '\n'.join(['  ({key}): {block}'.format(key=key,
                                                        block=_indent(block.__repr__(), 2))
                            for key, block in enumerate(self._children)
                            if isinstance(block, Block)])
        return s.format(name=self.__class__.__name__,
                        modstr=modstr)

    def __getitem__(self, i):
        return self._children[i]

    def __len__(self):
        return len(self._children)
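
A minimal usage sketch for `Sequential` (not part of the module source; layer sizes and
the input shape below are illustrative)::

    import mxnet as mx
    from mxnet.gluon import nn

    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(10, activation='relu'))
        net.add(nn.Dense(2))
    net.collect_params().initialize()      # shapes are inferred on the first forward pass
    out = net(mx.nd.ones((4, 16)))         # out.shape == (4, 2)
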
class HybridSequential(HybridBlock):
    """Stacks `HybridBlock`s sequentially.

    Example::

        net = nn.HybridSequential()
        # use net's name_scope to give child Blocks appropriate names.
        with net.name_scope():
            net.add(nn.Dense(10, activation='relu'))
            net.add(nn.Dense(20))
    """
    def __init__(self, prefix=None, params=None):
        super(HybridSequential, self).__init__(prefix=prefix, params=params)

    def add(self, block):
        """Adds block on top of the stack."""
        self.register_child(block)

    def hybrid_forward(self, F, x):
        for block in self._children:
            x = block(x)
        return x

    def __repr__(self):
        s = '{name}(\n{modstr}\n)'
        modstr = '\n'.join(['  ({key}): {block}'.format(key=key,
                                                        block=_indent(block.__repr__(), 2))
                            for key, block in enumerate(self._children)
                            if isinstance(block, Block)])
        return s.format(name=self.__class__.__name__,
                        modstr=modstr)

    def __getitem__(self, i):
        return self._children[i]

    def __len__(self):
        return len(self._children)
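
The same pattern applies to `HybridSequential`, which can additionally be compiled with
`hybridize()`. A hedged sketch (shapes are illustrative)::

    import mxnet as mx
    from mxnet.gluon import nn

    net = nn.HybridSequential()
    with net.name_scope():
        net.add(nn.Dense(10, activation='relu'))
        net.add(nn.Dense(2))
    net.collect_params().initialize()
    net.hybridize()                        # switch to symbolic execution
    out = net(mx.nd.ones((4, 16)))         # out.shape == (4, 2)
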
class Dense(HybridBlock):
    """Just your regular densely-connected NN layer.

    `Dense` implements the operation:
    `output = activation(dot(input, weight) + bias)`
    where `activation` is the element-wise activation function
    passed as the `activation` argument, `weight` is a weights matrix
    created by the layer, and `bias` is a bias vector created by the layer
    (only applicable if `use_bias` is `True`).

    Note: the input must be a tensor with rank 2. Use `flatten` to convert it
    to rank 2 manually if necessary.

    Parameters
    ----------
    units : int
        Dimensionality of the output space.
    activation : str
        Activation function to use. See help on `Activation` layer.
        If you don't specify anything, no activation is applied
        (ie. "linear" activation: `a(x) = x`).
    use_bias : bool
        Whether the layer uses a bias vector.
    weight_initializer : str or `Initializer`
        Initializer for the `kernel` weights matrix.
    bias_initializer : str or `Initializer`
        Initializer for the bias vector.
    in_units : int, optional
        Size of the input data. If not specified, initialization will be
        deferred to the first time `forward` is called and `in_units`
        will be inferred from the shape of input data.
    prefix : str or None
        See document of `Block`.
    params : ParameterDict or None
        See document of `Block`.


    Input shape:
        A 2D input with shape `(batch_size, in_units)`.

    Output shape:
        The output would have shape `(batch_size, units)`.
    """
    def __init__(self, units, activation=None, use_bias=True,
                 weight_initializer=None, bias_initializer='zeros',
                 in_units=0, **kwargs):
        super(Dense, self).__init__(**kwargs)
        with self.name_scope():
            self._units = units
            self._in_units = in_units
            self.weight = self.params.get('weight', shape=(units, in_units),
                                          init=weight_initializer,
                                          allow_deferred_init=True)
            if use_bias:
                self.bias = self.params.get('bias', shape=(units,),
                                            init=bias_initializer,
                                            allow_deferred_init=True)
            else:
                self.bias = None
            if activation is not None:
                self.act = Activation(activation, prefix=activation+'_')
            else:
                self.act = None

    def hybrid_forward(self, F, x, weight, bias=None):
        if bias is None:
            act = F.FullyConnected(x, weight, no_bias=True,
                                   num_hidden=self._units, name='fwd')
        else:
            act = F.FullyConnected(x, weight, bias,
                                   num_hidden=self._units, name='fwd')
        if self.act is not None:
            act = self.act(act)
        return act

    def __repr__(self):
        s = '{name}({layout}, {act})'
        return s.format(name=self.__class__.__name__,
                        act=self.act if self.act else 'linear',
                        layout='{0} -> {1}'.format(self._in_units, self._units)
                        if self._in_units else self._units)
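
A `Dense` layer can also be used on its own; a minimal sketch (the sizes below are
arbitrary)::

    import mxnet as mx
    from mxnet.gluon import nn

    layer = nn.Dense(16, activation='relu', in_units=8)
    layer.collect_params().initialize()
    out = layer(mx.nd.ones((4, 8)))        # out.shape == (4, 16)
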
class Activation(HybridBlock):
    """Applies an activation function to input.

    Parameters
    ----------
    activation : str
        Name of activation function to use.
        See :func:`~mxnet.ndarray.Activation` for available choices.


    Input shape:
        Arbitrary.

    Output shape:
        Same shape as input.
    """
    def __init__(self, activation, **kwargs):
        self._act_type = activation
        super(Activation, self).__init__(**kwargs)

    def _alias(self):
        return self._act_type

    def hybrid_forward(self, F, x):
        return F.Activation(x, act_type=self._act_type, name='fwd')

    def __repr__(self):
        s = '{name}({_act_type})'
        return s.format(name=self.__class__.__name__,
                        **self.__dict__)
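
`Activation` holds no parameters, so it can be applied without initialization; a small
sketch::

    import mxnet as mx
    from mxnet.gluon import nn

    act = nn.Activation('tanh')
    y = act(mx.nd.array([-1.0, 0.0, 1.0]))  # element-wise tanh
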
class Dropout(HybridBlock):
    """Applies Dropout to the input.

    Dropout consists in randomly setting a fraction `rate` of input units
    to 0 at each update during training time, which helps prevent overfitting.

    Parameters
    ----------
    rate : float
        Fraction of the input units to drop. Must be a number between 0 and 1.


    Input shape:
        Arbitrary.

    Output shape:
        Same shape as input.

    References
    ----------
        Dropout: A Simple Way to Prevent Neural Networks from Overfitting
    """
    def __init__(self, rate, **kwargs):
        super(Dropout, self).__init__(**kwargs)
        self._rate = rate

    def hybrid_forward(self, F, x):
        return F.Dropout(x, p=self._rate, name='fwd')

    def __repr__(self):
        s = '{name}(p = {_rate})'
        return s.format(name=self.__class__.__name__,
                        **self.__dict__)
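
A sketch of `Dropout` usage, assuming the usual Gluon convention that dropout is only
active in training mode (e.g. under `autograd.record()`) and is a pass-through at
inference::

    import mxnet as mx
    from mxnet import autograd
    from mxnet.gluon import nn

    drop = nn.Dropout(0.5)
    x = mx.nd.ones((2, 4))
    y_infer = drop(x)                      # inference: returns x unchanged
    with autograd.record():
        y_train = drop(x)                  # training: roughly half the entries set to zero
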
class BatchNorm(HybridBlock):
    """Batch normalization layer (Ioffe and Szegedy, 2014).
    Normalizes the input at each batch, i.e. applies a transformation
    that maintains the mean activation close to 0 and the activation
    standard deviation close to 1.

    Parameters
    ----------
    axis : int, default 1
        The axis that should be normalized. This is typically the channels
        (C) axis. For instance, after a `Conv2D` layer with `layout='NCHW'`,
        set `axis=1` in `BatchNorm`. If `layout='NHWC'`, then set `axis=3`.
    momentum : float, default 0.9
        Momentum for the moving average.
    epsilon : float, default 1e-5
        Small float added to variance to avoid dividing by zero.
    center : bool, default True
        If True, add offset of `beta` to normalized tensor.
        If False, `beta` is ignored.
    scale : bool, default True
        If True, multiply by `gamma`. If False, `gamma` is not used.
        When the next layer is linear (also e.g. `nn.relu`),
        this can be disabled since the scaling
        will be done by the next layer.
    beta_initializer : str or `Initializer`, default 'zeros'
        Initializer for the beta weight.
    gamma_initializer : str or `Initializer`, default 'ones'
        Initializer for the gamma weight.
    running_mean_initializer : str or `Initializer`, default 'zeros'
        Initializer for the running mean.
    running_variance_initializer : str or `Initializer`, default 'ones'
        Initializer for the running variance.
    in_channels : int, default 0
        Number of channels (feature maps) in input data. If not specified,
        initialization will be deferred to the first time `forward` is called
        and `in_channels` will be inferred from the shape of input data.


    Input shape:
        Arbitrary.

    Output shape:
        Same shape as input.
    """
    def __init__(self, axis=1, momentum=0.9, epsilon=1e-5, center=True, scale=True,
                 beta_initializer='zeros', gamma_initializer='ones',
                 running_mean_initializer='zeros', running_variance_initializer='ones',
                 in_channels=0, **kwargs):
        super(BatchNorm, self).__init__(**kwargs)
        self._kwargs = {'axis': axis, 'eps': epsilon, 'momentum': momentum,
                        'fix_gamma': not scale}
        if in_channels != 0:
            self.in_channels = in_channels

        self.gamma = self.params.get('gamma', grad_req='write' if scale else 'null',
                                     shape=(in_channels,), init=gamma_initializer,
                                     allow_deferred_init=True,
                                     differentiable=scale)
        self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                    shape=(in_channels,), init=beta_initializer,
                                    allow_deferred_init=True,
                                    differentiable=center)
        self.running_mean = self.params.get('running_mean', grad_req='null',
                                            shape=(in_channels,),
                                            init=running_mean_initializer,
                                            allow_deferred_init=True,
                                            differentiable=False)
        self.running_var = self.params.get('running_var', grad_req='null',
                                           shape=(in_channels,),
                                           init=running_variance_initializer,
                                           allow_deferred_init=True,
                                           differentiable=False)

    def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
        return F.BatchNorm(x, gamma, beta, running_mean, running_var,
                           name='fwd', **self._kwargs)

    def __repr__(self):
        s = '{name}({content}'
        if hasattr(self, 'in_channels'):
            s += ', in_channels={0}'.format(self.in_channels)
        s += ')'
        return s.format(name=self.__class__.__name__,
                        content=', '.join(['='.join([k, v.__repr__()])
                                           for k, v in self._kwargs.items()]))
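
A sketch of `BatchNorm` on NCHW data (channel count and input shape are illustrative)::

    import mxnet as mx
    from mxnet.gluon import nn

    bn = nn.BatchNorm(in_channels=3)       # default axis=1 matches the C in NCHW
    bn.collect_params().initialize()
    x = mx.nd.ones((2, 3, 4, 4))
    y = bn(x)                              # same shape as x
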
class LeakyReLU(HybridBlock):
    """Leaky version of a Rectified Linear Unit.

    It allows a small gradient when the unit is not active::

        `f(x) = alpha * x for x < 0`,
        `f(x) = x for x >= 0`.

    Parameters
    ----------
    alpha : float
        Slope coefficient for the negative half axis. Must be >= 0.


    Input shape:
        Arbitrary.

    Output shape:
        Same shape as input.
    """
    def __init__(self, alpha, **kwargs):
        super(LeakyReLU, self).__init__(**kwargs)
        self._alpha = alpha

    def hybrid_forward(self, F, x):
        return F.LeakyReLU(x, act_type='leaky', slope=self._alpha, name='fwd')

    def __repr__(self):
        s = '{name}({alpha})'
        return s.format(name=self.__class__.__name__,
                        alpha=self._alpha)
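
A sketch of `LeakyReLU` with an illustrative slope::

    import mxnet as mx
    from mxnet.gluon import nn

    lrelu = nn.LeakyReLU(0.1)
    y = lrelu(mx.nd.array([-2.0, 0.0, 2.0]))  # approximately [-0.2, 0.0, 2.0]
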
class Embedding(HybridBlock):
    """Turns non-negative integers (indexes/tokens) into dense vectors
    of fixed size. eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]

    Parameters
    ----------
    input_dim : int
        Size of the vocabulary, i.e. maximum integer index + 1.
    output_dim : int
        Dimension of the dense embedding.
    dtype : str or np.dtype, default 'float32'
        Data type of output embeddings.
    weight_initializer : Initializer
        Initializer for the `embeddings` matrix.


    Input shape:
        2D tensor with shape: `(N, M)`.

    Output shape:
        3D tensor with shape: `(N, M, output_dim)`.
    """
    def __init__(self, input_dim, output_dim, dtype='float32',
                 weight_initializer=None, **kwargs):
        super(Embedding, self).__init__(**kwargs)
        self._kwargs = {'input_dim': input_dim, 'output_dim': output_dim,
                        'dtype': dtype}
        self.weight = self.params.get('weight', shape=(input_dim, output_dim),
                                      init=weight_initializer,
                                      allow_deferred_init=True)

    def hybrid_forward(self, F, x, weight):
        return F.Embedding(x, weight, name='fwd', **self._kwargs)

    def __repr__(self):
        s = '{block_name}({input_dim} -> {output_dim}, {dtype})'
        return s.format(block_name=self.__class__.__name__,
                        **self._kwargs)
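
A sketch of an `Embedding` lookup (vocabulary size and embedding width are illustrative)::

    import mxnet as mx
    from mxnet.gluon import nn

    embed = nn.Embedding(input_dim=10, output_dim=4)
    embed.collect_params().initialize()
    tokens = mx.nd.array([[4], [7]])       # shape (N, M) = (2, 1)
    vectors = embed(tokens)                # shape (2, 1, 4)
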
class Flatten(HybridBlock):
    """Flattens the input to two dimensional.

    Input shape:
        Arbitrary shape `(N, a, b, c, ...)`

    Output shape:
        2D tensor with shape: `(N, a*b*c...)`
    """
    def __init__(self, **kwargs):
        super(Flatten, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        return x.reshape((0, -1))

    def __repr__(self):
        return self.__class__.__name__
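
A sketch of `Flatten`, which collapses all non-batch axes::

    import mxnet as mx
    from mxnet.gluon import nn

    flatten = nn.Flatten()
    y = flatten(mx.nd.ones((2, 3, 4)))     # y.shape == (2, 12)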