mxnet
Classes | Typedefs | Enumerations | Functions
mxnet::cpp Namespace Reference

Classes

class  Accuracy
 
class  AdaDeltaOptimizer
 
class  AdaGradOptimizer
 
class  AdamOptimizer
 
class  Bilinear
 
class  Constant
 
class  Context
 Context interface. More...
 
class  DataBatch
 Default object for holding a mini-batch of data and related information. More...
 
class  DataIter
 
class  EvalMetric
 
class  Executor
 Executor interface. More...
 
class  FactorScheduler
 
class  FeedForward
 
struct  FeedForwardConfig
 
class  Initializer
 
class  KVStore
 
class  LogLoss
 
class  LRScheduler
 Learning rate (lr) scheduler interface. More...
 
class  MAE
 
class  Monitor
 Monitor interface. More...
 
class  MSE
 
class  MSRAPrelu
 
class  MXDataIter
 
struct  MXDataIterBlob
 
class  MXDataIterMap
 
class  NDArray
 NDArray interface. More...
 
struct  NDBlob
 Struct to store NDArrayHandle. More...
 
class  Normal
 
class  One
 
class  Operator
 Operator interface. More...
 
class  OpMap
 OpMap instance holds a map of all the symbol creators so we can get symbol creators by name. This is used internally by Symbol and Operator. More...
 
class  Optimizer
 Optimizer interface. More...
 
class  OptimizerRegistry
 
class  PSNR
 
class  RMSE
 
class  RMSPropOptimizer
 
class  SGDOptimizer
 
struct  Shape
 Dynamic shape class that can hold a shape of arbitrary dimension. More...
 
class  SignumOptimizer
 
struct  SymBlob
 Struct to store SymbolHandle. More...
 
class  Symbol
 Symbol interface. More...
 
class  Uniform
 
class  Xavier
 
class  Zero
 

Typedefs

typedef unsigned index_t
 
typedef std::function< Optimizer *()> OptimizerCreator
 

Enumerations

enum  OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo }
 
enum  DeviceType { kCPU = 1, kGPU = 2, kCPUPinned = 3 }
 
enum  PickMode { PickMode::kClip = 0, PickMode::kWrap = 1 }
 
enum  DotForwardStype { DotForwardStype::kNone = 0, DotForwardStype::kCsr = 1, DotForwardStype::kDefault = 2, DotForwardStype::kRow_sparse = 3 }
 
enum  Batch_dotForwardStype { Batch_dotForwardStype::kNone = 0, Batch_dotForwardStype::kCsr = 1, Batch_dotForwardStype::kDefault = 2, Batch_dotForwardStype::kRow_sparse = 3 }
 
enum  CastDtype {
  CastDtype::kFloat16 = 0, CastDtype::kFloat32 = 1, CastDtype::kFloat64 = 2, CastDtype::kInt32 = 3,
  CastDtype::kInt64 = 4, CastDtype::kInt8 = 5, CastDtype::kUint8 = 6
}
 
enum  TopkRetTyp { TopkRetTyp::kBoth = 0, TopkRetTyp::kIndices = 1, TopkRetTyp::kMask = 2, TopkRetTyp::kValue = 3 }
 
enum  TopkDtype {
  TopkDtype::kFloat16 = 0, TopkDtype::kFloat32 = 1, TopkDtype::kFloat64 = 2, TopkDtype::kInt32 = 3,
  TopkDtype::kUint8 = 4
}
 
enum  ArgsortDtype {
  ArgsortDtype::kFloat16 = 0, ArgsortDtype::kFloat32 = 1, ArgsortDtype::kFloat64 = 2, ArgsortDtype::kInt32 = 3,
  ArgsortDtype::kUint8 = 4
}
 
enum  EmbeddingDtype {
  EmbeddingDtype::kFloat16 = 0, EmbeddingDtype::kFloat32 = 1, EmbeddingDtype::kFloat64 = 2, EmbeddingDtype::kInt32 = 3,
  EmbeddingDtype::kInt64 = 4, EmbeddingDtype::kInt8 = 5, EmbeddingDtype::kUint8 = 6
}
 
enum  TakeMode { TakeMode::kClip = 0, TakeMode::kRaise = 1, TakeMode::kWrap = 2 }
 
enum  One_hotDtype {
  One_hotDtype::kFloat16 = 0, One_hotDtype::kFloat32 = 1, One_hotDtype::kFloat64 = 2, One_hotDtype::kInt32 = 3,
  One_hotDtype::kInt64 = 4, One_hotDtype::kInt8 = 5, One_hotDtype::kUint8 = 6
}
 
enum  Cast_storageStype { Cast_storageStype::kCsr = 0, Cast_storageStype::kDefault = 1, Cast_storageStype::kRow_sparse = 2 }
 
enum  PoolingPoolType { PoolingPoolType::kAvg = 0, PoolingPoolType::kLp = 1, PoolingPoolType::kMax = 2, PoolingPoolType::kSum = 3 }
 
enum  PoolingPoolingConvention { PoolingPoolingConvention::kFull = 0, PoolingPoolingConvention::kSame = 1, PoolingPoolingConvention::kValid = 2 }
 
enum  DeconvolutionCudnnTune { DeconvolutionCudnnTune::kNone = 0, DeconvolutionCudnnTune::kFastest = 1, DeconvolutionCudnnTune::kLimited_workspace = 2, DeconvolutionCudnnTune::kOff = 3 }
 
enum  DeconvolutionLayout {
  DeconvolutionLayout::kNone = 0, DeconvolutionLayout::kNCDHW = 1, DeconvolutionLayout::kNCHW = 2, DeconvolutionLayout::kNCW = 3,
  DeconvolutionLayout::kNDHWC = 4, DeconvolutionLayout::kNHWC = 5
}
 
enum  ActivationActType {
  ActivationActType::kRelu = 0, ActivationActType::kSigmoid = 1, ActivationActType::kSoftrelu = 2, ActivationActType::kSoftsign = 3,
  ActivationActType::kTanh = 4
}
 
enum  CTCLossBlankLabel { CTCLossBlankLabel::kFirst = 0, CTCLossBlankLabel::kLast = 1 }
 
enum  ConvolutionCudnnTune { ConvolutionCudnnTune::kNone = 0, ConvolutionCudnnTune::kFastest = 1, ConvolutionCudnnTune::kLimited_workspace = 2, ConvolutionCudnnTune::kOff = 3 }
 
enum  ConvolutionLayout {
  ConvolutionLayout::kNone = 0, ConvolutionLayout::kNCDHW = 1, ConvolutionLayout::kNCHW = 2, ConvolutionLayout::kNCW = 3,
  ConvolutionLayout::kNDHWC = 4, ConvolutionLayout::kNHWC = 5
}
 
enum  UpSamplingSampleType { UpSamplingSampleType::kBilinear = 0, UpSamplingSampleType::kNearest = 1 }
 
enum  UpSamplingMultiInputMode { UpSamplingMultiInputMode::kConcat = 0, UpSamplingMultiInputMode::kSum = 1 }
 
enum  DropoutMode { DropoutMode::kAlways = 0, DropoutMode::kTraining = 1 }
 
enum  SoftmaxActivationMode { SoftmaxActivationMode::kChannel = 0, SoftmaxActivationMode::kInstance = 1 }
 
enum  PadMode { PadMode::kConstant = 0, PadMode::kEdge = 1, PadMode::kReflect = 2 }
 
enum  LeakyReLUActType {
  LeakyReLUActType::kElu = 0, LeakyReLUActType::kLeaky = 1, LeakyReLUActType::kPrelu = 2, LeakyReLUActType::kRrelu = 3,
  LeakyReLUActType::kSelu = 4
}
 
enum  GridGeneratorTransformType { GridGeneratorTransformType::kAffine = 0, GridGeneratorTransformType::kWarp = 1 }
 
enum  Pooling_v1PoolType { Pooling_v1PoolType::kAvg = 0, Pooling_v1PoolType::kMax = 1, Pooling_v1PoolType::kSum = 2 }
 
enum  Pooling_v1PoolingConvention { Pooling_v1PoolingConvention::kFull = 0, Pooling_v1PoolingConvention::kValid = 1 }
 
enum  RNNMode { RNNMode::kGru = 0, RNNMode::kLstm = 1, RNNMode::kRnn_relu = 2, RNNMode::kRnn_tanh = 3 }
 
enum  Convolution_v1CudnnTune { Convolution_v1CudnnTune::kNone = 0, Convolution_v1CudnnTune::kFastest = 1, Convolution_v1CudnnTune::kLimited_workspace = 2, Convolution_v1CudnnTune::kOff = 3 }
 
enum  Convolution_v1Layout {
  Convolution_v1Layout::kNone = 0, Convolution_v1Layout::kNCDHW = 1, Convolution_v1Layout::kNCHW = 2, Convolution_v1Layout::kNDHWC = 3,
  Convolution_v1Layout::kNHWC = 4
}
 
enum  SpatialTransformerTransformType { SpatialTransformerTransformType::kAffine = 0 }
 
enum  SpatialTransformerSamplerType { SpatialTransformerSamplerType::kBilinear = 0 }
 
enum  SoftmaxOutputNormalization { SoftmaxOutputNormalization::kBatch = 0, SoftmaxOutputNormalization::kNull = 1, SoftmaxOutputNormalization::kValid = 2 }
 
enum  SoftmaxNormalization { SoftmaxNormalization::kBatch = 0, SoftmaxNormalization::kNull = 1, SoftmaxNormalization::kValid = 2 }
 
enum  L2NormalizationMode { L2NormalizationMode::kChannel = 0, L2NormalizationMode::kInstance = 1, L2NormalizationMode::kSpatial = 2 }
 
enum  MakeLossNormalization { MakeLossNormalization::kBatch = 0, MakeLossNormalization::kNull = 1, MakeLossNormalization::kValid = 2 }
 

Functions

NDArray _default_monitor_func (const NDArray &x)
 Default function for monitor that computes statistics of the input tensor, namely the mean absolute value |x|/size(x). More...
 
std::ostream & operator<< (std::ostream &out, const NDArray &ndarray)
 
Symbol khatri_rao (const std::string &symbol_name, const std::vector< Symbol > &args)
 
Symbol Custom (const std::string &symbol_name, const std::vector< Symbol > &data, const std::string &op_type)
 
Symbol broadcast_power (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_maximum (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_minimum (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_hypot (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol Reshape (const std::string &symbol_name, Symbol data, Shape shape=Shape(), bool reverse=false, Shape target_shape=Shape(), bool keep_highest=false)
 
Symbol Flatten (const std::string &symbol_name, Symbol data)
 
Symbol transpose (const std::string &symbol_name, Symbol data, Shape axes=Shape())
 
Symbol expand_dims (const std::string &symbol_name, Symbol data, int axis)
 
Symbol slice (const std::string &symbol_name, Symbol data, Shape begin, Shape end, Shape step=Shape())
 
Symbol slice_axis (const std::string &symbol_name, Symbol data, int axis, int begin, dmlc::optional< int > end)
 
Symbol slice_like (const std::string &symbol_name, Symbol data, Symbol shape_like, Shape axes=Shape())
 
Symbol clip (const std::string &symbol_name, Symbol data, mx_float a_min, mx_float a_max)
 
Symbol repeat (const std::string &symbol_name, Symbol data, int repeats, dmlc::optional< int > axis=dmlc::optional< int >())
 
Symbol tile (const std::string &symbol_name, Symbol data, Shape reps)
 
Symbol reverse (const std::string &symbol_name, Symbol data, Shape axis)
 
Symbol stack (const std::string &symbol_name, const std::vector< Symbol > &data, int num_args, int axis=0)
 
Symbol squeeze (const std::string &symbol_name, const std::vector< Symbol > &data, dmlc::optional< Shape > axis=dmlc::optional< Shape >())
 
Symbol depth_to_space (const std::string &symbol_name, Symbol data, int block_size)
 
Symbol space_to_depth (const std::string &symbol_name, Symbol data, int block_size)
 
Symbol zeros_like (const std::string &symbol_name, Symbol data)
 
Symbol ones_like (const std::string &symbol_name, Symbol data)
 
Symbol add_n (const std::string &symbol_name, const std::vector< Symbol > &args)
 
Symbol argmax (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)
 
Symbol argmin (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)
 
Symbol argmax_channel (const std::string &symbol_name, Symbol data)
 
Symbol pick (const std::string &symbol_name, Symbol data, Symbol index, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool keepdims=false, PickMode mode=PickMode::kClip)
 
Symbol dot (const std::string &symbol_name, Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, DotForwardStype forward_stype=DotForwardStype::kNone)
 
Symbol batch_dot (const std::string &symbol_name, Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, Batch_dotForwardStype forward_stype=Batch_dotForwardStype::kNone)
 
Symbol broadcast_add (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_sub (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_mul (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_div (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_mod (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol relu (const std::string &symbol_name, Symbol data)
 
Symbol sigmoid (const std::string &symbol_name, Symbol data)
 
Symbol hard_sigmoid (const std::string &symbol_name, Symbol data, mx_float alpha=0.2, mx_float beta=0.5)
 
Symbol softsign (const std::string &symbol_name, Symbol data)
 
Symbol BlockGrad (const std::string &symbol_name, Symbol data)
 
Symbol make_loss (const std::string &symbol_name, Symbol data)
 
Symbol reshape_like (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol shape_array (const std::string &symbol_name, Symbol data, dmlc::optional< int > lhs_begin=dmlc::optional< int >(), dmlc::optional< int > lhs_end=dmlc::optional< int >(), dmlc::optional< int > rhs_begin=dmlc::optional< int >(), dmlc::optional< int > rhs_end=dmlc::optional< int >())
 
Symbol size_array (const std::string &symbol_name, Symbol data)
 
Symbol Cast (const std::string &symbol_name, Symbol data, CastDtype dtype)
 
Symbol negative (const std::string &symbol_name, Symbol data)
 
Symbol reciprocal (const std::string &symbol_name, Symbol data)
 
Symbol abs (const std::string &symbol_name, Symbol data)
 
Symbol sign (const std::string &symbol_name, Symbol data)
 
Symbol round (const std::string &symbol_name, Symbol data)
 
Symbol rint (const std::string &symbol_name, Symbol data)
 
Symbol ceil (const std::string &symbol_name, Symbol data)
 
Symbol floor (const std::string &symbol_name, Symbol data)
 
Symbol trunc (const std::string &symbol_name, Symbol data)
 
Symbol fix (const std::string &symbol_name, Symbol data)
 
Symbol square (const std::string &symbol_name, Symbol data)
 
Symbol sqrt (const std::string &symbol_name, Symbol data)
 
Symbol rsqrt (const std::string &symbol_name, Symbol data)
 
Symbol cbrt (const std::string &symbol_name, Symbol data)
 
Symbol erf (const std::string &symbol_name, Symbol data)
 
Symbol rcbrt (const std::string &symbol_name, Symbol data)
 
Symbol exp (const std::string &symbol_name, Symbol data)
 
Symbol log (const std::string &symbol_name, Symbol data)
 
Symbol log10 (const std::string &symbol_name, Symbol data)
 
Symbol log2 (const std::string &symbol_name, Symbol data)
 
Symbol log1p (const std::string &symbol_name, Symbol data)
 
Symbol expm1 (const std::string &symbol_name, Symbol data)
 
Symbol gamma (const std::string &symbol_name, Symbol data)
 
Symbol gammaln (const std::string &symbol_name, Symbol data)
 
Symbol logical_not (const std::string &symbol_name, Symbol data)
 
Symbol sum (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol mean (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol prod (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol nansum (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol nanprod (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol max (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol min (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol broadcast_axis (const std::string &symbol_name, Symbol data, Shape axis=Shape(), Shape size=Shape())
 
Symbol broadcast_to (const std::string &symbol_name, Symbol data, Shape shape=Shape())
 
Symbol broadcast_like (const std::string &symbol_name, Symbol lhs, Symbol rhs, dmlc::optional< Shape > lhs_axes=dmlc::optional< Shape >(), dmlc::optional< Shape > rhs_axes=dmlc::optional< Shape >())
 
Symbol norm (const std::string &symbol_name, Symbol data, int ord=2, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false)
 
Symbol topk (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), int k=1, TopkRetTyp ret_typ=TopkRetTyp::kIndices, bool is_ascend=false, TopkDtype dtype=TopkDtype::kFloat32)
 
Symbol sort (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
 
Symbol argsort (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true, ArgsortDtype dtype=ArgsortDtype::kFloat32)
 
Symbol elemwise_add (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol elemwise_sub (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol elemwise_mul (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol elemwise_div (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol Embedding (const std::string &symbol_name, Symbol data, Symbol weight, int input_dim, int output_dim, EmbeddingDtype dtype=EmbeddingDtype::kFloat32, bool sparse_grad=false)
 
Symbol take (const std::string &symbol_name, Symbol a, Symbol indices, int axis=0, TakeMode mode=TakeMode::kClip)
 
Symbol batch_take (const std::string &symbol_name, Symbol a, Symbol indices)
 
Symbol one_hot (const std::string &symbol_name, Symbol indices, int depth, double on_value=1, double off_value=0, One_hotDtype dtype=One_hotDtype::kFloat32)
 
Symbol gather_nd (const std::string &symbol_name, Symbol data, Symbol indices)
 
Symbol scatter_nd (const std::string &symbol_name, Symbol data, Symbol indices, Shape shape)
 
Symbol broadcast_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_not_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_greater (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_greater_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_lesser (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_lesser_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_logical_and (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_logical_or (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol broadcast_logical_xor (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol diag (const std::string &symbol_name, Symbol data, int k=0, int axis1=0, int axis2=1)
 
Symbol where (const std::string &symbol_name, Symbol condition, Symbol x, Symbol y)
 
Symbol smooth_l1 (const std::string &symbol_name, Symbol data, mx_float scalar)
 
Symbol cast_storage (const std::string &symbol_name, Symbol data, Cast_storageStype stype)
 
Symbol sin (const std::string &symbol_name, Symbol data)
 
Symbol cos (const std::string &symbol_name, Symbol data)
 
Symbol tan (const std::string &symbol_name, Symbol data)
 
Symbol arcsin (const std::string &symbol_name, Symbol data)
 
Symbol arccos (const std::string &symbol_name, Symbol data)
 
Symbol arctan (const std::string &symbol_name, Symbol data)
 
Symbol degrees (const std::string &symbol_name, Symbol data)
 
Symbol radians (const std::string &symbol_name, Symbol data)
 
Symbol sinh (const std::string &symbol_name, Symbol data)
 
Symbol cosh (const std::string &symbol_name, Symbol data)
 
Symbol tanh (const std::string &symbol_name, Symbol data)
 
Symbol arcsinh (const std::string &symbol_name, Symbol data)
 
Symbol arccosh (const std::string &symbol_name, Symbol data)
 
Symbol arctanh (const std::string &symbol_name, Symbol data)
 
Symbol Pooling (const std::string &symbol_name, Symbol data, Shape kernel=Shape(), PoolingPoolType pool_type=PoolingPoolType::kMax, bool global_pool=false, bool cudnn_off=false, PoolingPoolingConvention pooling_convention=PoolingPoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape(), dmlc::optional< int > p_value=dmlc::optional< int >(), dmlc::optional< bool > count_include_pad=dmlc::optional< bool >())
 
Symbol softmax (const std::string &symbol_name, Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())
 
Symbol softmin (const std::string &symbol_name, Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())
 
Symbol log_softmax (const std::string &symbol_name, Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())
 
Symbol Deconvolution (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), Shape adj=Shape(), Shape target_shape=Shape(), uint32_t num_group=1, uint64_t workspace=512, bool no_bias=true, DeconvolutionCudnnTune cudnn_tune=DeconvolutionCudnnTune::kNone, bool cudnn_off=false, DeconvolutionLayout layout=DeconvolutionLayout::kNone)
 
Symbol Activation (const std::string &symbol_name, Symbol data, ActivationActType act_type)
 
Symbol BatchNorm (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, Symbol moving_mean, Symbol moving_var, double eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false, int axis=1, bool cudnn_off=false)
 
Symbol CTCLoss (const std::string &symbol_name, Symbol data, Symbol label, Symbol data_lengths, Symbol label_lengths, bool use_data_lengths=false, bool use_label_lengths=false, CTCLossBlankLabel blank_label=CTCLossBlankLabel::kFirst)
 
Symbol Convolution (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, ConvolutionCudnnTune cudnn_tune=ConvolutionCudnnTune::kNone, bool cudnn_off=false, ConvolutionLayout layout=ConvolutionLayout::kNone)
 
Symbol UpSampling (const std::string &symbol_name, const std::vector< Symbol > &data, int scale, UpSamplingSampleType sample_type, int num_args, int num_filter=0, UpSamplingMultiInputMode multi_input_mode=UpSamplingMultiInputMode::kConcat, uint64_t workspace=512)
 
Symbol Concat (const std::string &symbol_name, const std::vector< Symbol > &data, int num_args, int dim=1)
 
Symbol LayerNorm (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, int axis=-1, mx_float eps=1e-05, bool output_mean_var=false)
 
Symbol LRN (const std::string &symbol_name, Symbol data, uint32_t nsize, mx_float alpha=0.0001, mx_float beta=0.75, mx_float knorm=2)
 
Symbol Dropout (const std::string &symbol_name, Symbol data, mx_float p=0.5, DropoutMode mode=DropoutMode::kTraining, Shape axes=Shape())
 
Symbol SoftmaxActivation (const std::string &symbol_name, Symbol data, SoftmaxActivationMode mode=SoftmaxActivationMode::kInstance)
 
Symbol FullyConnected (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, int num_hidden, bool no_bias=false, bool flatten=true)
 
Symbol Pad (const std::string &symbol_name, Symbol data, PadMode mode, Shape pad_width, double constant_value=0)
 
Symbol LeakyReLU (const std::string &symbol_name, Symbol data, Symbol gamma, LeakyReLUActType act_type=LeakyReLUActType::kLeaky, mx_float slope=0.25, mx_float lower_bound=0.125, mx_float upper_bound=0.334)
 
Symbol SwapAxis (const std::string &symbol_name, Symbol data, uint32_t dim1=0, uint32_t dim2=0)
 
Symbol BatchNorm_v1 (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false)
 
Symbol softmax_cross_entropy (const std::string &symbol_name, Symbol data, Symbol label)
 
Symbol LinearRegressionOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1)
 
Symbol MAERegressionOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1)
 
Symbol LogisticRegressionOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1)
 
Symbol IdentityAttachKLSparseReg (const std::string &symbol_name, Symbol data, mx_float sparseness_target=0.1, mx_float penalty=0.001, mx_float momentum=0.9)
 
Symbol signsgd_update (const std::string &symbol_name, Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)
 
Symbol signum_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float wd_lh=0)
 
Symbol sgd_update (const std::string &symbol_name, Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol sgd_mom_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol mp_sgd_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol weight32, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol mp_sgd_mom_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mom, Symbol weight32, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol ftml_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol d, Symbol v, Symbol z, mx_float lr, int t, mx_float beta1=0.6, mx_float beta2=0.999, double epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_grad=-1)
 
Symbol adam_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mean, Symbol var, mx_float lr, mx_float beta1=0.9, mx_float beta2=0.999, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol rmsprop_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol n, mx_float lr, mx_float gamma1=0.95, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)
 
Symbol rmspropalex_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol n, Symbol g, Symbol delta, mx_float lr, mx_float gamma1=0.95, mx_float gamma2=0.9, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)
 
Symbol ftrl_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol z, Symbol n, mx_float lr, mx_float lamda1=0.01, mx_float beta=1, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)
 
Symbol SliceChannel (const std::string &symbol_name, Symbol data, int num_outputs, int axis=1, bool squeeze_axis=false)
 
Symbol InstanceNorm (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001)
 
Symbol GridGenerator (const std::string &symbol_name, Symbol data, GridGeneratorTransformType transform_type, Shape target_shape=Shape(0, 0))
 
Symbol Pooling_v1 (const std::string &symbol_name, Symbol data, Shape kernel=Shape(), Pooling_v1PoolType pool_type=Pooling_v1PoolType::kMax, bool global_pool=false, Pooling_v1PoolingConvention pooling_convention=Pooling_v1PoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape())
 
Symbol RNN (const std::string &symbol_name, Symbol data, Symbol parameters, Symbol state, Symbol state_cell, uint32_t state_size, uint32_t num_layers, RNNMode mode, bool bidirectional=false, mx_float p=0, bool state_outputs=false, dmlc::optional< int > projection_size=dmlc::optional< int >(), dmlc::optional< double > lstm_state_clip_min=dmlc::optional< double >(), dmlc::optional< double > lstm_state_clip_max=dmlc::optional< double >(), bool lstm_state_clip_nan=false)
 
Symbol Convolution_v1 (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, Convolution_v1CudnnTune cudnn_tune=Convolution_v1CudnnTune::kNone, bool cudnn_off=false, Convolution_v1Layout layout=Convolution_v1Layout::kNone)
 
Symbol Crop (const std::string &symbol_name, const std::vector< Symbol > &data, int num_args, Shape offset=Shape(0, 0), Shape h_w=Shape(0, 0), bool center_crop=false)
 
Symbol SequenceReverse (const std::string &symbol_name, Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)
 
Symbol SpatialTransformer (const std::string &symbol_name, Symbol data, Symbol loc, SpatialTransformerTransformType transform_type, SpatialTransformerSamplerType sampler_type, Shape target_shape=Shape(0, 0), dmlc::optional< bool > cudnn_off=dmlc::optional< bool >())
 
Symbol SoftmaxOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxOutputNormalization normalization=SoftmaxOutputNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)
 
Symbol Softmax (const std::string &symbol_name, Symbol data, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxNormalization normalization=SoftmaxNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)
 
Symbol BilinearSampler (const std::string &symbol_name, Symbol data, Symbol grid, dmlc::optional< bool > cudnn_off=dmlc::optional< bool >())
 
Symbol ROIPooling (const std::string &symbol_name, Symbol data, Symbol rois, Shape pooled_size, mx_float spatial_scale)
 
Symbol SequenceLast (const std::string &symbol_name, Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)
 
Symbol L2Normalization (const std::string &symbol_name, Symbol data, mx_float eps=1e-10, L2NormalizationMode mode=L2NormalizationMode::kInstance)
 
Symbol MakeLoss (const std::string &symbol_name, Symbol data, mx_float grad_scale=1, mx_float valid_thresh=0, MakeLossNormalization normalization=MakeLossNormalization::kNull)
 
Symbol SVMOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float margin=1, mx_float regularization_coefficient=1, bool use_linear=false)
 
Symbol Correlation (const std::string &symbol_name, Symbol data1, Symbol data2, uint32_t kernel_size=1, uint32_t max_displacement=1, uint32_t stride1=1, uint32_t stride2=1, uint32_t pad_size=0, bool is_multiply=true)
 
Symbol SequenceMask (const std::string &symbol_name, Symbol data, Symbol sequence_length, bool use_sequence_length=false, mx_float value=0, int axis=0)
 
Symbol choose_element_0index (const std::string &symbol_name, Symbol lhs, Symbol rhs)
 
Symbol fill_element_0index (const std::string &symbol_name, Symbol lhs, Symbol mhs, Symbol rhs)
 
Symbol khatri_rao (const std::vector< Symbol > &args)
 
Symbol Custom (const std::vector< Symbol > &data, const std::string &op_type)
 
Symbol broadcast_power (Symbol lhs, Symbol rhs)
 
Symbol broadcast_maximum (Symbol lhs, Symbol rhs)
 
Symbol broadcast_minimum (Symbol lhs, Symbol rhs)
 
Symbol broadcast_hypot (Symbol lhs, Symbol rhs)
 
Symbol Reshape (Symbol data, Shape shape=Shape(), bool reverse=false, Shape target_shape=Shape(), bool keep_highest=false)
 
Symbol Flatten (Symbol data)
 
Symbol transpose (Symbol data, Shape axes=Shape())
 
Symbol expand_dims (Symbol data, int axis)
 
Symbol slice (Symbol data, Shape begin, Shape end, Shape step=Shape())
 
Symbol slice_axis (Symbol data, int axis, int begin, dmlc::optional< int > end)
 
Symbol slice_like (Symbol data, Symbol shape_like, Shape axes=Shape())
 
Symbol clip (Symbol data, mx_float a_min, mx_float a_max)
 
Symbol repeat (Symbol data, int repeats, dmlc::optional< int > axis=dmlc::optional< int >())
 
Symbol tile (Symbol data, Shape reps)
 
Symbol reverse (Symbol data, Shape axis)
 
Symbol stack (const std::vector< Symbol > &data, int num_args, int axis=0)
 
Symbol squeeze (const std::vector< Symbol > &data, dmlc::optional< Shape > axis=dmlc::optional< Shape >())
 
Symbol depth_to_space (Symbol data, int block_size)
 
Symbol space_to_depth (Symbol data, int block_size)
 
Symbol zeros_like (Symbol data)
 
Symbol ones_like (Symbol data)
 
Symbol add_n (const std::vector< Symbol > &args)
 
Symbol argmax (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)
 
Symbol argmin (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)
 
Symbol argmax_channel (Symbol data)
 
Symbol pick (Symbol data, Symbol index, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool keepdims=false, PickMode mode=PickMode::kClip)
 
Symbol dot (Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, DotForwardStype forward_stype=DotForwardStype::kNone)
 
Symbol batch_dot (Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, Batch_dotForwardStype forward_stype=Batch_dotForwardStype::kNone)
 
Symbol broadcast_add (Symbol lhs, Symbol rhs)
 
Symbol broadcast_sub (Symbol lhs, Symbol rhs)
 
Symbol broadcast_mul (Symbol lhs, Symbol rhs)
 
Symbol broadcast_div (Symbol lhs, Symbol rhs)
 
Symbol broadcast_mod (Symbol lhs, Symbol rhs)
 
Symbol relu (Symbol data)
 
Symbol sigmoid (Symbol data)
 
Symbol hard_sigmoid (Symbol data, mx_float alpha=0.2, mx_float beta=0.5)
 
Symbol softsign (Symbol data)
 
Symbol BlockGrad (Symbol data)
 
Symbol make_loss (Symbol data)
 
Symbol reshape_like (Symbol lhs, Symbol rhs)
 
Symbol shape_array (Symbol data, dmlc::optional< int > lhs_begin=dmlc::optional< int >(), dmlc::optional< int > lhs_end=dmlc::optional< int >(), dmlc::optional< int > rhs_begin=dmlc::optional< int >(), dmlc::optional< int > rhs_end=dmlc::optional< int >())
 
Symbol size_array (Symbol data)
 
Symbol Cast (Symbol data, CastDtype dtype)
 
Symbol negative (Symbol data)
 
Symbol reciprocal (Symbol data)
 
Symbol abs (Symbol data)
 
Symbol sign (Symbol data)
 
Symbol round (Symbol data)
 
Symbol rint (Symbol data)
 
Symbol ceil (Symbol data)
 
Symbol floor (Symbol data)
 
Symbol trunc (Symbol data)
 
Symbol fix (Symbol data)
 
Symbol square (Symbol data)
 
Symbol sqrt (Symbol data)
 
Symbol rsqrt (Symbol data)
 
Symbol cbrt (Symbol data)
 
Symbol erf (Symbol data)
 
Symbol rcbrt (Symbol data)
 
Symbol exp (Symbol data)
 
Symbol log (Symbol data)
 
Symbol log10 (Symbol data)
 
Symbol log2 (Symbol data)
 
Symbol log1p (Symbol data)
 
Symbol expm1 (Symbol data)
 
Symbol gamma (Symbol data)
 
Symbol gammaln (Symbol data)
 
Symbol logical_not (Symbol data)
 
Symbol sum (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol mean (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol prod (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol nansum (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol nanprod (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol max (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol min (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
 
Symbol broadcast_axis (Symbol data, Shape axis=Shape(), Shape size=Shape())
 
Symbol broadcast_to (Symbol data, Shape shape=Shape())
 
Symbol broadcast_like (Symbol lhs, Symbol rhs, dmlc::optional< Shape > lhs_axes=dmlc::optional< Shape >(), dmlc::optional< Shape > rhs_axes=dmlc::optional< Shape >())
 
Symbol norm (Symbol data, int ord=2, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false)
 
Symbol topk (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), int k=1, TopkRetTyp ret_typ=TopkRetTyp::kIndices, bool is_ascend=false, TopkDtype dtype=TopkDtype::kFloat32)
 
Symbol sort (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
 
Symbol argsort (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true, ArgsortDtype dtype=ArgsortDtype::kFloat32)
 
Symbol elemwise_add (Symbol lhs, Symbol rhs)
 
Symbol elemwise_sub (Symbol lhs, Symbol rhs)
 
Symbol elemwise_mul (Symbol lhs, Symbol rhs)
 
Symbol elemwise_div (Symbol lhs, Symbol rhs)
 
Symbol Embedding (Symbol data, Symbol weight, int input_dim, int output_dim, EmbeddingDtype dtype=EmbeddingDtype::kFloat32, bool sparse_grad=false)
 
Symbol take (Symbol a, Symbol indices, int axis=0, TakeMode mode=TakeMode::kClip)
 
Symbol batch_take (Symbol a, Symbol indices)
 
Symbol one_hot (Symbol indices, int depth, double on_value=1, double off_value=0, One_hotDtype dtype=One_hotDtype::kFloat32)
 
Symbol gather_nd (Symbol data, Symbol indices)
 
Symbol scatter_nd (Symbol data, Symbol indices, Shape shape)
 
Symbol broadcast_equal (Symbol lhs, Symbol rhs)
 
Symbol broadcast_not_equal (Symbol lhs, Symbol rhs)
 
Symbol broadcast_greater (Symbol lhs, Symbol rhs)
 
Symbol broadcast_greater_equal (Symbol lhs, Symbol rhs)
 
Symbol broadcast_lesser (Symbol lhs, Symbol rhs)
 
Symbol broadcast_lesser_equal (Symbol lhs, Symbol rhs)
 
Symbol broadcast_logical_and (Symbol lhs, Symbol rhs)
 
Symbol broadcast_logical_or (Symbol lhs, Symbol rhs)
 
Symbol broadcast_logical_xor (Symbol lhs, Symbol rhs)
 
Symbol diag (Symbol data, int k=0, int axis1=0, int axis2=1)
 
Symbol where (Symbol condition, Symbol x, Symbol y)
 
Symbol smooth_l1 (Symbol data, mx_float scalar)
 
Symbol cast_storage (Symbol data, Cast_storageStype stype)
 
Symbol sin (Symbol data)
 
Symbol cos (Symbol data)
 
Symbol tan (Symbol data)
 
Symbol arcsin (Symbol data)
 
Symbol arccos (Symbol data)
 
Symbol arctan (Symbol data)
 
Symbol degrees (Symbol data)
 
Symbol radians (Symbol data)
 
Symbol sinh (Symbol data)
 
Symbol cosh (Symbol data)
 
Symbol tanh (Symbol data)
 
Symbol arcsinh (Symbol data)
 
Symbol arccosh (Symbol data)
 
Symbol arctanh (Symbol data)
 
Symbol Pooling (Symbol data, Shape kernel=Shape(), PoolingPoolType pool_type=PoolingPoolType::kMax, bool global_pool=false, bool cudnn_off=false, PoolingPoolingConvention pooling_convention=PoolingPoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape(), dmlc::optional< int > p_value=dmlc::optional< int >(), dmlc::optional< bool > count_include_pad=dmlc::optional< bool >())
 
Symbol softmax (Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())
 
Symbol softmin (Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())
 
Symbol log_softmax (Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())
 
Symbol Deconvolution (Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), Shape adj=Shape(), Shape target_shape=Shape(), uint32_t num_group=1, uint64_t workspace=512, bool no_bias=true, DeconvolutionCudnnTune cudnn_tune=DeconvolutionCudnnTune::kNone, bool cudnn_off=false, DeconvolutionLayout layout=DeconvolutionLayout::kNone)
 
Symbol Activation (Symbol data, ActivationActType act_type)
 
Symbol BatchNorm (Symbol data, Symbol gamma, Symbol beta, Symbol moving_mean, Symbol moving_var, double eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false, int axis=1, bool cudnn_off=false)
 
Symbol CTCLoss (Symbol data, Symbol label, Symbol data_lengths, Symbol label_lengths, bool use_data_lengths=false, bool use_label_lengths=false, CTCLossBlankLabel blank_label=CTCLossBlankLabel::kFirst)
 
Symbol Convolution (Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, ConvolutionCudnnTune cudnn_tune=ConvolutionCudnnTune::kNone, bool cudnn_off=false, ConvolutionLayout layout=ConvolutionLayout::kNone)
 
Symbol UpSampling (const std::vector< Symbol > &data, int scale, UpSamplingSampleType sample_type, int num_args, int num_filter=0, UpSamplingMultiInputMode multi_input_mode=UpSamplingMultiInputMode::kConcat, uint64_t workspace=512)
 
Symbol Concat (const std::vector< Symbol > &data, int num_args, int dim=1)
 
Symbol LayerNorm (Symbol data, Symbol gamma, Symbol beta, int axis=-1, mx_float eps=1e-05, bool output_mean_var=false)
 
Symbol LRN (Symbol data, uint32_t nsize, mx_float alpha=0.0001, mx_float beta=0.75, mx_float knorm=2)
 
Symbol Dropout (Symbol data, mx_float p=0.5, DropoutMode mode=DropoutMode::kTraining, Shape axes=Shape())
 
Symbol SoftmaxActivation (Symbol data, SoftmaxActivationMode mode=SoftmaxActivationMode::kInstance)
 
Symbol FullyConnected (Symbol data, Symbol weight, Symbol bias, int num_hidden, bool no_bias=false, bool flatten=true)
 
Symbol Pad (Symbol data, PadMode mode, Shape pad_width, double constant_value=0)
 
Symbol LeakyReLU (Symbol data, Symbol gamma, LeakyReLUActType act_type=LeakyReLUActType::kLeaky, mx_float slope=0.25, mx_float lower_bound=0.125, mx_float upper_bound=0.334)
 
Symbol SwapAxis (Symbol data, uint32_t dim1=0, uint32_t dim2=0)
 
Symbol BatchNorm_v1 (Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false)
 
Symbol softmax_cross_entropy (Symbol data, Symbol label)
 
Symbol LinearRegressionOutput (Symbol data, Symbol label, mx_float grad_scale=1)
 
Symbol MAERegressionOutput (Symbol data, Symbol label, mx_float grad_scale=1)
 
Symbol LogisticRegressionOutput (Symbol data, Symbol label, mx_float grad_scale=1)
 
Symbol IdentityAttachKLSparseReg (Symbol data, mx_float sparseness_target=0.1, mx_float penalty=0.001, mx_float momentum=0.9)
 
Symbol signsgd_update (Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)
 
Symbol signum_update (Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float wd_lh=0)
 
Symbol sgd_update (Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol sgd_mom_update (Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol mp_sgd_update (Symbol weight, Symbol grad, Symbol weight32, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol mp_sgd_mom_update (Symbol weight, Symbol grad, Symbol mom, Symbol weight32, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol ftml_update (Symbol weight, Symbol grad, Symbol d, Symbol v, Symbol z, mx_float lr, int t, mx_float beta1=0.6, mx_float beta2=0.999, double epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_grad=-1)
 
Symbol adam_update (Symbol weight, Symbol grad, Symbol mean, Symbol var, mx_float lr, mx_float beta1=0.9, mx_float beta2=0.999, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)
 
Symbol rmsprop_update (Symbol weight, Symbol grad, Symbol n, mx_float lr, mx_float gamma1=0.95, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)
 
Symbol rmspropalex_update (Symbol weight, Symbol grad, Symbol n, Symbol g, Symbol delta, mx_float lr, mx_float gamma1=0.95, mx_float gamma2=0.9, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)
 
Symbol ftrl_update (Symbol weight, Symbol grad, Symbol z, Symbol n, mx_float lr, mx_float lamda1=0.01, mx_float beta=1, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)
 
Symbol SliceChannel (Symbol data, int num_outputs, int axis=1, bool squeeze_axis=false)
 
Symbol InstanceNorm (Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001)
 
Symbol GridGenerator (Symbol data, GridGeneratorTransformType transform_type, Shape target_shape=Shape(0, 0))
 
Symbol Pooling_v1 (Symbol data, Shape kernel=Shape(), Pooling_v1PoolType pool_type=Pooling_v1PoolType::kMax, bool global_pool=false, Pooling_v1PoolingConvention pooling_convention=Pooling_v1PoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape())
 
Symbol RNN (Symbol data, Symbol parameters, Symbol state, Symbol state_cell, uint32_t state_size, uint32_t num_layers, RNNMode mode, bool bidirectional=false, mx_float p=0, bool state_outputs=false, dmlc::optional< int > projection_size=dmlc::optional< int >(), dmlc::optional< double > lstm_state_clip_min=dmlc::optional< double >(), dmlc::optional< double > lstm_state_clip_max=dmlc::optional< double >(), bool lstm_state_clip_nan=false)
 
Symbol Convolution_v1 (Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, Convolution_v1CudnnTune cudnn_tune=Convolution_v1CudnnTune::kNone, bool cudnn_off=false, Convolution_v1Layout layout=Convolution_v1Layout::kNone)
 
Symbol Crop (const std::vector< Symbol > &data, int num_args, Shape offset=Shape(0, 0), Shape h_w=Shape(0, 0), bool center_crop=false)
 
Symbol SequenceReverse (Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)
 
Symbol SpatialTransformer (Symbol data, Symbol loc, SpatialTransformerTransformType transform_type, SpatialTransformerSamplerType sampler_type, Shape target_shape=Shape(0, 0), dmlc::optional< bool > cudnn_off=dmlc::optional< bool >())
 
Symbol SoftmaxOutput (Symbol data, Symbol label, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxOutputNormalization normalization=SoftmaxOutputNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)
 
Symbol Softmax (Symbol data, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxNormalization normalization=SoftmaxNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)
 
Symbol BilinearSampler (Symbol data, Symbol grid, dmlc::optional< bool > cudnn_off=dmlc::optional< bool >())
 
Symbol ROIPooling (Symbol data, Symbol rois, Shape pooled_size, mx_float spatial_scale)
 
Symbol SequenceLast (Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)
 
Symbol L2Normalization (Symbol data, mx_float eps=1e-10, L2NormalizationMode mode=L2NormalizationMode::kInstance)
 
Symbol MakeLoss (Symbol data, mx_float grad_scale=1, mx_float valid_thresh=0, MakeLossNormalization normalization=MakeLossNormalization::kNull)
 
Symbol SVMOutput (Symbol data, Symbol label, mx_float margin=1, mx_float regularization_coefficient=1, bool use_linear=false)
 
Symbol Correlation (Symbol data1, Symbol data2, uint32_t kernel_size=1, uint32_t max_displacement=1, uint32_t stride1=1, uint32_t stride2=1, uint32_t pad_size=0, bool is_multiply=true)
 
Symbol SequenceMask (Symbol data, Symbol sequence_length, bool use_sequence_length=false, mx_float value=0, int axis=0)
 
Symbol choose_element_0index (Symbol lhs, Symbol rhs)
 
Symbol fill_element_0index (Symbol lhs, Symbol mhs, Symbol rhs)
 
Symbol _Plus (Symbol lhs, Symbol rhs)
 
Symbol _Mul (Symbol lhs, Symbol rhs)
 
Symbol _Minus (Symbol lhs, Symbol rhs)
 
Symbol _Div (Symbol lhs, Symbol rhs)
 
Symbol _Mod (Symbol lhs, Symbol rhs)
 
Symbol _Power (Symbol lhs, Symbol rhs)
 
Symbol _Maximum (Symbol lhs, Symbol rhs)
 
Symbol _Minimum (Symbol lhs, Symbol rhs)
 
Symbol _PlusScalar (Symbol lhs, mx_float scalar)
 
Symbol _MinusScalar (Symbol lhs, mx_float scalar)
 
Symbol _RMinusScalar (mx_float scalar, Symbol rhs)
 
Symbol _MulScalar (Symbol lhs, mx_float scalar)
 
Symbol _DivScalar (Symbol lhs, mx_float scalar)
 
Symbol _RDivScalar (mx_float scalar, Symbol rhs)
 
Symbol _ModScalar (Symbol lhs, mx_float scalar)
 
Symbol _RModScalar (mx_float scalar, Symbol rhs)
 
Symbol _PowerScalar (Symbol lhs, mx_float scalar)
 
Symbol _RPowerScalar (mx_float scalar, Symbol rhs)
 
Symbol _MaximumScalar (Symbol lhs, mx_float scalar)
 
Symbol _MinimumScalar (Symbol lhs, mx_float scalar)
 
Symbol Crop (const std::string &symbol_name, int num_args, Symbol data, Symbol crop_like, Shape offset=Shape(0, 0), Shape h_w=Shape(0, 0), bool center_crop=false)
 
Symbol Activation (const std::string &symbol_name, Symbol data, const std::string &act_type)
 Apply activation function to input. Softmax Activation is only available with CUDNN on GPU and will be computed at each location across channel if input is 4D. More...
 
std::ostream & operator<< (std::ostream &os, const Shape &shape)
 allow string printing of the shape More...
 
std::istream & operator>> (std::istream &is, Shape &shape)
 read shape from the istream More...
 
Symbol operator+ (mx_float lhs, const Symbol &rhs)
 
Symbol operator- (mx_float lhs, const Symbol &rhs)
 
Symbol operator* (mx_float lhs, const Symbol &rhs)
 
Symbol operator/ (mx_float lhs, const Symbol &rhs)
 
Symbol operator% (mx_float lhs, const Symbol &rhs)
 

Typedef Documentation

typedef unsigned mxnet::cpp::index_t
typedef std::function<Optimizer*()> mxnet::cpp::OptimizerCreator

Enumeration Type Documentation

Activation function to be applied.

Enumerator
kRelu 
kSigmoid 
kSoftrelu 
kSoftsign 
kTanh 

DType of the output indices. It is only valid when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Enumerator
kFloat16 
kFloat32 
kFloat64 
kInt32 
kUint8 

The desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform a fallback operation and still produce an output of the desired storage type.

Enumerator
kNone 
kCsr 
kDefault 
kRow_sparse 

Output storage type.

Enumerator
kCsr 
kDefault 
kRow_sparse 
enum mxnet::cpp::CastDtype
strong

Output data type.

Enumerator
kFloat16 
kFloat32 
kFloat64 
kInt32 
kInt64 
kInt8 
kUint8 

Whether to pick convolution algo by running performance test. Leads to higher startup time but may give faster speed. Options are: 'off': no tuning; 'limited_workspace': run test and pick the fastest algorithm that doesn't exceed workspace limit; 'fastest': pick the fastest algorithm and ignore workspace limit. If set to None (default), behavior is determined by environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off, 1 for limited workspace (default), 2 for fastest.

Enumerator
kNone 
kFastest 
kLimited_workspace 
kOff 

Set layout for input, output and weight. Empty for default layout: NCHW for 2d and NCDHW for 3d.

Enumerator
kNone 
kNCDHW 
kNCHW 
kNDHWC 
kNHWC 

Whether to pick convolution algo by running performance test.

Enumerator
kNone 
kFastest 
kLimited_workspace 
kOff 

Set layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Enumerator
kNone 
kNCDHW 
kNCHW 
kNCW 
kNDHWC 
kNHWC 

Set the label that is reserved for blank label. If "first", 0-th label is reserved, and label values for tokens in the vocabulary are between 1 and alphabet_size-1, and the padding mask is -1. If "last", last label value alphabet_size-1 is reserved for blank label instead, and label values for tokens in the vocabulary are between 0 and alphabet_size-2, and the padding mask is 0.

Enumerator
kFirst 
kLast 

Whether to pick convolution algorithm by running performance test.

Enumerator
kNone 
kFastest 
kLimited_workspace 
kOff 

Set layout for input, output and weight. Empty for default layout, NCW for 1d, NCHW for 2d and NCDHW for 3d.

Enumerator
kNone 
kNCDHW 
kNCHW 
kNCW 
kNDHWC 
kNHWC 
Enumerator
kCPU 
kGPU 
kCPUPinned 

The desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform a fallback operation and still produce an output of the desired storage type.

Enumerator
kNone 
kCsr 
kDefault 
kRow_sparse 

Whether to only turn on dropout during training or to also turn on for inference.

Enumerator
kAlways 
kTraining 

Data type of weight.

Enumerator
kFloat16 
kFloat32 
kFloat64 
kInt32 
kInt64 
kInt8 
kUint8 

The type of transformation. For affine, input data should be an affine matrix of size (batch, 6). For warp, input data should be an optical flow of size (batch, 2, h, w).

Enumerator
kAffine 
kWarp 

Specify the dimension along which to compute L2 norm.

Enumerator
kChannel 
kInstance 
kSpatial 

Activation function to be applied.

Enumerator
kElu 
kLeaky 
kPrelu 
kRrelu 
kSelu 

If this is set to null, the output gradient will not be normalized. If this is set to batch, the output gradient will be divided by the batch size. If this is set to valid, the output gradient will be divided by the number of valid input elements.

Enumerator
kBatch 
kNull 
kValid 

DType of the output

Enumerator
kFloat16 
kFloat32 
kFloat64 
kInt32 
kInt64 
kInt8 
kUint8 
Enumerator
kNullOp 

no operation, do not write anything

kWriteTo 

write gradient to provided space

kWriteInplace 

perform an inplace write, Target shares memory with one of input arguments. This option only happen when

kAddTo 

add to the provided space

enum mxnet::cpp::PadMode
strong

Padding type to use. "constant" pads with constant_value, "edge" pads using the edge values of the input array, "reflect" pads by reflecting values with respect to the edges.

Enumerator
kConstant 
kEdge 
kReflect 
enum mxnet::cpp::PickMode
strong

Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Enumerator
kClip 
kWrap 

Pooling convention to be applied.

Enumerator
kFull 
kValid 

Pooling type to be applied.

Enumerator
kAvg 
kMax 
kSum 

Pooling convention to be applied.

Enumerator
kFull 
kSame 
kValid 

Pooling type to be applied.

Enumerator
kAvg 
kLp 
kMax 
kSum 
enum mxnet::cpp::RNNMode
strong

the type of RNN to compute

Enumerator
kGru 
kLstm 
kRnn_relu 
kRnn_tanh 

Specifies how to compute the softmax. If set to instance, it computes softmax for each instance. If set to channel, it computes cross channel softmax for each position of each instance.

Enumerator
kChannel 
kInstance 

Normalizes the gradient.

Enumerator
kBatch 
kNull 
kValid 

Normalizes the gradient.

Enumerator
kBatch 
kNull 
kValid 

sampling type

Enumerator
kBilinear 

transformation type

Enumerator
kAffine 
enum mxnet::cpp::TakeMode
strong

Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Enumerator
kClip 
kRaise 
kWrap 
enum mxnet::cpp::TopkDtype
strong

DType of the output indices when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Enumerator
kFloat16 
kFloat32 
kFloat64 
kInt32 
kUint8 

The return type. "value" means to return the top k values, "indices" means to return the indices of the top k values, "mask" means to return a mask array containing 0 and 1. 1 means the top k values. "both" means to return a list of both values and indices of top k elements.

Enumerator
kBoth 
kIndices 
kMask 
kValue 

How to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.

Enumerator
kConcat 
kSum 

upsampling method

Enumerator
kBilinear 
kNearest 

Function Documentation

NDArray mxnet::cpp::_default_monitor_func ( const NDArray x)

Default function for monitor that computes statistics of the input tensor, which is the mean absolute |x|/size(x)

Parameters
x: The input tensor
Returns
The statistics of the input tensor
Symbol mxnet::cpp::_Div ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_DivScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Maximum ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_MaximumScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Minimum ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_MinimumScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Minus ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_MinusScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Mod ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_ModScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Mul ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_MulScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Plus ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_PlusScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_Power ( Symbol  lhs,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_PowerScalar ( Symbol  lhs,
mx_float  scalar 
)
inline
Symbol mxnet::cpp::_RDivScalar ( mx_float  scalar,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_RMinusScalar ( mx_float  scalar,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_RModScalar ( mx_float  scalar,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::_RPowerScalar ( mx_float  scalar,
Symbol  rhs 
)
inline
Symbol mxnet::cpp::abs ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise absolute value of the input.

   Example::

   abs([-2, 0, 3]) = [2, 0, 3]

   The storage type of ``abs`` output depends upon the input storage type:

   - abs(default) = default
   - abs(row_sparse) = row_sparse
   - abs(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L662
Parameters
symbol_name: name of the resulting symbol
data: The input array.
Returns
new symbol
Symbol mxnet::cpp::abs ( Symbol  data)
inline

Returns element-wise absolute value of the input.

   Example::

   abs([-2, 0, 3]) = [2, 0, 3]

   The storage type of ``abs`` output depends upon the input storage type:

   - abs(default) = default
   - abs(row_sparse) = row_sparse
   - abs(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L662
Parameters
data: The input array.
Returns
new symbol
Symbol mxnet::cpp::Activation ( const std::string &  symbol_name,
Symbol  data,
const std::string &  act_type 
)
inline

Apply activation function to input. Softmax Activation is only available with CUDNN on GPU and will be computed at each location across channel if input is 4D.

Parameters
symbol_name: name of the resulting symbol.
data: Input data to activation function.
act_type: Activation function to be applied.
Returns
new symbol
Symbol mxnet::cpp::Activation ( const std::string &  symbol_name,
Symbol  data,
ActivationActType  act_type 
)
inline

Applies an activation function element-wise to the input.

   The following activation functions are supported:

   - `relu`: Rectified Linear Unit, :math:`y = max(x, 0)`
   - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
   - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
   - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
   - `softsign`: :math:`y = \frac{x}{1 + abs(x)}`



   Defined in src/operator/nn/activation.cc:L168
Parameters
symbol_name: name of the resulting symbol
data: The input array.
act_type: Activation function to be applied.
Returns
new symbol
Symbol mxnet::cpp::Activation ( Symbol  data,
ActivationActType  act_type 
)
inline

Applies an activation function element-wise to the input.

   The following activation functions are supported:

   - `relu`: Rectified Linear Unit, :math:`y = max(x, 0)`
   - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
   - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
   - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
   - `softsign`: :math:`y = \frac{x}{1 + abs(x)}`



   Defined in src/operator/nn/activation.cc:L168
Parameters
data: The input array.
act_type: Activation function to be applied.
Returns
new symbol
Symbol mxnet::cpp::adam_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  mean,
Symbol  var,
mx_float  lr,
mx_float  beta1 = 0.9,
mx_float  beta2 = 0.999,
mx_float  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Update function for Adam optimizer. Adam is seen as a generalization of AdaGrad.

Adam update consists of the following steps, where g represents gradient and m, v are 1st and 2nd order moment estimates (mean and variance).

.. math::

g_t = \nabla J(W_{t-1})\\
m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
W_t = W_{t-1} - \alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon }

It updates the weights using::

    m = beta1*m + (1-beta1)*grad
    v = beta2*v + (1-beta2)*(grad**2)
    w += - learning_rate * m / (sqrt(v) + epsilon)

However, if grad's storage type is row_sparse, lazy_update is True and type of weight is the same as those of m and v, only the row slices whose indices appear in grad.indices are updated (for w, m and v)::

    for row in grad.indices:
        m[row] = beta1*m[row] + (1-beta1)*grad[row]
        v[row] = beta2*v[row] + (1-beta2)*(grad[row]**2)
        w[row] += - learning_rate * m[row] / (sqrt(v[row]) + epsilon)

   Defined in src/operator/optimizer_op.cc:L495
Parameters
symbol_name: name of the resulting symbol
weight: Weight
grad: Gradient
mean: Moving mean
var: Moving variance
lr: Learning rate
beta1: The decay rate for the 1st moment estimates.
beta2: The decay rate for the 2nd moment estimates.
epsilon: A small constant for numerical stability.
wd: Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad: Rescale gradient to grad = rescale_grad*grad.
clip_gradient: Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update: If true, lazy updates are applied if gradient's stype is row_sparse
Returns
new symbol
Symbol mxnet::cpp::adam_update ( Symbol  weight,
Symbol  grad,
Symbol  mean,
Symbol  var,
mx_float  lr,
mx_float  beta1 = 0.9,
mx_float  beta2 = 0.999,
mx_float  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Update function for Adam optimizer. Adam is seen as a generalization of AdaGrad.

Adam update consists of the following steps, where g represents gradient and m, v are 1st and 2nd order moment estimates (mean and variance).

.. math::

g_t = \nabla J(W_{t-1})\\
m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
W_t = W_{t-1} - \alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon }

It updates the weights using::

    m = beta1*m + (1-beta1)*grad
    v = beta2*v + (1-beta2)*(grad**2)
    w += - learning_rate * m / (sqrt(v) + epsilon)

However, if grad's storage type is row_sparse, lazy_update is True and type of weight is the same as those of m and v, only the row slices whose indices appear in grad.indices are updated (for w, m and v)::

    for row in grad.indices:
        m[row] = beta1*m[row] + (1-beta1)*grad[row]
        v[row] = beta2*v[row] + (1-beta2)*(grad[row]**2)
        w[row] += - learning_rate * m[row] / (sqrt(v[row]) + epsilon)

   Defined in src/operator/optimizer_op.cc:L495
Parameters
weight: Weight
grad: Gradient
mean: Moving mean
var: Moving variance
lr: Learning rate
beta1: The decay rate for the 1st moment estimates.
beta2: The decay rate for the 2nd moment estimates.
epsilon: A small constant for numerical stability.
wd: Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad: Rescale gradient to grad = rescale_grad*grad.
clip_gradient: Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update: If true, lazy updates are applied if gradient's stype is row_sparse
Returns
new symbol
Symbol mxnet::cpp::add_n ( const std::string &  symbol_name,
const std::vector< Symbol > &  args 
)
inline

Adds all input arguments element-wise.

   .. math::
   add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n

   ``add_n`` is potentially more efficient than calling ``add`` by `n` times.

   The storage type of ``add_n`` output depends on storage types of inputs

   - add_n(row_sparse, row_sparse, ..) = row_sparse
   - add_n(default, csr, default) = default
   - add_n(any input combinations longer than 4 (>4) with at least one default type) = default
   - otherwise, ``add_n`` falls all inputs back to default storage and generates default storage



   Defined in src/operator/tensor/elemwise_sum.cc:L156
Parameters
symbol_namename of the resulting symbol
argsPositional input arguments
Returns
new symbol
Symbol mxnet::cpp::add_n ( const std::vector< Symbol > &  args)
inline

Adds all input arguments element-wise.

   .. math::
   add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n

   ``add_n`` is potentially more efficient than calling ``add`` by `n` times.

   The storage type of ``add_n`` output depends on storage types of inputs

   - add_n(row_sparse, row_sparse, ..) = row_sparse
   - add_n(default, csr, default) = default
   - add_n(any input combinations longer than 4 (>4) with at least one default type) = default
   - otherwise, ``add_n`` falls all inputs back to default storage and generates default storage



   Defined in src/operator/tensor/elemwise_sum.cc:L156
Parameters
argsPositional input arguments
Returns
new symbol
Symbol mxnet::cpp::arccos ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise inverse cosine of the input array.

   The input should be in range `[-1, 1]`.
   The output is in the closed interval :math:`[0, \pi]`

   .. math::
   arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]

   The storage type of ``arccos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L123
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arccos ( Symbol  data)
inline

Returns element-wise inverse cosine of the input array.

   The input should be in range `[-1, 1]`.
   The output is in the closed interval :math:`[0, \pi]`

   .. math::
   arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]

   The storage type of ``arccos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L123
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arccosh ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the element-wise inverse hyperbolic cosine of the input array, computed element-wise.

The storage type of arccosh output is always dense

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L264
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arccosh ( Symbol  data)
inline

Returns the element-wise inverse hyperbolic cosine of the input array, computed element-wise.

The storage type of arccosh output is always dense

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L264
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arcsin ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise inverse sine of the input array.

   The input should be in the range `[-1, 1]`.
   The output is in the closed interval of [:math:`-\pi/2`, :math:`\pi/2`].

   .. math::
   arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]

   The storage type of ``arcsin`` output depends upon the input storage type:

   - arcsin(default) = default
   - arcsin(row_sparse) = row_sparse
   - arcsin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L104
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arcsin ( Symbol  data)
inline

Returns element-wise inverse sine of the input array.

   The input should be in the range `[-1, 1]`.
   The output is in the closed interval of [:math:`-\pi/2`, :math:`\pi/2`].

   .. math::
   arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]

   The storage type of ``arcsin`` output depends upon the input storage type:

   - arcsin(default) = default
   - arcsin(row_sparse) = row_sparse
   - arcsin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L104
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arcsinh ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the element-wise inverse hyperbolic sine of the input array, computed element-wise.

The storage type of arcsinh output depends upon the input storage type:

  • arcsinh(default) = default
  • arcsinh(row_sparse) = row_sparse
  • arcsinh(csr) = csr
   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L250
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arcsinh ( Symbol  data)
inline

Returns the element-wise inverse hyperbolic sine of the input array, computed element-wise.

The storage type of arcsinh output depends upon the input storage type:

  • arcsinh(default) = default
  • arcsinh(row_sparse) = row_sparse
  • arcsinh(csr) = csr
   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L250
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arctan ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise inverse tangent of the input array.

   The output is in the closed interval :math:`[-\pi/2, \pi/2]`

   .. math::
   arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]

   The storage type of ``arctan`` output depends upon the input storage type:

   - arctan(default) = default
   - arctan(row_sparse) = row_sparse
   - arctan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L144
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arctan ( Symbol  data)
inline

Returns element-wise inverse tangent of the input array.

   The output is in the closed interval :math:`[-\pi/2, \pi/2]`

   .. math::
   arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]

   The storage type of ``arctan`` output depends upon the input storage type:

   - arctan(default) = default
   - arctan(row_sparse) = row_sparse
   - arctan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L144
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arctanh ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the element-wise inverse hyperbolic tangent of the input array, computed element-wise.

The storage type of arctanh output depends upon the input storage type:

  • arctanh(default) = default
  • arctanh(row_sparse) = row_sparse
  • arctanh(csr) = csr
   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L281
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::arctanh ( Symbol  data)
inline

Returns the element-wise inverse hyperbolic tangent of the input array, computed element-wise.

The storage type of arctanh output depends upon the input storage type:

  • arctanh(default) = default
  • arctanh(row_sparse) = row_sparse
  • arctanh(csr) = csr
   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L281
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::argmax ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(),
bool  keepdims = false 
)
inline

Returns indices of the maximum values along an axis.

   In the case of multiple occurrences of maximum values, the indices
   corresponding to the first occurrence are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmax along axis 0
   argmax(x, axis=0) = [ 1.,  1.,  1.]

   // argmax along axis 1
   argmax(x, axis=1) = [ 2.,  2.]

   // argmax along axis 1 keeping same dims as an input array
   argmax(x, axis=1, keepdims=True) = [[ 2.],
   [ 2.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L52
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdimsIf this is set to `True`, the reduced axis is left in the result as dimension with size one.
Returns
new symbol
Symbol mxnet::cpp::argmax ( Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(),
bool  keepdims = false 
)
inline

Returns indices of the maximum values along an axis.

   In the case of multiple occurrences of maximum values, the indices
   corresponding to the first occurrence are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmax along axis 0
   argmax(x, axis=0) = [ 1.,  1.,  1.]

   // argmax along axis 1
   argmax(x, axis=1) = [ 2.,  2.]

   // argmax along axis 1 keeping same dims as an input array
   argmax(x, axis=1, keepdims=True) = [[ 2.],
   [ 2.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L52
Parameters
dataThe input
axisThe axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdimsIf this is set to `True`, the reduced axis is left in the result as dimension with size one.
Returns
new symbol
Symbol mxnet::cpp::argmax_channel ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns argmax indices of each channel from the input array.

   The result will be an NDArray of shape (num_channel,).

   In case of multiple occurrences of the maximum values, the indices
   corresponding to the first occurrence are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   argmax_channel(x) = [ 2.,  2.]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L97
Parameters
symbol_namename of the resulting symbol
dataThe input array
Returns
new symbol
Symbol mxnet::cpp::argmax_channel ( Symbol  data)
inline

Returns argmax indices of each channel from the input array.

   The result will be an NDArray of shape (num_channel,).

   In case of multiple occurrences of the maximum values, the indices
   corresponding to the first occurrence are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   argmax_channel(x) = [ 2.,  2.]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L97
Parameters
dataThe input array
Returns
new symbol
Symbol mxnet::cpp::argmin ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(),
bool  keepdims = false 
)
inline

Returns indices of the minimum values along an axis.

   In the case of multiple occurrences of minimum values, the indices
   corresponding to the first occurrence are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmin along axis 0
   argmin(x, axis=0) = [ 0.,  0.,  0.]

   // argmin along axis 1
   argmin(x, axis=1) = [ 0.,  0.]

   // argmin along axis 1 keeping same dims as an input array
   argmin(x, axis=1, keepdims=True) = [[ 0.],
   [ 0.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L77
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdimsIf this is set to `True`, the reduced axis is left in the result as dimension with size one.
Returns
new symbol
Symbol mxnet::cpp::argmin ( Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(),
bool  keepdims = false 
)
inline

Returns indices of the minimum values along an axis.

   In the case of multiple occurrences of minimum values, the indices
   corresponding to the first occurrence are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmin along axis 0
   argmin(x, axis=0) = [ 0.,  0.,  0.]

   // argmin along axis 1
   argmin(x, axis=1) = [ 0.,  0.]

   // argmin along axis 1 keeping same dims as an input array
   argmin(x, axis=1, keepdims=True) = [[ 0.],
   [ 0.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L77
Parameters
dataThe input
axisThe axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdimsIf this is set to `True`, the reduced axis is left in the result as dimension with size one.
Returns
new symbol
Symbol mxnet::cpp::argsort ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
bool  is_ascend = true,
ArgsortDtype  dtype = ArgsortDtype::kFloat32 
)
inline

Returns the indices that would sort an input array along the given axis.

   This function performs sorting along the given axis and returns an array of
   indices having the same shape as an input array that index data in sorted order.

   Examples::

   x = [[ 0.3,  0.2,  0.4],
   [ 0.1,  0.3,  0.2]]

   // sort along axis -1
   argsort(x) = [[ 1.,  0.,  2.],
   [ 0.,  2.,  1.]]

   // sort along axis 0
   argsort(x, axis=0) = [[ 1.,  0.,  1.]
   [ 0.,  1.,  0.]]

   // flatten and then sort
   argsort(x, axis=None) = [ 3.,  1.,  5.,  0.,  4.,  2.]


   Defined in src/operator/tensor/ordering_op.cc:L177
Parameters
symbol_namename of the resulting symbol
dataThe input array
axisAxis along which to sort the input tensor. If not given, the flattened array is used. Default is -1.
is_ascendWhether to sort in ascending or descending order.
dtypeDType of the output indices. It is only valid when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.
Returns
new symbol
Symbol mxnet::cpp::argsort ( Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
bool  is_ascend = true,
ArgsortDtype  dtype = ArgsortDtype::kFloat32 
)
inline

Returns the indices that would sort an input array along the given axis.

   This function performs sorting along the given axis and returns an array of
   indices having the same shape as an input array that index data in sorted order.

   Examples::

   x = [[ 0.3,  0.2,  0.4],
   [ 0.1,  0.3,  0.2]]

   // sort along axis -1
   argsort(x) = [[ 1.,  0.,  2.],
   [ 0.,  2.,  1.]]

   // sort along axis 0
   argsort(x, axis=0) = [[ 1.,  0.,  1.]
   [ 0.,  1.,  0.]]

   // flatten and then sort
   argsort(x, axis=None) = [ 3.,  1.,  5.,  0.,  4.,  2.]


   Defined in src/operator/tensor/ordering_op.cc:L177
Parameters
dataThe input array
axisAxis along which to sort the input tensor. If not given, the flattened array is used. Default is -1.
is_ascendWhether to sort in ascending or descending order.
dtypeDType of the output indices. It is only valid when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.
Returns
new symbol
Symbol mxnet::cpp::batch_dot ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs,
bool  transpose_a = false,
bool  transpose_b = false,
Batch_dotForwardStype  forward_stype = Batch_dotForwardStype::kNone 
)
inline

Batchwise dot product.

   ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and
   ``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`.

   For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape
   `(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`,
   which is computed by::

   batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:])



   Defined in src/operator/tensor/dot.cc:L125
Parameters
symbol_namename of the resulting symbol
lhsThe first input
rhsThe second input
transpose_aIf true then transpose the first input before dot.
transpose_bIf true then transpose the second input before dot.
forward_stypeThe desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.
Returns
new symbol
Symbol mxnet::cpp::batch_dot ( Symbol  lhs,
Symbol  rhs,
bool  transpose_a = false,
bool  transpose_b = false,
Batch_dotForwardStype  forward_stype = Batch_dotForwardStype::kNone 
)
inline

Batchwise dot product.

   ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and
   ``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`.

   For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape
   `(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`,
   which is computed by::

   batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:])



   Defined in src/operator/tensor/dot.cc:L125
Parameters
lhsThe first input
rhsThe second input
transpose_aIf true then transpose the first input before dot.
transpose_bIf true then transpose the second input before dot.
forward_stypeThe desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.
Returns
new symbol
Symbol mxnet::cpp::batch_take ( const std::string &  symbol_name,
Symbol  a,
Symbol  indices 
)
inline

Takes elements from a data batch.

   .. note::
   `batch_take` is deprecated. Use `pick` instead.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the
   result will be an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // takes elements with specified indices
   batch_take(x, [0,1,0]) = [ 1.  4.  5.]



   Defined in src/operator/tensor/indexing_op.cc:L750
Parameters
symbol_namename of the resulting symbol
aThe input array
indicesThe index array
Returns
new symbol
Symbol mxnet::cpp::batch_take ( Symbol  a,
Symbol  indices 
)
inline

Takes elements from a data batch.

   .. note::
   `batch_take` is deprecated. Use `pick` instead.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the
   result will be an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // takes elements with specified indices
   batch_take(x, [0,1,0]) = [ 1.  4.  5.]



   Defined in src/operator/tensor/indexing_op.cc:L750
Parameters
aThe input array
indicesThe index array
Returns
new symbol
Symbol mxnet::cpp::BatchNorm ( const std::string &  symbol_name,
Symbol  data,
Symbol  gamma,
Symbol  beta,
Symbol  moving_mean,
Symbol  moving_var,
double  eps = 0.001,
mx_float  momentum = 0.9,
bool  fix_gamma = true,
bool  use_global_stats = false,
bool  output_mean_var = false,
int  axis = 1,
bool  cudnn_off = false 
)
inline

Batch normalization.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as follows:

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
   ``data_mean`` and the inverse of ``data_var``, which are needed for the backward
   pass. Note that the gradients of these two outputs are blocked.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is 1.  Specifying -1
   sets the channel axis to be the last item in the input shape.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
   true, then set ``gamma`` to 1 and its gradient to 0.

   .. Note::
   When ``fix_gamma`` is set to True, no sparse support is provided. If
   ``fix_gamma`` is set to False, the sparse tensors will fallback.



   Defined in src/operator/nn/batch_norm.cc:L574
Parameters
symbol_namename of the resulting symbol
dataInput data to batch normalization
gammagamma array
betabeta array
moving_meanrunning mean of input
moving_varrunning variance of input
epsEpsilon to prevent div 0. Must be no less than CUDNN_BN_MIN_EPSILON defined in cudnn.h when using cudnn (usually 1e-5)
momentumMomentum for moving average
fix_gammaFix gamma while training
use_global_statsWhether use global moving statistics instead of local batch-norm. This will force change batch-norm into a scale shift operator.
output_mean_varOutput the mean and inverse std
axisSpecify which shape axis the channel is specified
cudnn_offDo not select CUDNN operator, if available
Returns
new symbol
Symbol mxnet::cpp::BatchNorm ( Symbol  data,
Symbol  gamma,
Symbol  beta,
Symbol  moving_mean,
Symbol  moving_var,
double  eps = 0.001,
mx_float  momentum = 0.9,
bool  fix_gamma = true,
bool  use_global_stats = false,
bool  output_mean_var = false,
int  axis = 1,
bool  cudnn_off = false 
)
inline

Batch normalization.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as follows:

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
   ``data_mean`` and the inverse of ``data_var``, which are needed for the backward
   pass. Note that the gradients of these two outputs are blocked.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is 1.  Specifying -1
   sets the channel axis to be the last item in the input shape.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
   true, then set ``gamma`` to 1 and its gradient to 0.

   .. Note::
   When ``fix_gamma`` is set to True, no sparse support is provided. If
   ``fix_gamma`` is set to False, the sparse tensors will fallback.



   Defined in src/operator/nn/batch_norm.cc:L574
Parameters
dataInput data to batch normalization
gammagamma array
betabeta array
moving_meanrunning mean of input
moving_varrunning variance of input
epsEpsilon to prevent div 0. Must be no less than CUDNN_BN_MIN_EPSILON defined in cudnn.h when using cudnn (usually 1e-5)
momentumMomentum for moving average
fix_gammaFix gamma while training
use_global_statsWhether use global moving statistics instead of local batch-norm. This will force change batch-norm into a scale shift operator.
output_mean_varOutput the mean and inverse std
axisSpecify which shape axis the channel is specified
cudnn_offDo not select CUDNN operator, if available
Returns
new symbol
Symbol mxnet::cpp::BatchNorm_v1 ( const std::string &  symbol_name,
Symbol  data,
Symbol  gamma,
Symbol  beta,
mx_float  eps = 0.001,
mx_float  momentum = 0.9,
bool  fix_gamma = true,
bool  use_global_stats = false,
bool  output_mean_var = false 
)
inline

Batch normalization.

   This operator is DEPRECATED. Perform BatchNorm on the input.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as follows:

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
   ``data_mean`` and ``data_var`` as well, which are needed for the backward pass.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
   true, then set ``gamma`` to 1 and its gradient to 0.

   There's no sparse support for this operator, and it will exhibit problematic
   behavior if used with sparse tensors.



   Defined in src/operator/batch_norm_v1.cc:L95
Parameters
symbol_namename of the resulting symbol
dataInput data to batch normalization
gammagamma array
betabeta array
epsEpsilon to prevent div 0
momentumMomentum for moving average
fix_gammaFix gamma while training
use_global_statsWhether use global moving statistics instead of local batch-norm. This will force change batch-norm into a scale shift operator.
output_mean_varOutput All,normal mean and var
Returns
new symbol
Symbol mxnet::cpp::BatchNorm_v1 ( Symbol  data,
Symbol  gamma,
Symbol  beta,
mx_float  eps = 0.001,
mx_float  momentum = 0.9,
bool  fix_gamma = true,
bool  use_global_stats = false,
bool  output_mean_var = false 
)
inline

Batch normalization.

   This operator is DEPRECATED. Perform BatchNorm on the input.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as follows:

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both
   ``data_mean`` and ``data_var`` as well, which are needed for the backward pass.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is
   true, then set ``gamma`` to 1 and its gradient to 0.

   There's no sparse support for this operator, and it will exhibit problematic
   behavior if used with sparse tensors.



   Defined in src/operator/batch_norm_v1.cc:L95
Parameters
dataInput data to batch normalization
gammagamma array
betabeta array
epsEpsilon to prevent div 0
momentumMomentum for moving average
fix_gammaFix gamma while training
use_global_statsWhether use global moving statistics instead of local batch-norm. This will force change batch-norm into a scale shift operator.
output_mean_varOutput All,normal mean and var
Returns
new symbol
Symbol mxnet::cpp::BilinearSampler ( const std::string &  symbol_name,
Symbol  data,
Symbol  grid,
dmlc::optional< bool >  cudnn_off = dmlc::optional<bool>() 
)
inline

Applies bilinear sampling to input feature map.

   Bilinear Sampling is the key of [NIPS2015] "Spatial Transformer Networks".
   The usage of the operator is very similar to the remap function in OpenCV,
   except that the operator has the backward pass.

   Given :math:`data` and :math:`grid`, then the output is computed by

   .. math::
   x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
   y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
   output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src}, x_{src}])

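   :math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in
   :math:`output`, and :math:`G()` denotes the bilinear interpolation kernel.
   The out-boundary points will be padded with zeros. The shape of the output will
   be (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3]).

   The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has
   been normalized to [-1, 1].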

   BilinearSampler often cooperates with GridGenerator which generates sampling
   grids for BilinearSampler.
   GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
   If users want to design a CustomOp to manipulate :math:`grid`, please firstly
   refer to the code of GridGenerator.

   Example 1::

   ## Zoom out data two times
   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   affine_matrix = array([[2, 0, 0],
   [0, 2, 0]])

   affine_matrix = reshape(affine_matrix, shape=(1, 6))

   grid = GridGenerator(data=affine_matrix, transform_type='affine', target_shape=(4, 4))

   out = BilinearSampler(data, grid)

   out
   [[[[ 0,   0,     0,   0],
   [ 0,   3.5,   6.5, 0],
   [ 0,   1.25,  2.5, 0],
   [ 0,   0,     0,   0]]]


   Example 2::

   ## shift data horizontally by -1 pixel

   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   warp_matrix = array([[[[1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1]],
   [[0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0]]]])

   grid = GridGenerator(data=warp_matrix, transform_type='warp')
   out = BilinearSampler(data, grid)

   out
   [[[[ 4,  3,  6,  0],
   [ 8,  8,  9,  0],
   [ 4,  1,  5,  0],
   [ 0,  1,  3,  0]]]


   Defined in src/operator/bilinear_sampler.cc:L256
Parameters
symbol_namename of the resulting symbol
dataInput data to the BilinearsamplerOp.
gridInput grid to the BilinearsamplerOp.grid has two channels: x_src, y_src
cudnn_offwhether to turn cudnn off
Returns
new symbol
Symbol mxnet::cpp::BilinearSampler ( Symbol  data,
Symbol  grid,
dmlc::optional< bool >  cudnn_off = dmlc::optional<bool>() 
)
inline

Applies bilinear sampling to input feature map.

   Bilinear Sampling is the key of [NIPS2015] "Spatial Transformer Networks".
   The usage of the operator is very similar to the remap function in OpenCV,
   except that the operator has the backward pass.

   Given :math:`data` and :math:`grid`, then the output is computed by

   .. math::
   x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
   y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
   output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src}, x_{src}])

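   :math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in
   :math:`output`, and :math:`G()` denotes the bilinear interpolation kernel.
   The out-boundary points will be padded with zeros. The shape of the output will
   be (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3]).

   The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has
   been normalized to [-1, 1].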

   BilinearSampler often cooperates with GridGenerator which generates sampling
   grids for BilinearSampler.
   GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
   If users want to design a CustomOp to manipulate :math:`grid`, please firstly
   refer to the code of GridGenerator.

   Example 1::

   ## Zoom out data two times
   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   affine_matrix = array([[2, 0, 0],
   [0, 2, 0]])

   affine_matrix = reshape(affine_matrix, shape=(1, 6))

   grid = GridGenerator(data=affine_matrix, transform_type='affine', target_shape=(4, 4))

   out = BilinearSampler(data, grid)

   out
   [[[[ 0,   0,     0,   0],
   [ 0,   3.5,   6.5, 0],
   [ 0,   1.25,  2.5, 0],
   [ 0,   0,     0,   0]]]]


   Example 2::

   ## shift data horizontally by -1 pixel

   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   warp_matrix = array([[[[1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1]],
   [[0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0]]]])

   grid = GridGenerator(data=warp_matrix, transform_type='warp')
   out = BilinearSampler(data, grid)

   out
   [[[[ 4,  3,  6,  0],
   [ 8,  8,  9,  0],
   [ 4,  1,  5,  0],
   [ 0,  1,  3,  0]]]]


   Defined in src/operator/bilinear_sampler.cc:L256
Parameters
dataInput data to the BilinearsamplerOp.
gridInput grid to the BilinearsamplerOp. grid has two channels: x_src, y_src
cudnn_offwhether to turn cudnn off
Returns
new symbol
Symbol mxnet::cpp::BlockGrad ( const std::string &  symbol_name,
Symbol  data 
)
inline

Stops gradient computation.

   Stops the accumulated gradient of the inputs from flowing through this operator
   in the backward direction. In other words, this operator prevents the contribution
   of its inputs from being taken into account when computing gradients.

   Example::

   v1 = [1, 2]
   v2 = [0, 1]
   a = Variable('a')
   b = Variable('b')
   b_stop_grad = stop_gradient(3 * b)
   loss = MakeLoss(b_stop_grad + a)

   executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2))
   executor.forward(is_train=True, a=v1, b=v2)
   executor.outputs
   [ 1.  5.]

   executor.backward()
   executor.grad_arrays
   [ 0.  0.]
   [ 1.  1.]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L267
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::BlockGrad ( Symbol  data)
inline

Stops gradient computation.

   Stops the accumulated gradient of the inputs from flowing through this operator
   in the backward direction. In other words, this operator prevents the contribution
   of its inputs from being taken into account when computing gradients.

   Example::

   v1 = [1, 2]
   v2 = [0, 1]
   a = Variable('a')
   b = Variable('b')
   b_stop_grad = stop_gradient(3 * b)
   loss = MakeLoss(b_stop_grad + a)

   executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2))
   executor.forward(is_train=True, a=v1, b=v2)
   executor.outputs
   [ 1.  5.]

   executor.backward()
   executor.grad_arrays
   [ 0.  0.]
   [ 1.  1.]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L267
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::broadcast_add ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise sum of the input arrays with broadcasting.

   `broadcast_plus` is an alias to the function `broadcast_add`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_add(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   broadcast_plus(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_add(csr, dense(1D)) = dense
   broadcast_add(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_add ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise sum of the input arrays with broadcasting.

   `broadcast_plus` is an alias to the function `broadcast_add`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_add(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   broadcast_plus(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_add(csr, dense(1D)) = dense
   broadcast_add(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_axis ( const std::string &  symbol_name,
Symbol  data,
Shape  axis = Shape(),
Shape  size = Shape() 
)
inline

Broadcasts the input array over particular axes.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   Example::

   // given x of shape (1,2,1)
   x = [[[ 1.],
   [ 2.]]]

   // broadcast x on axis 2
   broadcast_axis(x, axis=2, size=3) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]
   // broadcast x on axes 0 and 2
   broadcast_axis(x, axis=(0,2), size=(2,3)) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]],
   [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]


   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L238
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axes to perform the broadcasting.
sizeTarget sizes of the broadcasting axes.
Returns
new symbol
Symbol mxnet::cpp::broadcast_axis ( Symbol  data,
Shape  axis = Shape(),
Shape  size = Shape() 
)
inline

Broadcasts the input array over particular axes.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   Example::

   // given x of shape (1,2,1)
   x = [[[ 1.],
   [ 2.]]]

   // broadcast x on axis 2
   broadcast_axis(x, axis=2, size=3) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]
   // broadcast x on axes 0 and 2
   broadcast_axis(x, axis=(0,2), size=(2,3)) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]],
   [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]


   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L238
Parameters
dataThe input
axisThe axes to perform the broadcasting.
sizeTarget sizes of the broadcasting axes.
Returns
new symbol
Symbol mxnet::cpp::broadcast_div ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise division of the input arrays with broadcasting.

   Example::

   x = [[ 6.,  6.,  6.],
   [ 6.,  6.,  6.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_div(x, y) = [[ 3.,  3.,  3.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_div(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_div ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise division of the input arrays with broadcasting.

   Example::

   x = [[ 6.,  6.,  6.],
   [ 6.,  6.,  6.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_div(x, y) = [[ 3.,  3.,  3.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_div(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_equal ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise equal to (==) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L46
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_equal ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise equal to (==) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L46
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_greater ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise greater than (>) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L82
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_greater ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise greater than (>) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L82
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_greater_equal ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise greater than or equal to (>=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater_equal(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L100
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_greater_equal ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise greater than or equal to (>=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater_equal(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L100
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_hypot ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the hypotenuse of a right angled triangle, given its "legs" with broadcasting.

It is equivalent to doing :math:`sqrt(x_1^2 + x_2^2)`.

Example::

x = [[ 3., 3., 3.]]

y = [[ 4.], [ 4.]]

broadcast_hypot(x, y) = [[ 5., 5., 5.], [ 5., 5., 5.]]

z = [[ 0.], [ 4.]]

broadcast_hypot(x, z) = [[ 3., 3., 3.], [ 5., 5., 5.]]

   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L156
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_hypot ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the hypotenuse of a right angled triangle, given its "legs" with broadcasting.

It is equivalent to doing :math:`sqrt(x_1^2 + x_2^2)`.

Example::

x = [[ 3., 3., 3.]]

y = [[ 4.], [ 4.]]

broadcast_hypot(x, y) = [[ 5., 5., 5.], [ 5., 5., 5.]]

z = [[ 0.], [ 4.]]

broadcast_hypot(x, z) = [[ 3., 3., 3.], [ 5., 5., 5.]]

   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L156
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_lesser ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise lesser than (<) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser(x, y) = [[ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L118
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_lesser ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise lesser than (<) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser(x, y) = [[ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L118
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_lesser_equal ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise lesser than or equal to (<=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L136
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_lesser_equal ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise lesser than or equal to (<=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L136
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_like ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs,
dmlc::optional< Shape >  lhs_axes = dmlc::optional<Shape>(),
dmlc::optional< Shape >  rhs_axes = dmlc::optional<Shape>() 
)
inline

Broadcasts lhs to have the same shape as rhs.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic operations
   with arrays of different shapes efficiently without creating multiple copies of
   arrays. Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
   explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_like([[1,2,3]], [[5,6,7],[7,8,9]]) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]

   broadcast_like([9], [1,2,3,4,5], lhs_axes=(0,), rhs_axes=(-1,)) = [9,9,9,9,9]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L315
Parameters
symbol_namename of the resulting symbol
lhsFirst input.
rhsSecond input.
lhs_axesAxes to perform broadcast on in the first input array
rhs_axesAxes to copy from the second input array
Returns
new symbol
Symbol mxnet::cpp::broadcast_like ( Symbol  lhs,
Symbol  rhs,
dmlc::optional< Shape >  lhs_axes = dmlc::optional<Shape>(),
dmlc::optional< Shape >  rhs_axes = dmlc::optional<Shape>() 
)
inline

Broadcasts lhs to have the same shape as rhs.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic operations
   with arrays of different shapes efficiently without creating multiple copies of
   arrays. Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
   explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_like([[1,2,3]], [[5,6,7],[7,8,9]]) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]

   broadcast_like([9], [1,2,3,4,5], lhs_axes=(0,), rhs_axes=(-1,)) = [9,9,9,9,9]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L315
Parameters
lhsFirst input.
rhsSecond input.
lhs_axesAxes to perform broadcast on in the first input array
rhs_axesAxes to copy from the second input array
Returns
new symbol
Symbol mxnet::cpp::broadcast_logical_and ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise logical and with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_logical_and(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L154
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_logical_and ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise logical and with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_logical_and(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L154
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_logical_or ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise logical or with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_or(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L172
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_logical_or ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise logical or with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_or(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L172
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_logical_xor ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise logical xor with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_xor(x, y) = [[ 0.,  0.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L190
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_logical_xor ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise logical xor with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_xor(x, y) = [[ 0.,  0.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L190
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_maximum ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise maximum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the
   element-wise maxima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_maximum(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L80
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_maximum ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise maximum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the
   element-wise maxima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_maximum(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L80
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_minimum ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise minimum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the
   element-wise minima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_minimum(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L115
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_minimum ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise minimum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the
   element-wise minima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_minimum(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L115
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_mod ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise modulo of the input arrays with broadcasting.

   Example::

   x = [[ 8.,  8.,  8.],
   [ 8.,  8.,  8.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_mod(x, y) = [[ 0.,  0.,  0.],
   [ 2.,  2.,  2.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L222
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_mod ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise modulo of the input arrays with broadcasting.

   Example::

   x = [[ 8.,  8.,  8.],
   [ 8.,  8.,  8.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_mod(x, y) = [[ 0.,  0.,  0.],
   [ 2.,  2.,  2.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L222
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_mul ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise product of the input arrays with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_mul(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]

   Supported sparse operations:

   broadcast_mul(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_mul ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise product of the input arrays with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_mul(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]

   Supported sparse operations:

   broadcast_mul(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_not_equal ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise not equal to (!=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_not_equal(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L64
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_not_equal ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns the result of element-wise not equal to (!=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_not_equal(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L64
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_power ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns result of first array elements raised to powers from second array, element-wise with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_power(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L45
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_power ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns result of first array elements raised to powers from second array, element-wise with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_power(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L45
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_sub ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise difference of the input arrays with broadcasting.

   `broadcast_minus` is an alias to the function `broadcast_sub`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_sub(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   broadcast_minus(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   Supported sparse operations:

   broadcast_sub/minus(csr, dense(1D)) = dense
   broadcast_sub/minus(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106
Parameters
symbol_namename of the resulting symbol
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_sub ( Symbol  lhs,
Symbol  rhs 
)
inline

Returns element-wise difference of the input arrays with broadcasting.

   `broadcast_minus` is an alias to the function `broadcast_sub`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_sub(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   broadcast_minus(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   Supported sparse operations:

   broadcast_sub/minus(csr, dense(1D)) = dense
   broadcast_sub/minus(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106
Parameters
lhsFirst input to the function
rhsSecond input to the function
Returns
new symbol
Symbol mxnet::cpp::broadcast_to ( const std::string &  symbol_name,
Symbol  data,
Shape  shape = Shape() 
)
inline

Broadcasts the input array to a new shape.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic operations
   with arrays of different shapes efficiently without creating multiple copies of
   arrays. Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
   explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]

   The dimension which you do not want to change can also be kept as `0` which
   means copy the original value.
   So with `shape=(2,0)`, we will obtain the same result as in the above example.



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L262
Parameters
symbol_namename of the resulting symbol
dataThe input
shapeThe shape of the desired array. We can set the dim to zero if it's same as the original. E.g. A = broadcast_to(B, shape=(10, 0, 0)) has the same meaning as A = broadcast_axis(B, axis=0, size=10).
Returns
new symbol
Symbol mxnet::cpp::broadcast_to ( Symbol  data,
Shape  shape = Shape() 
)
inline

Broadcasts the input array to a new shape.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic operations
   with arrays of different shapes efficiently without creating multiple copies of
   arrays. Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more
   explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]

   The dimension which you do not want to change can also be kept as `0` which
   means copy the original value.
   So with `shape=(2,0)`, we will obtain the same result as in the above example.



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L262
Parameters
dataThe input
shapeThe shape of the desired array. We can set the dim to zero if it's same as the original. E.g. A = broadcast_to(B, shape=(10, 0, 0)) has the same meaning as A = broadcast_axis(B, axis=0, size=10).
Returns
new symbol
Symbol mxnet::cpp::Cast ( const std::string &  symbol_name,
Symbol  data,
CastDtype  dtype 
)
inline

Casts all elements of the input to a new type.

   .. note:: ``Cast`` is deprecated. Use ``cast`` instead.

   Example::

   cast([0.9, 1.3], dtype='int32') = [0, 1]
   cast([1e20, 11.1], dtype='float16') = [inf, 11.09375]
   cast([300, 11.1, 10.9, -1, -3], dtype='uint8') = [44, 11, 10, 255, 253]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L596
Parameters
symbol_namename of the resulting symbol
dataThe input.
dtypeOutput data type.
Returns
new symbol
Symbol mxnet::cpp::Cast ( Symbol  data,
CastDtype  dtype 
)
inline

Casts all elements of the input to a new type.

   .. note:: ``Cast`` is deprecated. Use ``cast`` instead.

   Example::

   cast([0.9, 1.3], dtype='int32') = [0, 1]
   cast([1e20, 11.1], dtype='float16') = [inf, 11.09375]
   cast([300, 11.1, 10.9, -1, -3], dtype='uint8') = [44, 11, 10, 255, 253]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L596
Parameters
dataThe input.
dtypeOutput data type.
Returns
new symbol
Symbol mxnet::cpp::cast_storage ( const std::string &  symbol_name,
Symbol  data,
Cast_storageStype  stype 
)
inline

Casts tensor storage type to the new type.

   When an NDArray with default storage type is cast to csr or row_sparse storage,
   the result is compact, which means:

   - for csr, zero values will not be retained
   - for row_sparse, row slices of all zeros will not be retained

   The storage type of ``cast_storage`` output depends on stype parameter:

   - cast_storage(csr, 'default') = default
   - cast_storage(row_sparse, 'default') = default
   - cast_storage(default, 'csr') = csr
   - cast_storage(default, 'row_sparse') = row_sparse
   - cast_storage(csr, 'csr') = csr
   - cast_storage(row_sparse, 'row_sparse') = row_sparse

   Example::

   dense = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.],
   [ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]

   # cast to row_sparse storage type
   rsp = cast_storage(dense, 'row_sparse')
   rsp.indices = [0, 1]
   rsp.values = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.]]

   # cast to csr storage type
   csr = cast_storage(dense, 'csr')
   csr.indices = [1, 0, 2]
   csr.values = [ 1.,  2.,  3.]
   csr.indptr = [0, 1, 3, 3, 3]



   Defined in src/operator/tensor/cast_storage.cc:L71
Parameters
symbol_namename of the resulting symbol
dataThe input.
stypeOutput storage type.
Returns
new symbol
Symbol mxnet::cpp::cast_storage ( Symbol  data,
Cast_storageStype  stype 
)
inline

Casts tensor storage type to the new type.

   When an NDArray with default storage type is cast to csr or row_sparse storage,
   the result is compact, which means:

   - for csr, zero values will not be retained
   - for row_sparse, row slices of all zeros will not be retained

   The storage type of ``cast_storage`` output depends on stype parameter:

   - cast_storage(csr, 'default') = default
   - cast_storage(row_sparse, 'default') = default
   - cast_storage(default, 'csr') = csr
   - cast_storage(default, 'row_sparse') = row_sparse
   - cast_storage(csr, 'csr') = csr
   - cast_storage(row_sparse, 'row_sparse') = row_sparse

   Example::

   dense = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.],
   [ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]

   # cast to row_sparse storage type
   rsp = cast_storage(dense, 'row_sparse')
   rsp.indices = [0, 1]
   rsp.values = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.]]

   # cast to csr storage type
   csr = cast_storage(dense, 'csr')
   csr.indices = [1, 0, 2]
   csr.values = [ 1.,  2.,  3.]
   csr.indptr = [0, 1, 3, 3, 3]



   Defined in src/operator/tensor/cast_storage.cc:L71
Parameters
dataThe input.
stypeOutput storage type.
Returns
new symbol
Symbol mxnet::cpp::cbrt ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise cube-root value of the input.

   .. math::
   cbrt(x) = \sqrt[3]{x}

   Example::

   cbrt([1, 8, -125]) = [1, 2, -5]

   The storage type of ``cbrt`` output depends upon the input storage type:

   - cbrt(default) = default
   - cbrt(row_sparse) = row_sparse
   - cbrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L883
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::cbrt ( Symbol  data)
inline

Returns element-wise cube-root value of the input.

   .. math::
   cbrt(x) = \sqrt[3]{x}

   Example::

   cbrt([1, 8, -125]) = [1, 2, -5]

   The storage type of ``cbrt`` output depends upon the input storage type:

   - cbrt(default) = default
   - cbrt(row_sparse) = row_sparse
   - cbrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L883
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::ceil ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise ceiling of the input.

   The ceil of the scalar x is the smallest integer i, such that i >= x.

   Example::

   ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  2.,  2.,  3.]

   The storage type of ``ceil`` output depends upon the input storage type:

   - ceil(default) = default
   - ceil(row_sparse) = row_sparse
   - ceil(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L740
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::ceil ( Symbol  data)
inline

Returns element-wise ceiling of the input.

   The ceil of the scalar x is the smallest integer i, such that i >= x.

   Example::

   ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  2.,  2.,  3.]

   The storage type of ``ceil`` output depends upon the input storage type:

   - ceil(default) = default
   - ceil(row_sparse) = row_sparse
   - ceil(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L740
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::choose_element_0index ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Choose one element from each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs. This function assumes rhs uses 0-based indexing.

Parameters
symbol_namename of the resulting symbol
lhsLeft operand to the function.
rhsRight operand to the function.
Returns
new symbol
Symbol mxnet::cpp::choose_element_0index ( Symbol  lhs,
Symbol  rhs 
)
inline

Choose one element from each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs. This function assumes rhs uses 0-based indexing.

Parameters
lhsLeft operand to the function.
rhsRight operand to the function.
Returns
new symbol
Symbol mxnet::cpp::clip ( const std::string &  symbol_name,
Symbol  data,
mx_float  a_min,
mx_float  a_max 
)
inline

Clips (limits) the values in an array.

   Given an interval, values outside the interval are clipped to the interval edges.
   Clipping ``x`` between `a_min` and `a_max` would be::

   clip(x, a_min, a_max) = max(min(x, a_max), a_min)

   Example::

   x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

   clip(x,1,8) = [ 1.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.]

   The storage type of ``clip`` output depends on storage types of inputs and the
   parameter values:

   - clip(default) = default
   - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse
   - clip(csr, a_min <= 0, a_max >= 0) = csr
   - clip(row_sparse, a_min < 0, a_max < 0) = default
   - clip(row_sparse, a_min > 0, a_max > 0) = default
   - clip(csr, a_min < 0, a_max < 0) = csr
   - clip(csr, a_min > 0, a_max > 0) = csr



   Defined in src/operator/tensor/matrix_op.cc:L619
Parameters
symbol_namename of the resulting symbol
dataInput array.
a_minMinimum value
a_maxMaximum value
Returns
new symbol
Symbol mxnet::cpp::clip ( Symbol  data,
mx_float  a_min,
mx_float  a_max 
)
inline

Clips (limits) the values in an array.

   Given an interval, values outside the interval are clipped to the interval edges.
   Clipping ``x`` between `a_min` and `a_max` would be::

   clip(x, a_min, a_max) = max(min(x, a_max), a_min)

   Example::

   x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

   clip(x,1,8) = [ 1.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.]

   The storage type of ``clip`` output depends on storage types of inputs and the
   parameter values:

   - clip(default) = default
   - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse
   - clip(csr, a_min <= 0, a_max >= 0) = csr
   - clip(row_sparse, a_min < 0, a_max < 0) = default
   - clip(row_sparse, a_min > 0, a_max > 0) = default
   - clip(csr, a_min < 0, a_max < 0) = csr
   - clip(csr, a_min > 0, a_max > 0) = csr



   Defined in src/operator/tensor/matrix_op.cc:L619
Parameters
dataInput array.
a_minMinimum value
a_maxMaximum value
Returns
new symbol
Symbol mxnet::cpp::Concat ( const std::string &  symbol_name,
const std::vector< Symbol > &  data,
int  num_args,
int  dim = 1 
)
inline

Joins input arrays along a given axis.

   .. note:: `Concat` is deprecated. Use `concat` instead.

   The dimensions of the input arrays should be the same except the axis along
   which they will be concatenated.
   The dimension of the output array along the concatenated axis will be equal
   to the sum of the corresponding dimensions of the input arrays.

   The storage type of ``concat`` output depends on storage types of inputs

   - concat(csr, csr, ..., csr, dim=0) = csr
   - otherwise, ``concat`` generates output with default storage

   Example::

   x = [[1,1],[2,2]]
   y = [[3,3],[4,4],[5,5]]
   z = [[6,6], [7,7],[8,8]]

   concat(x,y,z,dim=0) = [[ 1.,  1.],
   [ 2.,  2.],
   [ 3.,  3.],
   [ 4.,  4.],
   [ 5.,  5.],
   [ 6.,  6.],
   [ 7.,  7.],
   [ 8.,  8.]]

   Note that you cannot concat x,y,z along dimension 1 since dimension
   0 is not the same for all the input arrays.

   concat(y,z,dim=1) = [[ 3.,  3.,  6.,  6.],
   [ 4.,  4.,  7.,  7.],
   [ 5.,  5.,  8.,  8.]]



   Defined in src/operator/nn/concat.cc:L368
Parameters
symbol_namename of the resulting symbol
dataList of arrays to concatenate
num_argsNumber of inputs to be concatenated.
dimThe dimension along which the inputs are concatenated.
Returns
new symbol
Symbol mxnet::cpp::Concat ( const std::vector< Symbol > &  data,
int  num_args,
int  dim = 1 
)
inline

Joins input arrays along a given axis.

   .. note:: `Concat` is deprecated. Use `concat` instead.

   The dimensions of the input arrays should be the same except the axis along
   which they will be concatenated.
   The dimension of the output array along the concatenated axis will be equal
   to the sum of the corresponding dimensions of the input arrays.

   The storage type of ``concat`` output depends on storage types of inputs

   - concat(csr, csr, ..., csr, dim=0) = csr
   - otherwise, ``concat`` generates output with default storage

   Example::

   x = [[1,1],[2,2]]
   y = [[3,3],[4,4],[5,5]]
   z = [[6,6], [7,7],[8,8]]

   concat(x,y,z,dim=0) = [[ 1.,  1.],
   [ 2.,  2.],
   [ 3.,  3.],
   [ 4.,  4.],
   [ 5.,  5.],
   [ 6.,  6.],
   [ 7.,  7.],
   [ 8.,  8.]]

   Note that you cannot concat x,y,z along dimension 1 since dimension
   0 is not the same for all the input arrays.

   concat(y,z,dim=1) = [[ 3.,  3.,  6.,  6.],
   [ 4.,  4.,  7.,  7.],
   [ 5.,  5.,  8.,  8.]]



   Defined in src/operator/nn/concat.cc:L368
Parameters
dataList of arrays to concatenate
num_argsNumber of inputs to be concatenated.
dimThe dimension along which the inputs are concatenated.
Returns
new symbol
Symbol mxnet::cpp::Convolution ( const std::string &  symbol_name,
Symbol  data,
Symbol  weight,
Symbol  bias,
Shape  kernel,
uint32_t  num_filter,
Shape  stride = Shape(),
Shape  dilate = Shape(),
Shape  pad = Shape(),
uint32_t  num_group = 1,
uint64_t  workspace = 1024,
bool  no_bias = false,
ConvolutionCudnnTune  cudnn_tune = ConvolutionCudnnTune::kNone,
bool  cudnn_off = false,
ConvolutionLayout  layout = ConvolutionLayout::kNone 
)
inline

Compute N-D convolution on *(N+2)*-D input.

   In the 2-D convolution, given input data with shape *(batch_size,
   channel, height, width)*, the output is computed by

   .. math::

   out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star
   weight[i,j,:,:]

   where :math:`\star` is the 2-D cross-correlation operator.

   For general 2-D convolution, the shapes are

   - **data**: *(batch_size, channel, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*.

   Define::

   f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1

   then we have::

   out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
   out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height,
   width)*. We can choose other layouts such as *NWC*.

   If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
   evenly into *g* parts along the channel axis, and also evenly split ``weight``
   along the first dimension. Next compute the convolution on the *i*-th part of
   the data with the *i*-th weight part. The output is obtained by concatenating
   the *g* results.

   1-D convolution does not have *height* dimension but only *width* in space.

   - **data**: *(batch_size, channel, width)*
   - **weight**: *(num_filter, channel, kernel[0])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_width)*.

   3-D convolution adds an additional *depth* dimension besides *height* and
   *width*. The shapes are

   - **data**: *(batch_size, channel, depth, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1], kernel[2])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_depth, out_height, out_width)*.

   Both ``weight`` and ``bias`` are learnable parameters.

   There are other options to tune the performance.

   - **cudnn_tune**: enabling this option leads to higher startup time but may give
   faster speed. Options are

   - **off**: no tuning
   - **limited_workspace**: run test and pick the fastest algorithm that doesn't
   exceed workspace limit.
   - **fastest**: pick the fastest algorithm and ignore workspace limit.
   - **None** (default): the behavior is determined by environment variable
   ``MXNET_CUDNN_AUTOTUNE_DEFAULT``. 0 for off, 1 for limited workspace
   (default), 2 for fastest.

   - **workspace**: A large number leads to more (GPU) memory usage but may improve
   the performance.



   Defined in src/operator/nn/convolution.cc:L461
Parameters
symbol_namename of the resulting symbol
dataInput data to the ConvolutionOp.
weightWeight matrix.
biasBias parameter.
kernelConvolution kernel size: (w,), (h, w) or (d, h, w)
num_filterConvolution filter(channel) number
strideConvolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each
dilateConvolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each
padZero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
num_groupNumber of group partitions.
workspaceMaximum temporary workspace allowed (MB) in convolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the convolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when the `limited_workspace` strategy is used.
no_biasWhether to disable bias parameter.
cudnn_tuneWhether to pick convolution algo by running performance test.
cudnn_offTurn off cudnn for this layer.
layoutSet layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.
Returns
new symbol
Symbol mxnet::cpp::Convolution ( Symbol  data,
Symbol  weight,
Symbol  bias,
Shape  kernel,
uint32_t  num_filter,
Shape  stride = Shape(),
Shape  dilate = Shape(),
Shape  pad = Shape(),
uint32_t  num_group = 1,
uint64_t  workspace = 1024,
bool  no_bias = false,
ConvolutionCudnnTune  cudnn_tune = ConvolutionCudnnTune::kNone,
bool  cudnn_off = false,
ConvolutionLayout  layout = ConvolutionLayout::kNone 
)
inline

Compute N-D convolution on *(N+2)*-D input.

   In the 2-D convolution, given input data with shape *(batch_size,
   channel, height, width)*, the output is computed by

   .. math::

   out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star
   weight[i,j,:,:]

   where :math:`\star` is the 2-D cross-correlation operator.

   For general 2-D convolution, the shapes are

   - **data**: *(batch_size, channel, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*.

   Define::

   f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1

   then we have::

   out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
   out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height,
   width)*. We can choose other layouts such as *NWC*.

   If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
   evenly into *g* parts along the channel axis, and also evenly split ``weight``
   along the first dimension. Next compute the convolution on the *i*-th part of
   the data with the *i*-th weight part. The output is obtained by concatenating
   the *g* results.

   1-D convolution does not have *height* dimension but only *width* in space.

   - **data**: *(batch_size, channel, width)*
   - **weight**: *(num_filter, channel, kernel[0])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_width)*.

   3-D convolution adds an additional *depth* dimension besides *height* and
   *width*. The shapes are

   - **data**: *(batch_size, channel, depth, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1], kernel[2])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_depth, out_height, out_width)*.

   Both ``weight`` and ``bias`` are learnable parameters.

   There are other options to tune the performance.

   - **cudnn_tune**: enabling this option leads to higher startup time but may give
   faster speed. Options are

   - **off**: no tuning
   - **limited_workspace**: run test and pick the fastest algorithm that doesn't
   exceed workspace limit.
   - **fastest**: pick the fastest algorithm and ignore workspace limit.
   - **None** (default): the behavior is determined by environment variable
   ``MXNET_CUDNN_AUTOTUNE_DEFAULT``. 0 for off, 1 for limited workspace
   (default), 2 for fastest.

   - **workspace**: A large number leads to more (GPU) memory usage but may improve
   the performance.



   Defined in src/operator/nn/convolution.cc:L461
Parameters
dataInput data to the ConvolutionOp.
weightWeight matrix.
biasBias parameter.
kernelConvolution kernel size: (w,), (h, w) or (d, h, w)
num_filterConvolution filter(channel) number
strideConvolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each
dilateConvolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each
padZero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
num_groupNumber of group partitions.
workspaceMaximum temporary workspace allowed (MB) in convolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the convolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when the `limited_workspace` strategy is used.
no_biasWhether to disable bias parameter.
cudnn_tuneWhether to pick convolution algo by running performance test.
cudnn_offTurn off cudnn for this layer.
layoutSet layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.
Returns
new symbol
Symbol mxnet::cpp::Convolution_v1 ( const std::string &  symbol_name,
Symbol  data,
Symbol  weight,
Symbol  bias,
Shape  kernel,
uint32_t  num_filter,
Shape  stride = Shape(),
Shape  dilate = Shape(),
Shape  pad = Shape(),
uint32_t  num_group = 1,
uint64_t  workspace = 1024,
bool  no_bias = false,
Convolution_v1CudnnTune  cudnn_tune = Convolution_v1CudnnTune::kNone,
bool  cudnn_off = false,
Convolution_v1Layout  layout = Convolution_v1Layout::kNone 
)
inline

This operator is DEPRECATED. Apply convolution to input then add a bias.

Parameters
symbol_namename of the resulting symbol
dataInput data to the ConvolutionV1Op.
weightWeight matrix.
biasBias parameter.
kernelconvolution kernel size: (h, w) or (d, h, w)
num_filterconvolution filter(channel) number
strideconvolution stride: (h, w) or (d, h, w)
dilateconvolution dilate: (h, w) or (d, h, w)
padpad for convolution: (h, w) or (d, h, w)
num_groupNumber of group partitions. Equivalent to slicing input into num_group partitions, apply convolution on each, then concatenate the results
workspaceMaximum temporary workspace allowed for convolution (MB). This parameter determines the effective batch size of the convolution kernel, which may be smaller than the given batch size. Also, the workspace will be automatically enlarged to make sure that we can run the kernel with batch_size=1.
no_biasWhether to disable bias parameter.
cudnn_tuneWhether to pick convolution algo by running performance test. Leads to higher startup time but may give faster speed. Options are: 'off': no tuning; 'limited_workspace': run test and pick the fastest algorithm that doesn't exceed workspace limit; 'fastest': pick the fastest algorithm and ignore workspace limit. If set to None (default), behavior is determined by environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off, 1 for limited workspace (default), 2 for fastest.
cudnn_offTurn off cudnn for this layer.
layoutSet layout for input, output and weight. Empty for default layout: NCHW for 2d and NCDHW for 3d.
Returns
new symbol
Symbol mxnet::cpp::Convolution_v1 ( Symbol  data,
Symbol  weight,
Symbol  bias,
Shape  kernel,
uint32_t  num_filter,
Shape  stride = Shape(),
Shape  dilate = Shape(),
Shape  pad = Shape(),
uint32_t  num_group = 1,
uint64_t  workspace = 1024,
bool  no_bias = false,
Convolution_v1CudnnTune  cudnn_tune = Convolution_v1CudnnTune::kNone,
bool  cudnn_off = false,
Convolution_v1Layout  layout = Convolution_v1Layout::kNone 
)
inline

This operator is DEPRECATED. Apply convolution to input then add a bias.

Parameters
dataInput data to the ConvolutionV1Op.
weightWeight matrix.
biasBias parameter.
kernelconvolution kernel size: (h, w) or (d, h, w)
num_filterconvolution filter(channel) number
strideconvolution stride: (h, w) or (d, h, w)
dilateconvolution dilate: (h, w) or (d, h, w)
padpad for convolution: (h, w) or (d, h, w)
num_groupNumber of group partitions. Equivalent to slicing input into num_group partitions, apply convolution on each, then concatenate the results
workspaceMaximum temporary workspace allowed for convolution (MB). This parameter determines the effective batch size of the convolution kernel, which may be smaller than the given batch size. Also, the workspace will be automatically enlarged to make sure that we can run the kernel with batch_size=1.
no_biasWhether to disable bias parameter.
cudnn_tuneWhether to pick convolution algo by running performance test. Leads to higher startup time but may give faster speed. Options are: 'off': no tuning; 'limited_workspace': run test and pick the fastest algorithm that doesn't exceed workspace limit; 'fastest': pick the fastest algorithm and ignore workspace limit. If set to None (default), behavior is determined by environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off, 1 for limited workspace (default), 2 for fastest.
cudnn_offTurn off cudnn for this layer.
layoutSet layout for input, output and weight. Empty for default layout: NCHW for 2d and NCDHW for 3d.
Returns
new symbol
Symbol mxnet::cpp::Correlation ( const std::string &  symbol_name,
Symbol  data1,
Symbol  data2,
uint32_t  kernel_size = 1,
uint32_t  max_displacement = 1,
uint32_t  stride1 = 1,
uint32_t  stride2 = 1,
uint32_t  pad_size = 0,
bool  is_multiply = true 
)
inline

Applies correlation to inputs.

   The correlation layer performs multiplicative patch comparisons between two
   feature maps.

   Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`,
   :math:`h`, and :math:`c` being their width, height, and number of channels,
   the correlation layer lets the network compare each patch from :math:`f_{1}`
   with each patch from :math:`f_{2}`.

   For now we consider only a single comparison of two patches. The 'correlation'
   of two patches centered at :math:`x_{1}` in the first map and
   :math:`x_{2}` in the second map is then defined as:

   .. math::

   c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o),
   f_{2}(x_{2} + o)>

   for a square patch of size :math:`K:=2k+1`.

   Note that the equation above is identical to one step of a convolution in
   neural networks, but instead of convolving data with a filter, it convolves
   data with other data. For this reason, it has no training weights.

   Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications.

   Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it
   computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size
   :math:`D:=2d+1`, by limiting the range of :math:`x_{2}`. We use strides
   :math:`s_{1}, s_{2}`, to quantize :math:`x_{1}` globally and to quantize
   :math:`x_{2}` within the neighborhood centered around :math:`x_{1}`.

   The final output is defined by the following expression:

   .. math::
   out[n, q, i, j] = c(x_{i, j}, x_{q})

   where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and
   :math:`q` denotes the :math:`q^{th}` neighborhood of :math:`x_{i,j}`.


   Defined in src/operator/correlation.cc:L198
Parameters
symbol_namename of the resulting symbol
data1Input data1 to the correlation.
data2Input data2 to the correlation.
kernel_sizekernel size for Correlation must be an odd number
max_displacementMax displacement of Correlation
stride1stride1 quantize data1 globally
stride2stride2 quantize data2 within the neighborhood centered around data1
pad_sizepad for Correlation
is_multiplyoperation type is either multiplication or subtraction
Returns
new symbol
Symbol mxnet::cpp::Correlation ( Symbol  data1,
Symbol  data2,
uint32_t  kernel_size = 1,
uint32_t  max_displacement = 1,
uint32_t  stride1 = 1,
uint32_t  stride2 = 1,
uint32_t  pad_size = 0,
bool  is_multiply = true 
)
inline

Applies correlation to inputs.

   The correlation layer performs multiplicative patch comparisons between two
   feature maps.

   Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`,
   :math:`h`, and :math:`c` being their width, height, and number of channels,
   the correlation layer lets the network compare each patch from :math:`f_{1}`
   with each patch from :math:`f_{2}`.

   For now we consider only a single comparison of two patches. The 'correlation'
   of two patches centered at :math:`x_{1}` in the first map and
   :math:`x_{2}` in the second map is then defined as:

   .. math::

   c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o),
   f_{2}(x_{2} + o)>

   for a square patch of size :math:`K:=2k+1`.

   Note that the equation above is identical to one step of a convolution in
   neural networks, but instead of convolving data with a filter, it convolves
   data with other data. For this reason, it has no training weights.

   Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications.

   Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it
   computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size
   :math:`D:=2d+1`, by limiting the range of :math:`x_{2}`. We use strides
   :math:`s_{1}, s_{2}`, to quantize :math:`x_{1}` globally and to quantize
   :math:`x_{2}` within the neighborhood centered around :math:`x_{1}`.

   The final output is defined by the following expression:

   .. math::
   out[n, q, i, j] = c(x_{i, j}, x_{q})

   where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and
   :math:`q` denotes the :math:`q^{th}` neighborhood of :math:`x_{i,j}`.


   Defined in src/operator/correlation.cc:L198
Parameters
data1Input data1 to the correlation.
data2Input data2 to the correlation.
kernel_sizekernel size for Correlation must be an odd number
max_displacementMax displacement of Correlation
stride1stride1 quantize data1 globally
stride2stride2 quantize data2 within the neighborhood centered around data1
pad_sizepad for Correlation
is_multiplyoperation type is either multiplication or subtraction
Returns
new symbol
Symbol mxnet::cpp::cos ( const std::string &  symbol_name,
Symbol  data 
)
inline

Computes the element-wise cosine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]

   The storage type of ``cos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L63
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::cos ( Symbol  data)
inline

Computes the element-wise cosine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]

   The storage type of ``cos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L63
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::cosh ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the hyperbolic cosine of the input array, computed element-wise.

   .. math::
   cosh(x) = 0.5\times(exp(x) + exp(-x))

   The storage type of ``cosh`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L216
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::cosh ( Symbol  data)
inline

Returns the hyperbolic cosine of the input array, computed element-wise.

   .. math::
   cosh(x) = 0.5\times(exp(x) + exp(-x))

   The storage type of ``cosh`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L216
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::Crop ( const std::string &  symbol_name,
int  num_args,
Symbol  data,
Symbol  crop_like,
Shape  offset = Shape(0, 0),
Shape  h_w = Shape(0, 0),
bool  center_crop = false 
)
inline
Symbol mxnet::cpp::Crop ( const std::string &  symbol_name,
const std::vector< Symbol > &  data,
int  num_args,
Shape  offset = Shape(0,0),
Shape  h_w = Shape(0,0),
bool  center_crop = false 
)
inline
   .. note:: `Crop` is deprecated. Use `slice` instead.

   Crop the 2nd and 3rd dim of input data, with the corresponding size of h_w or
   with width and height of the second input symbol, i.e., with one input, we need
   h_w to specify the crop height and width, otherwise the second input symbol's
   size will be used.


   Defined in src/operator/crop.cc:L50
Parameters
symbol_namename of the resulting symbol
dataTensor or List of Tensors, the second input will be used as crop_like
num_argsNumber of inputs for crop. If it equals one, then we will use h_w for the crop height and width; if it equals two, then we will use the height and width of the second input symbol, which we call crop_like here.
offsetcrop offset coordinate: (y, x)
h_wcrop height and width: (h, w)
center_cropIf set to true, then it will use the center crop; otherwise it will crop using the shape of crop_like.
Returns
new symbol
Symbol mxnet::cpp::Crop ( const std::vector< Symbol > &  data,
int  num_args,
Shape  offset = Shape(0,0),
Shape  h_w = Shape(0,0),
bool  center_crop = false 
)
inline
   .. note:: `Crop` is deprecated. Use `slice` instead.

   Crop the 2nd and 3rd dim of input data, with the corresponding size of h_w or
   with width and height of the second input symbol, i.e., with one input, we need
   h_w to specify the crop height and width, otherwise the second input symbol's
   size will be used.


   Defined in src/operator/crop.cc:L50
Parameters
dataTensor or List of Tensors, the second input will be used as crop_like
num_argsNumber of inputs for crop. If it equals one, then we will use h_w for the crop height and width; if it equals two, then we will use the height and width of the second input symbol, which we call crop_like here.
offsetcrop offset coordinate: (y, x)
h_wcrop height and width: (h, w)
center_cropIf set to true, then it will use the center crop; otherwise it will crop using the shape of crop_like.
Returns
new symbol
Symbol mxnet::cpp::CTCLoss ( const std::string &  symbol_name,
Symbol  data,
Symbol  label,
Symbol  data_lengths,
Symbol  label_lengths,
bool  use_data_lengths = false,
bool  use_label_lengths = false,
CTCLossBlankLabel  blank_label = CTCLossBlankLabel::kFirst 
)
inline

Connectionist Temporal Classification Loss.

   .. note:: The existing alias ``contrib_CTCLoss`` is deprecated.

   The shapes of the inputs and outputs:

   - **data**: `(sequence_length, batch_size, alphabet_size)`
   - **label**: `(batch_size, label_sequence_length)`
   - **out**: `(batch_size)`

   The `data` tensor consists of sequences of activation vectors (without applying
   softmax), with i-th channel in the last dimension corresponding to i-th label
   for i between 0 and alphabet_size-1 (i.e. always 0-indexed).
   Alphabet size should include one additional value reserved for blank label.
   When `blank_label` is ``"first"``, the ``0``-th channel is reserved for
   activation of blank label, or otherwise if it is "last", the
   ``(alphabet_size-1)``-th channel is reserved for blank label.

   ``label`` is an index matrix of integers. When `blank_label` is ``"first"``,
   the value 0 is then reserved for blank label, and should not be passed in this
   matrix. Otherwise, when `blank_label` is ``"last"``, the value
   `(alphabet_size-1)` is reserved for blank label.

   If a sequence of labels is shorter than *label_sequence_length*, use the special
   padding value at the end of the sequence to conform it to the correct
   length. The padding value is `0` when `blank_label` is ``"first"``, and `-1`
   otherwise.

   For example, suppose the vocabulary is `[a, b, c]`, and in one batch we have
   three sequences 'ba', 'cbb', and 'abac'. When `blank_label` is ``"first"``, we
   can index the labels as `{'a': 1, 'b': 2, 'c': 3}`, and we reserve the 0-th
   channel for blank label in the data tensor.
   The resulting `label` tensor should be padded to be::

   [[2, 1, 0, 0], [3, 2, 2, 0], [1, 2, 1, 3]]

   When `blank_label` is ``"last"``, we can index the labels as
   `{'a': 0, 'b': 1, 'c': 2}`, and we reserve the channel index 3 for blank label
   in the data tensor.
   The resulting `label` tensor should be padded to be::

   [[1, 0, -1, -1], [2, 1, 1, -1], [0, 1, 0, 2]]

   ``out`` is a list of CTC loss values, one per example in the batch.

   See *Connectionist Temporal Classification: Labelling Unsegmented
   Sequence Data with Recurrent Neural Networks*, A. Graves *et al*. for more
   information on the definition and the algorithm.



   Defined in src/operator/nn/ctc_loss.cc:L100
Parameters
symbol_namename of the resulting symbol
dataInput ndarray
labelGround-truth labels for the loss.
data_lengthsLengths of data for each of the samples. Only required when use_data_lengths is true.
label_lengthsLengths of labels for each of the samples. Only required when use_label_lengths is true.
use_data_lengthsWhether the data lengths are decided by data_lengths. If false, the lengths are equal to the max sequence length.
use_label_lengthsWhether the label lengths are decided by label_lengths, or derived from padding_mask. If false, the lengths are derived from the first occurrence of the value of padding_mask. The value of padding_mask is 0 when first CTC label is reserved for blank, and -1 when last label is reserved for blank. Also see blank_label.
blank_labelSet the label that is reserved for blank label. If "first", 0-th label is reserved, and label values for tokens in the vocabulary are between 1 and alphabet_size-1, and the padding mask is 0. If "last", last label value alphabet_size-1 is reserved for blank label instead, and label values for tokens in the vocabulary are between 0 and alphabet_size-2, and the padding mask is -1.
Returns
new symbol
Symbol mxnet::cpp::CTCLoss ( Symbol  data,
Symbol  label,
Symbol  data_lengths,
Symbol  label_lengths,
bool  use_data_lengths = false,
bool  use_label_lengths = false,
CTCLossBlankLabel  blank_label = CTCLossBlankLabel::kFirst 
)
inline

Connectionist Temporal Classification Loss.

   .. note:: The existing alias ``contrib_CTCLoss`` is deprecated.

   The shapes of the inputs and outputs:

   - **data**: `(sequence_length, batch_size, alphabet_size)`
   - **label**: `(batch_size, label_sequence_length)`
   - **out**: `(batch_size)`

   The `data` tensor consists of sequences of activation vectors (without applying
   softmax), with i-th channel in the last dimension corresponding to i-th label
   for i between 0 and alphabet_size-1 (i.e. always 0-indexed).
   Alphabet size should include one additional value reserved for blank label.
   When `blank_label` is ``"first"``, the ``0``-th channel is reserved for
   activation of blank label, or otherwise if it is "last", the
   ``(alphabet_size-1)``-th channel is reserved for blank label.

   ``label`` is an index matrix of integers. When `blank_label` is ``"first"``,
   the value 0 is then reserved for blank label, and should not be passed in this
   matrix. Otherwise, when `blank_label` is ``"last"``, the value
   `(alphabet_size-1)` is reserved for blank label.

   If a sequence of labels is shorter than *label_sequence_length*, use the special
   padding value at the end of the sequence to conform it to the correct
   length. The padding value is `0` when `blank_label` is ``"first"``, and `-1`
   otherwise.

   For example, suppose the vocabulary is `[a, b, c]`, and in one batch we have
   three sequences 'ba', 'cbb', and 'abac'. When `blank_label` is ``"first"``, we
   can index the labels as `{'a': 1, 'b': 2, 'c': 3}`, and we reserve the 0-th
   channel for blank label in the data tensor.
   The resulting `label` tensor should be padded to be::

   [[2, 1, 0, 0], [3, 2, 2, 0], [1, 2, 1, 3]]

   When `blank_label` is ``"last"``, we can index the labels as
   `{'a': 0, 'b': 1, 'c': 2}`, and we reserve the channel index 3 for blank label
   in the data tensor.
   The resulting `label` tensor should be padded to be::

   [[1, 0, -1, -1], [2, 1, 1, -1], [0, 1, 0, 2]]

   ``out`` is a list of CTC loss values, one per example in the batch.

   See *Connectionist Temporal Classification: Labelling Unsegmented
   Sequence Data with Recurrent Neural Networks*, A. Graves *et al*. for more
   information on the definition and the algorithm.



   Defined in src/operator/nn/ctc_loss.cc:L100
Parameters
dataInput ndarray
labelGround-truth labels for the loss.
data_lengthsLengths of data for each of the samples. Only required when use_data_lengths is true.
label_lengthsLengths of labels for each of the samples. Only required when use_label_lengths is true.
use_data_lengthsWhether the data lengths are decided by data_lengths. If false, the lengths are equal to the max sequence length.
use_label_lengthsWhether the label lengths are decided by label_lengths, or derived from padding_mask. If false, the lengths are derived from the first occurrence of the value of padding_mask. The value of padding_mask is 0 when first CTC label is reserved for blank, and -1 when last label is reserved for blank. Also see blank_label.
blank_labelSet the label that is reserved for blank label. If "first", 0-th label is reserved, and label values for tokens in the vocabulary are between 1 and alphabet_size-1, and the padding mask is 0. If "last", last label value alphabet_size-1 is reserved for blank label instead, and label values for tokens in the vocabulary are between 0 and alphabet_size-2, and the padding mask is -1.
Returns
new symbol
Symbol mxnet::cpp::Custom ( const std::string &  symbol_name,
const std::vector< Symbol > &  data,
const std::string &  op_type 
)
inline

Apply a custom operator implemented in a frontend language (like Python).

   Custom operators should override required methods like `forward` and `backward`.
   The custom operator must be registered before it can be used.
   Please check the tutorial here: /versions/1.4.1/faq/new_op.html.



   Defined in src/operator/custom/custom.cc:L547
Parameters
symbol_namename of the resulting symbol
dataInput data for the custom operator.
op_typeName of the custom operator. This is the name that is passed to
Returns
new symbol
Symbol mxnet::cpp::Custom ( const std::vector< Symbol > &  data,
const std::string &  op_type 
)
inline

Apply a custom operator implemented in a frontend language (like Python).

   Custom operators should override required methods like `forward` and `backward`.
   The custom operator must be registered before it can be used.
   Please check the tutorial here: /versions/1.4.1/faq/new_op.html.



   Defined in src/operator/custom/custom.cc:L547
Parameters
dataInput data for the custom operator.
op_typeName of the custom operator. This is the name that is passed to
Returns
new symbol
Symbol mxnet::cpp::Deconvolution ( const std::string &  symbol_name,
Symbol  data,
Symbol  weight,
Symbol  bias,
Shape  kernel,
uint32_t  num_filter,
Shape  stride = Shape(),
Shape  dilate = Shape(),
Shape  pad = Shape(),
Shape  adj = Shape(),
Shape  target_shape = Shape(),
uint32_t  num_group = 1,
uint64_t  workspace = 512,
bool  no_bias = true,
DeconvolutionCudnnTune  cudnn_tune = DeconvolutionCudnnTune::kNone,
bool  cudnn_off = false,
DeconvolutionLayout  layout = DeconvolutionLayout::kNone 
)
inline

Computes 1D or 2D transposed convolution (aka fractionally strided convolution) of the input tensor. This operation can be seen as the gradient of Convolution operation with respect to its input. Convolution usually reduces the size of the input. Transposed convolution works the other way, going from a smaller

Parameters
symbol_namename of the resulting symbol
dataInput tensor to the deconvolution operation.
weightWeights representing the kernel.
biasBias added to the result after the deconvolution operation.
kernelDeconvolution kernel size: (w,), (h, w) or (d, h, w). This is same as
num_filterNumber of output filters.
strideThe stride used for the corresponding convolution: (w,), (h, w) or (d,
dilateDilation factor for each dimension of the input: (w,), (h, w) or (d, h,
padThe amount of implicit zero padding added during convolution for each dimension of the input: (w,), (h, w) or (d, h, w). (kernel-1)/2 is usually a good choice. If target_shape is set, pad will be ignored and a padding
adjAdjustment for output shape: (w,), (h, w) or (d, h, w). If target_shape
target_shapeShape of the output tensor: (w,), (h, w) or (d, h, w).
num_groupNumber of groups partition.
workspaceMaximum temporary workspace allowed (MB) in deconvolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the deconvolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when
no_biasWhether to disable bias parameter.
cudnn_tuneWhether to pick convolution algorithm by running performance test.
cudnn_offTurn off cudnn for this layer.
layoutSet layout for input, output and weight. Empty for default layout, NCW
Returns
new symbol
Symbol mxnet::cpp::Deconvolution ( Symbol  data,
Symbol  weight,
Symbol  bias,
Shape  kernel,
uint32_t  num_filter,
Shape  stride = Shape(),
Shape  dilate = Shape(),
Shape  pad = Shape(),
Shape  adj = Shape(),
Shape  target_shape = Shape(),
uint32_t  num_group = 1,
uint64_t  workspace = 512,
bool  no_bias = true,
DeconvolutionCudnnTune  cudnn_tune = DeconvolutionCudnnTune::kNone,
bool  cudnn_off = false,
DeconvolutionLayout  layout = DeconvolutionLayout::kNone 
)
inline

Computes 1D or 2D transposed convolution (aka fractionally strided convolution) of the input tensor. This operation can be seen as the gradient of Convolution operation with respect to its input. Convolution usually reduces the size of the input. Transposed convolution works the other way, going from a smaller

Parameters
dataInput tensor to the deconvolution operation.
weightWeights representing the kernel.
biasBias added to the result after the deconvolution operation.
kernelDeconvolution kernel size: (w,), (h, w) or (d, h, w). This is same as
num_filterNumber of output filters.
strideThe stride used for the corresponding convolution: (w,), (h, w) or (d,
dilateDilation factor for each dimension of the input: (w,), (h, w) or (d, h,
padThe amount of implicit zero padding added during convolution for each dimension of the input: (w,), (h, w) or (d, h, w). (kernel-1)/2 is usually a good choice. If target_shape is set, pad will be ignored and a padding
adjAdjustment for output shape: (w,), (h, w) or (d, h, w). If target_shape
target_shapeShape of the output tensor: (w,), (h, w) or (d, h, w).
num_groupNumber of groups partition.
workspaceMaximum temporary workspace allowed (MB) in deconvolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the deconvolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when
no_biasWhether to disable bias parameter.
cudnn_tuneWhether to pick convolution algorithm by running performance test.
cudnn_offTurn off cudnn for this layer.
layoutSet layout for input, output and weight. Empty for default layout, NCW
Returns
new symbol
Symbol mxnet::cpp::degrees ( const std::string &  symbol_name,
Symbol  data 
)
inline

Converts each element of the input array from radians to degrees.

   .. math::
   degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]

   The storage type of ``degrees`` output depends upon the input storage type:

   - degrees(default) = default
   - degrees(row_sparse) = row_sparse
   - degrees(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L163
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::degrees ( Symbol  data)
inline

Converts each element of the input array from radians to degrees.

   .. math::
   degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]

   The storage type of ``degrees`` output depends upon the input storage type:

   - degrees(default) = default
   - degrees(row_sparse) = row_sparse
   - degrees(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L163
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::depth_to_space ( const std::string &  symbol_name,
Symbol  data,
int  block_size 
)
inline

Rearranges (permutes) data from depth into blocks of spatial data. Similar to ONNX DepthToSpace operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace. The output is a new tensor where the values from depth dimension are moved into height and width dimension. The reverse of this operation is ``space_to_depth``.

.. math::

\begin{gather*}
x' = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H, W]) \\
x'' = transpose(x', [0, 3, 4, 1, 5, 2]) \\
y = reshape(x'', [N, C / (block\_size ^ 2), H * block\_size, W * block\_size])
\end{gather*}

where :math:`x` is an input tensor with default layout :math:`[N, C, H, W]` and :math:`y` is the output tensor of layout :math:`[N, C / (block_size ^ 2), H * block_size, W * block_size]`.

Example::

x = [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]], [[12, 13, 14], [15, 16, 17]], [[18, 19, 20], [21, 22, 23]]]]

depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   Defined in src/operator/tensor/matrix_op.cc:L946
Parameters
symbol_namename of the resulting symbol
dataInput ndarray
block_sizeBlocks of [block_size, block_size] are moved
Returns
new symbol
Symbol mxnet::cpp::depth_to_space ( Symbol  data,
int  block_size 
)
inline

Rearranges (permutes) data from depth into blocks of spatial data. Similar to ONNX DepthToSpace operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace. The output is a new tensor where the values from depth dimension are moved into height and width dimension. The reverse of this operation is ``space_to_depth``.

.. math::

\begin{gather*}
x' = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H, W]) \\
x'' = transpose(x', [0, 3, 4, 1, 5, 2]) \\
y = reshape(x'', [N, C / (block\_size ^ 2), H * block\_size, W * block\_size])
\end{gather*}

where :math:`x` is an input tensor with default layout :math:`[N, C, H, W]` and :math:`y` is the output tensor of layout :math:`[N, C / (block_size ^ 2), H * block_size, W * block_size]`.

Example::

x = [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]], [[12, 13, 14], [15, 16, 17]], [[18, 19, 20], [21, 22, 23]]]]

depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   Defined in src/operator/tensor/matrix_op.cc:L946
Parameters
dataInput ndarray
block_sizeBlocks of [block_size, block_size] are moved
Returns
new symbol
Symbol mxnet::cpp::diag ( const std::string &  symbol_name,
Symbol  data,
int  k = 0,
int  axis1 = 0,
int  axis2 = 1 
)
inline

Extracts a diagonal or constructs a diagonal array.

   ``diag``'s behavior depends on the input array dimensions:

   - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other
   elements are zero.
   - N-D arrays: extracts the diagonals of the sub-arrays with axes specified by
   ``axis1`` and ``axis2``.
   The output shape would be decided by removing the axes numbered ``axis1`` and
   ``axis2`` from the input shape and appending to the result a new axis with the
   size of the diagonals in question.

   For example, when the input shape is `(2, 3, 4, 5)`, ``axis1`` and ``axis2`` are 0 and 2
   respectively and ``k`` is 0, the resulting shape would be `(3, 5, 2)`.

   Examples::

   x = [[1, 2, 3],
   [4, 5, 6]]

   diag(x) = [1, 5]

   diag(x, k=1) = [2, 6]

   diag(x, k=-1) = [4]

   x = [1, 2, 3]

   diag(x) = [[1, 0, 0],
   [0, 2, 0],
   [0, 0, 3]]

   diag(x, k=1) = [[0, 1, 0],
   [0, 0, 2],
   [0, 0, 0]]

   diag(x, k=-1) = [[0, 0, 0],
   [1, 0, 0],
   [0, 2, 0]]

   x = [[[1, 2],
   [3, 4]],

   [[5, 6],
   [7, 8]]]

   diag(x) = [[1, 7],
   [2, 8]]

   diag(x, k=1) = [[3],
   [4]]

   diag(x, axis1=-2, axis2=-1) = [[1, 4],
   [5, 8]]



   Defined in src/operator/tensor/diag_op.cc:L87
Parameters
symbol_namename of the resulting symbol
dataInput ndarray
kDiagonal in question. The default is 0. Use k>0 for diagonals above the main diagonal, and k<0 for diagonals below the main diagonal. If input has shape (S0
axis1The first axis of the sub-arrays of interest. Ignored when the input is a
axis2The second axis of the sub-arrays of interest. Ignored when the input is
Returns
new symbol
Symbol mxnet::cpp::diag ( Symbol  data,
int  k = 0,
int  axis1 = 0,
int  axis2 = 1 
)
inline

Extracts a diagonal or constructs a diagonal array.

   ``diag``'s behavior depends on the input array dimensions:

   - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other
   elements are zero.
   - N-D arrays: extracts the diagonals of the sub-arrays with axes specified by
   ``axis1`` and ``axis2``.
   The output shape would be decided by removing the axes numbered ``axis1`` and
   ``axis2`` from the input shape and appending to the result a new axis with the
   size of the diagonals in question.

   For example, when the input shape is `(2, 3, 4, 5)`, ``axis1`` and ``axis2`` are 0 and 2
   respectively and ``k`` is 0, the resulting shape would be `(3, 5, 2)`.

   Examples::

   x = [[1, 2, 3],
   [4, 5, 6]]

   diag(x) = [1, 5]

   diag(x, k=1) = [2, 6]

   diag(x, k=-1) = [4]

   x = [1, 2, 3]

   diag(x) = [[1, 0, 0],
   [0, 2, 0],
   [0, 0, 3]]

   diag(x, k=1) = [[0, 1, 0],
   [0, 0, 2],
   [0, 0, 0]]

   diag(x, k=-1) = [[0, 0, 0],
   [1, 0, 0],
   [0, 2, 0]]

   x = [[[1, 2],
   [3, 4]],

   [[5, 6],
   [7, 8]]]

   diag(x) = [[1, 7],
   [2, 8]]

   diag(x, k=1) = [[3],
   [4]]

   diag(x, axis1=-2, axis2=-1) = [[1, 4],
   [5, 8]]



   Defined in src/operator/tensor/diag_op.cc:L87
Parameters
dataInput ndarray
kDiagonal in question. The default is 0. Use k>0 for diagonals above the main diagonal, and k<0 for diagonals below the main diagonal. If input has shape (S0
axis1The first axis of the sub-arrays of interest. Ignored when the input is a
axis2The second axis of the sub-arrays of interest. Ignored when the input is
Returns
new symbol
Symbol mxnet::cpp::dot ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs,
bool  transpose_a = false,
bool  transpose_b = false,
DotForwardStype  forward_stype = DotForwardStype::kNone 
)
inline

Dot product of two arrays.

   ``dot``'s behavior depends on the input array dimensions:

   - 1-D arrays: inner product of vectors
   - 2-D arrays: matrix multiplication
   - N-D arrays: a sum product over the last axis of the first input and the first
   axis of the second input

   For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
   result array will have shape `(n,m,r,s)`. It is computed by::

   dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])

   Example::

   x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
   y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
   dot(x,y)[0,0,1,1] = 0
   sum(x[0,0,:]*y[:,1,1]) = 0

   The storage type of ``dot`` output depends on storage types of inputs, transpose option and
   forward_stype option for output storage type. Implemented sparse operations include:

   - dot(default, default, transpose_a=True/False, transpose_b=True/False) = default
   - dot(csr, default, transpose_a=True) = default
   - dot(csr, default, transpose_a=True) = row_sparse
   - dot(csr, default) = default
   - dot(csr, row_sparse) = default
   - dot(default, csr) = csr (CPU only)
   - dot(default, csr, forward_stype='default') = default
   - dot(default, csr, transpose_b=True, forward_stype='default') = default

   If the combination of input storage types and forward_stype does not match any
   of the above patterns, ``dot`` will fall back and generate output with default storage.

   .. Note::

   If the storage type of the lhs is "csr", the storage type of gradient w.r.t rhs will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates is turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/dot.cc:L77
Parameters
symbol_namename of the resulting symbol
lhsThe first input
rhsThe second input
transpose_aIf true then transpose the first input before dot.
transpose_bIf true then transpose the second input before dot.
forward_stypeThe desired storage type of the forward output given by user, if the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still
Returns
new symbol
Symbol mxnet::cpp::dot ( Symbol  lhs,
Symbol  rhs,
bool  transpose_a = false,
bool  transpose_b = false,
DotForwardStype  forward_stype = DotForwardStype::kNone 
)
inline

Dot product of two arrays.

   ``dot``'s behavior depends on the input array dimensions:

   - 1-D arrays: inner product of vectors
   - 2-D arrays: matrix multiplication
   - N-D arrays: a sum product over the last axis of the first input and the first
   axis of the second input

   For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
   result array will have shape `(n,m,r,s)`. It is computed by::

   dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])

   Example::

   x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
   y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
   dot(x,y)[0,0,1,1] = 0
   sum(x[0,0,:]*y[:,1,1]) = 0

   The storage type of ``dot`` output depends on storage types of inputs, transpose option and
   forward_stype option for output storage type. Implemented sparse operations include:

   - dot(default, default, transpose_a=True/False, transpose_b=True/False) = default
   - dot(csr, default, transpose_a=True) = default
   - dot(csr, default, transpose_a=True) = row_sparse
   - dot(csr, default) = default
   - dot(csr, row_sparse) = default
   - dot(default, csr) = csr (CPU only)
   - dot(default, csr, forward_stype='default') = default
   - dot(default, csr, transpose_b=True, forward_stype='default') = default

   If the combination of input storage types and forward_stype does not match any
   of the above patterns, ``dot`` will fall back and generate output with default storage.

   .. Note::

   If the storage type of the lhs is "csr", the storage type of gradient w.r.t rhs will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates is turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/dot.cc:L77
Parameters
lhsThe first input
rhsThe second input
transpose_aIf true then transpose the first input before dot.
transpose_bIf true then transpose the second input before dot.
forward_stypeThe desired storage type of the forward output given by user, if the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still
Returns
new symbol
Symbol mxnet::cpp::Dropout ( const std::string &  symbol_name,
Symbol  data,
mx_float  p = 0.5,
DropoutMode  mode = DropoutMode::kTraining,
Shape  axes = Shape() 
)
inline

Applies dropout operation to input array.

   - During training, each element of the input is set to zero with probability p.
   The whole array is rescaled by :math:`1/(1-p)` to keep the expected
   sum of the input unchanged.

   - During testing, this operator does not change the input if mode is 'training'.
   If mode is 'always', the same computation as during training will be applied.

   Example::

   random.seed(998)
   input_array = array([[3., 0.5,  -0.5,  2., 7.],
   [2., -0.4,   7.,  3., 0.2]])
   a = symbol.Variable('a')
   dropout = symbol.Dropout(a, p = 0.2)
   executor = dropout.simple_bind(a = input_array.shape)

   ## If training
   executor.forward(is_train = True, a = input_array)
   executor.outputs
   [[ 3.75   0.625 -0.     2.5    8.75 ]
   [ 2.5   -0.5    8.75   3.75   0.   ]]

   ## If testing
   executor.forward(is_train = False, a = input_array)
   executor.outputs
   [[ 3.     0.5   -0.5    2.     7.   ]
   [ 2.    -0.4    7.     3.     0.2  ]]


   Defined in src/operator/nn/dropout.cc:L76
Parameters
symbol_namename of the resulting symbol
dataInput array to which dropout will be applied.
pFraction of the input that gets dropped out during training time.
modeWhether to only turn on dropout during training or to also turn on for
axesAxes for variational dropout kernel.
Returns
new symbol
Symbol mxnet::cpp::Dropout ( Symbol  data,
mx_float  p = 0.5,
DropoutMode  mode = DropoutMode::kTraining,
Shape  axes = Shape() 
)
inline

Applies dropout operation to input array.

   - During training, each element of the input is set to zero with probability p.
   The whole array is rescaled by :math:`1/(1-p)` to keep the expected
   sum of the input unchanged.

   - During testing, this operator does not change the input if mode is 'training'.
   If mode is 'always', the same computation as during training will be applied.

   Example::

   random.seed(998)
   input_array = array([[3., 0.5,  -0.5,  2., 7.],
   [2., -0.4,   7.,  3., 0.2]])
   a = symbol.Variable('a')
   dropout = symbol.Dropout(a, p = 0.2)
   executor = dropout.simple_bind(a = input_array.shape)

   ## If training
   executor.forward(is_train = True, a = input_array)
   executor.outputs
   [[ 3.75   0.625 -0.     2.5    8.75 ]
   [ 2.5   -0.5    8.75   3.75   0.   ]]

   ## If testing
   executor.forward(is_train = False, a = input_array)
   executor.outputs
   [[ 3.     0.5   -0.5    2.     7.   ]
   [ 2.    -0.4    7.     3.     0.2  ]]


   Defined in src/operator/nn/dropout.cc:L76
Parameters
dataInput array to which dropout will be applied.
pFraction of the input that gets dropped out during training time.
modeWhether to only turn on dropout during training or to also turn on for
axesAxes for variational dropout kernel.
Returns
new symbol
Symbol mxnet::cpp::elemwise_add ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Adds arguments element-wise.

   The storage type of ``elemwise_add`` output depends on storage types of inputs

   - elemwise_add(row_sparse, row_sparse) = row_sparse
   - elemwise_add(csr, csr) = csr
   - elemwise_add(default, csr) = default
   - elemwise_add(csr, default) = default
   - elemwise_add(default, rsp) = default
   - elemwise_add(rsp, default) = default
   - otherwise, ``elemwise_add`` generates output with default storage
Parameters
symbol_namename of the resulting symbol
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_add ( Symbol  lhs,
Symbol  rhs 
)
inline

Adds arguments element-wise.

   The storage type of ``elemwise_add`` output depends on storage types of inputs

   - elemwise_add(row_sparse, row_sparse) = row_sparse
   - elemwise_add(csr, csr) = csr
   - elemwise_add(default, csr) = default
   - elemwise_add(csr, default) = default
   - elemwise_add(default, rsp) = default
   - elemwise_add(rsp, default) = default
   - otherwise, ``elemwise_add`` generates output with default storage
Parameters
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_div ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Divides arguments element-wise.

   The storage type of ``elemwise_div`` output is always dense
Parameters
symbol_namename of the resulting symbol
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_div ( Symbol  lhs,
Symbol  rhs 
)
inline

Divides arguments element-wise.

   The storage type of ``elemwise_div`` output is always dense
Parameters
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_mul ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Multiplies arguments element-wise.

   The storage type of ``elemwise_mul`` output depends on storage types of inputs

   - elemwise_mul(default, default) = default
   - elemwise_mul(row_sparse, row_sparse) = row_sparse
   - elemwise_mul(default, row_sparse) = row_sparse
   - elemwise_mul(row_sparse, default) = row_sparse
   - elemwise_mul(csr, csr) = csr
   - otherwise, ``elemwise_mul`` generates output with default storage
Parameters
symbol_namename of the resulting symbol
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_mul ( Symbol  lhs,
Symbol  rhs 
)
inline

Multiplies arguments element-wise.

   The storage type of ``elemwise_mul`` output depends on storage types of inputs

   - elemwise_mul(default, default) = default
   - elemwise_mul(row_sparse, row_sparse) = row_sparse
   - elemwise_mul(default, row_sparse) = row_sparse
   - elemwise_mul(row_sparse, default) = row_sparse
   - elemwise_mul(csr, csr) = csr
   - otherwise, ``elemwise_mul`` generates output with default storage
Parameters
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_sub ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Subtracts arguments element-wise.

   The storage type of ``elemwise_sub`` output depends on storage types of inputs

   - elemwise_sub(row_sparse, row_sparse) = row_sparse
   - elemwise_sub(csr, csr) = csr
   - elemwise_sub(default, csr) = default
   - elemwise_sub(csr, default) = default
   - elemwise_sub(default, rsp) = default
   - elemwise_sub(rsp, default) = default
   - otherwise, ``elemwise_sub`` generates output with default storage
Parameters
symbol_namename of the resulting symbol
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::elemwise_sub ( Symbol  lhs,
Symbol  rhs 
)
inline

Subtracts arguments element-wise.

   The storage type of ``elemwise_sub`` output depends on storage types of inputs

   - elemwise_sub(row_sparse, row_sparse) = row_sparse
   - elemwise_sub(csr, csr) = csr
   - elemwise_sub(default, csr) = default
   - elemwise_sub(csr, default) = default
   - elemwise_sub(default, rsp) = default
   - elemwise_sub(rsp, default) = default
   - otherwise, ``elemwise_sub`` generates output with default storage
Parameters
lhsfirst input
rhssecond input
Returns
new symbol
Symbol mxnet::cpp::Embedding ( const std::string &  symbol_name,
Symbol  data,
Symbol  weight,
int  input_dim,
int  output_dim,
EmbeddingDtype  dtype = EmbeddingDtype::kFloat32,
bool  sparse_grad = false 
)
inline

Maps integer indices to vector representations (embeddings).

   This operator maps words to real-valued vectors in a high-dimensional space,
   called word embeddings. These embeddings can capture semantic and syntactic properties of the words.
   For example, it has been noted that in the learned embedding spaces, similar words tend
   to be close to each other and dissimilar words far apart.

   For an input array of shape (d1, ..., dK),
   the shape of an output array is (d1, ..., dK, output_dim).
   All the input values should be integers in the range [0, input_dim).

   If the input_dim is ip0 and output_dim is op0, then shape of the embedding weight matrix must be
   (ip0, op0).

   By default, if any index mentioned is too large, it is replaced by the index that addresses
   the last vector in an embedding matrix.

   Examples::

   input_dim = 4
   output_dim = 5

   // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
   y = [[  0.,   1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.,   9.],
   [ 10.,  11.,  12.,  13.,  14.],
   [ 15.,  16.,  17.,  18.,  19.]]

   // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
   x = [[ 1.,  3.],
   [ 0.,  2.]]

   // Mapped input x to its vector representation y.
   Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
   [ 15.,  16.,  17.,  18.,  19.]],

   [[  0.,   1.,   2.,   3.,   4.],
   [ 10.,  11.,  12.,  13.,  14.]]]


   The storage type of weight can be either row_sparse or default.

   .. Note::

   If "sparse_grad" is set to True, the storage type of gradient w.r.t weights will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates is turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/indexing_op.cc:L519
Parameters
symbol_namename of the resulting symbol
dataThe input array to the embedding operator.
weightThe embedding weight matrix.
input_dimVocabulary size of the input indices.
output_dimDimension of the embedding vectors.
dtypeData type of weight.
sparse_gradCompute row sparse gradient in the backward calculation. If set to
Returns
new symbol
Symbol mxnet::cpp::Embedding ( Symbol  data,
Symbol  weight,
int  input_dim,
int  output_dim,
EmbeddingDtype  dtype = EmbeddingDtype::kFloat32,
bool  sparse_grad = false 
)
inline

Maps integer indices to vector representations (embeddings).

   This operator maps words to real-valued vectors in a high-dimensional space,
   called word embeddings. These embeddings can capture semantic and syntactic properties of the words.
   For example, it has been noted that in the learned embedding spaces, similar words tend
   to be close to each other and dissimilar words far apart.

   For an input array of shape (d1, ..., dK),
   the shape of an output array is (d1, ..., dK, output_dim).
   All the input values should be integers in the range [0, input_dim).

   If the input_dim is ip0 and output_dim is op0, then shape of the embedding weight matrix must be
   (ip0, op0).

   By default, if any index mentioned is too large, it is replaced by the index that addresses
   the last vector in an embedding matrix.

   Examples::

   input_dim = 4
   output_dim = 5

   // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
   y = [[  0.,   1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.,   9.],
   [ 10.,  11.,  12.,  13.,  14.],
   [ 15.,  16.,  17.,  18.,  19.]]

   // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
   x = [[ 1.,  3.],
   [ 0.,  2.]]

   // Mapped input x to its vector representation y.
   Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
   [ 15.,  16.,  17.,  18.,  19.]],

   [[  0.,   1.,   2.,   3.,   4.],
   [ 10.,  11.,  12.,  13.,  14.]]]


   The storage type of weight can be either row_sparse or default.

   .. Note::

   If "sparse_grad" is set to True, the storage type of gradient w.r.t weights will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates is turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/indexing_op.cc:L519
Parameters
dataThe input array to the embedding operator.
weightThe embedding weight matrix.
input_dimVocabulary size of the input indices.
output_dimDimension of the embedding vectors.
dtypeData type of weight.
sparse_gradCompute row sparse gradient in the backward calculation. If set to
Returns
new symbol
Symbol mxnet::cpp::erf ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise gauss error function of the input.

   Example::

   erf([0, -1., 10.]) = [0., -0.8427, 1.]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L897
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::erf ( Symbol  data)
inline

Returns element-wise gauss error function of the input.

   Example::

   erf([0, -1., 10.]) = [0., -0.8427, 1.]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L897
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::exp ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise exponential value of the input.

   .. math::
   exp(x) = e^x \approx 2.718^x

   Example::

   exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]

   The storage type of ``exp`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L939
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::exp ( Symbol  data)
inline

Returns element-wise exponential value of the input.

   .. math::
   exp(x) = e^x \approx 2.718^x

   Example::

   exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]

   The storage type of ``exp`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L939
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::expand_dims ( const std::string &  symbol_name,
Symbol  data,
int  axis 
)
inline

Inserts a new axis of size 1 into the array shape

   For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
   will return a new array with shape ``(2,1,3,4)``.



   Defined in src/operator/tensor/matrix_op.cc:L348
Parameters
symbol_namename of the resulting symbol
dataSource input
axisPosition where new axis is to be inserted. Suppose that the input NDArray's dimension is ndim, the range of the inserted axis is `[-ndim, ndim]`
Returns
new symbol
Symbol mxnet::cpp::expand_dims ( Symbol  data,
int  axis 
)
inline

Inserts a new axis of size 1 into the array shape

   For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
   will return a new array with shape ``(2,1,3,4)``.



   Defined in src/operator/tensor/matrix_op.cc:L348
Parameters
dataSource input
axisPosition where new axis is to be inserted. Suppose that the input NDArray's dimension is ndim, the range of the inserted axis is `[-ndim, ndim]`
Returns
new symbol
Symbol mxnet::cpp::expm1 ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns exp(x) - 1 computed element-wise on the input.

   This function provides greater precision than ``exp(x) - 1`` for small values of ``x``.

   The storage type of ``expm1`` output depends upon the input storage type:

   - expm1(default) = default
   - expm1(row_sparse) = row_sparse
   - expm1(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1018
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::expm1 ( Symbol  data)
inline

Returns exp(x) - 1 computed element-wise on the input.

   This function provides greater precision than ``exp(x) - 1`` for small values

   The storage type of ``expm1`` output depends upon the input storage type:

   - expm1(default) = default
   - expm1(row_sparse) = row_sparse
   - expm1(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1018
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::fill_element_0index ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  mhs,
Symbol  rhs 
)
inline

Fill one element of each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs and the values indicated by mhs. This function assumes rhs uses 0-based indexing.

Parameters
symbol_namename of the resulting symbol
lhsLeft operand to the function.
mhsMiddle operand to the function.
rhsRight operand to the function.
Returns
new symbol
Symbol mxnet::cpp::fill_element_0index ( Symbol  lhs,
Symbol  mhs,
Symbol  rhs 
)
inline

Fill one element of each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs and the values indicated by mhs. This function assumes rhs uses 0-based indexing.

Parameters
lhsLeft operand to the function.
mhsMiddle operand to the function.
rhsRight operand to the function.
Returns
new symbol
Symbol mxnet::cpp::fix ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise rounded value to the nearest integer towards zero of the input.

Example::

fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1., 1., 2.]

The storage type of fix output depends upon the input storage type:

  • fix(default) = default
  • fix(row_sparse) = row_sparse
  • fix(csr) = csr
   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L797
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::fix ( Symbol  data)
inline

Returns element-wise rounded value to the nearest integer towards zero of the input.

Example::

fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1., 1., 2.]

The storage type of fix output depends upon the input storage type:

  • fix(default) = default
  • fix(row_sparse) = row_sparse
  • fix(csr) = csr
   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L797
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::Flatten ( const std::string &  symbol_name,
Symbol  data 
)
inline

Flattens the input array into a 2-D array by collapsing the higher dimensions.

   .. note:: `Flatten` is deprecated. Use `flatten` instead.

   For an input array with shape ``(d1, d2, ..., dk)``, `flatten` operation reshapes
   the input array into an output array of shape ``(d1, d2*...*dk)``.

   Note that the behavior of this function is different from numpy.ndarray.flatten,
   which behaves similar to mxnet.ndarray.reshape((-1,)).

   Example::

   x = [[
   [1,2,3],
   [4,5,6],
   [7,8,9]
   ],
   [    [1,2,3],
   [4,5,6],
   [7,8,9]
   ]],

   flatten(x) = [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
   [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]



   Defined in src/operator/tensor/matrix_op.cc:L259
Parameters
symbol_namename of the resulting symbol
dataInput array.
Returns
new symbol
Symbol mxnet::cpp::Flatten ( Symbol  data)
inline

Flattens the input array into a 2-D array by collapsing the higher dimensions.

   .. note:: `Flatten` is deprecated. Use `flatten` instead.

   For an input array with shape ``(d1, d2, ..., dk)``, `flatten` operation reshapes
   the input array into an output array of shape ``(d1, d2*...*dk)``.

   Note that the behavior of this function is different from numpy.ndarray.flatten,
   which behaves similar to mxnet.ndarray.reshape((-1,)).

   Example::

   x = [[
   [1,2,3],
   [4,5,6],
   [7,8,9]
   ],
   [    [1,2,3],
   [4,5,6],
   [7,8,9]
   ]],

   flatten(x) = [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
   [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]



   Defined in src/operator/tensor/matrix_op.cc:L259
Parameters
dataInput array.
Returns
new symbol
Symbol mxnet::cpp::floor ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise floor of the input.

   The floor of the scalar x is the largest integer i, such that i <= x.

   Example::

   floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2.,  1.,  1.,  2.]

   The storage type of ``floor`` output depends upon the input storage type:

   - floor(default) = default
   - floor(row_sparse) = row_sparse
   - floor(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L759
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::floor ( Symbol  data)
inline

Returns element-wise floor of the input.

   The floor of the scalar x is the largest integer i, such that i <= x.

   Example::

   floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2.,  1.,  1.,  2.]

   The storage type of ``floor`` output depends upon the input storage type:

   - floor(default) = default
   - floor(row_sparse) = row_sparse
   - floor(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L759
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::ftml_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  d,
Symbol  v,
Symbol  z,
mx_float  lr,
int  t,
mx_float  beta1 = 0.6,
mx_float  beta2 = 0.999,
double  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_grad = -1 
)
inline

The FTML optimizer described in FTML - Follow the Moving Leader in Deep Learning, available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf.

.. math::

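g_t = \nabla J(W_{t-1})\\
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
d_t = \frac{ 1 - \beta_1^t }{ \eta_t } \left(\sqrt{ \frac{ v_t }{ 1 - \beta_2^t } } + \epsilon\right)\\
\sigma_t = d_t - \beta_1 d_{t-1}\\
z_t = \beta_1 z_{ t-1 } + (1 - \beta_1^t) g_t - \sigma_t W_{t-1}\\
W_t = - \frac{ z_t }{ d_t }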

   Defined in src/operator/optimizer_op.cc:L447
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
dInternal state d_t
vInternal state v_t
zInternal state z_t
lrLearning rate.
tNumber of update.
beta1Generally close to 0.5.
beta2Generally close to 1.
epsilonEpsilon to prevent div 0.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
Returns
new symbol
Symbol mxnet::cpp::ftml_update ( Symbol  weight,
Symbol  grad,
Symbol  d,
Symbol  v,
Symbol  z,
mx_float  lr,
int  t,
mx_float  beta1 = 0.6,
mx_float  beta2 = 0.999,
double  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_grad = -1 
)
inline

The FTML optimizer described in FTML - Follow the Moving Leader in Deep Learning, available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf.

.. math::

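g_t = \nabla J(W_{t-1})\\
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\
d_t = \frac{ 1 - \beta_1^t }{ \eta_t } \left(\sqrt{ \frac{ v_t }{ 1 - \beta_2^t } } + \epsilon\right)\\
\sigma_t = d_t - \beta_1 d_{t-1}\\
z_t = \beta_1 z_{ t-1 } + (1 - \beta_1^t) g_t - \sigma_t W_{t-1}\\
W_t = - \frac{ z_t }{ d_t }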

   Defined in src/operator/optimizer_op.cc:L447
Parameters
weightWeight
gradGradient
dInternal state d_t
vInternal state v_t
zInternal state z_t
lrLearning rate.
tNumber of update.
beta1Generally close to 0.5.
beta2Generally close to 1.
epsilonEpsilon to prevent div 0.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
Returns
new symbol
Symbol mxnet::cpp::ftrl_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  z,
Symbol  n,
mx_float  lr,
mx_float  lamda1 = 0.01,
mx_float  beta = 1,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1 
)
inline

Update function for Ftrl optimizer. Referenced from Ad Click Prediction: a View from the Trenches, available at http://dl.acm.org/citation.cfm?id=2488200.

It updates the weights using::

rescaled_grad = clip(grad * rescale_grad, clip_gradient)
z += rescaled_grad - (sqrt(n + rescaled_grad**2) - sqrt(n)) * weight / learning_rate
n += rescaled_grad**2
w = (sign(z) * lamda1 - z) / ((beta + sqrt(n)) / learning_rate + wd) * (abs(z) > lamda1)

If w, z and n are all of row_sparse storage type, only the row slices whose indices appear in grad.indices are updated (for w, z and n)::

for row in grad.indices:
    rescaled_grad[row] = clip(grad[row] * rescale_grad, clip_gradient)
    z[row] += rescaled_grad[row] - (sqrt(n[row] + rescaled_grad[row]**2) - sqrt(n[row])) * weight[row] / learning_rate
    n[row] += rescaled_grad[row]**2
    w[row] = (sign(z[row]) * lamda1 - z[row]) / ((beta + sqrt(n[row])) / learning_rate + wd) * (abs(z[row]) > lamda1)

   Defined in src/operator/optimizer_op.cc:L632
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
zz
nSquare of grad
lrLearning rate
lamda1The L1 regularization coefficient.
betaPer-Coordinate Learning Rate beta.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
Returns
new symbol
Symbol mxnet::cpp::ftrl_update ( Symbol  weight,
Symbol  grad,
Symbol  z,
Symbol  n,
mx_float  lr,
mx_float  lamda1 = 0.01,
mx_float  beta = 1,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1 
)
inline

Update function for Ftrl optimizer. Referenced from Ad Click Prediction: a View from the Trenches, available at http://dl.acm.org/citation.cfm?id=2488200.

It updates the weights using::

rescaled_grad = clip(grad * rescale_grad, clip_gradient)
z += rescaled_grad - (sqrt(n + rescaled_grad**2) - sqrt(n)) * weight / learning_rate
n += rescaled_grad**2
w = (sign(z) * lamda1 - z) / ((beta + sqrt(n)) / learning_rate + wd) * (abs(z) > lamda1)

If w, z and n are all of row_sparse storage type, only the row slices whose indices appear in grad.indices are updated (for w, z and n)::

for row in grad.indices:
    rescaled_grad[row] = clip(grad[row] * rescale_grad, clip_gradient)
    z[row] += rescaled_grad[row] - (sqrt(n[row] + rescaled_grad[row]**2) - sqrt(n[row])) * weight[row] / learning_rate
    n[row] += rescaled_grad[row]**2
    w[row] = (sign(z[row]) * lamda1 - z[row]) / ((beta + sqrt(n[row])) / learning_rate + wd) * (abs(z[row]) > lamda1)

   Defined in src/operator/optimizer_op.cc:L632
Parameters
weightWeight
gradGradient
zz
nSquare of grad
lrLearning rate
lamda1The L1 regularization coefficient.
betaPer-Coordinate Learning Rate beta.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
Returns
new symbol
Symbol mxnet::cpp::FullyConnected ( const std::string &  symbol_name,
Symbol  data,
Symbol  weight,
Symbol  bias,
int  num_hidden,
bool  no_bias = false,
bool  flatten = true 
)
inline

Applies a linear transformation: :math:`Y = XW^T + b`.

   If ``flatten`` is set to be true, then the shapes are:

   - **data**: `(batch_size, x1, x2, ..., xn)`
   - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
   - **bias**: `(num_hidden,)`
   - **out**: `(batch_size, num_hidden)`

   If ``flatten`` is set to be false, then the shapes are:

   - **data**: `(x1, x2, ..., xn, input_dim)`
   - **weight**: `(num_hidden, input_dim)`
   - **bias**: `(num_hidden,)`
   - **out**: `(x1, x2, ..., xn, num_hidden)`

   The learnable parameters include both ``weight`` and ``bias``.

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   .. Note::

   The sparse support for FullyConnected is limited to forward evaluation with `row_sparse`
   weight and bias, where the length of `weight.indices` and `bias.indices` must be equal
   to `num_hidden`. This could be useful for model inference with `row_sparse` weights
   trained with importance sampling or noise contrastive estimation.

   To compute linear transformation with 'csr' sparse data, sparse.dot is recommended instead
   of sparse.FullyConnected.



   Defined in src/operator/nn/fully_connected.cc:L271
Parameters
symbol_namename of the resulting symbol
dataInput data.
weightWeight matrix.
biasBias parameter.
num_hiddenNumber of hidden nodes of the output.
no_biasWhether to disable bias parameter.
flattenWhether to collapse all but the first axis of the input data tensor.
Returns
new symbol
Symbol mxnet::cpp::FullyConnected ( Symbol  data,
Symbol  weight,
Symbol  bias,
int  num_hidden,
bool  no_bias = false,
bool  flatten = true 
)
inline

Applies a linear transformation: :math:`Y = XW^T + b`.

   If ``flatten`` is set to be true, then the shapes are:

   - **data**: `(batch_size, x1, x2, ..., xn)`
   - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
   - **bias**: `(num_hidden,)`
   - **out**: `(batch_size, num_hidden)`

   If ``flatten`` is set to be false, then the shapes are:

   - **data**: `(x1, x2, ..., xn, input_dim)`
   - **weight**: `(num_hidden, input_dim)`
   - **bias**: `(num_hidden,)`
   - **out**: `(x1, x2, ..., xn, num_hidden)`

   The learnable parameters include both ``weight`` and ``bias``.

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   .. Note::

   The sparse support for FullyConnected is limited to forward evaluation with `row_sparse`
   weight and bias, where the length of `weight.indices` and `bias.indices` must be equal
   to `num_hidden`. This could be useful for model inference with `row_sparse` weights
   trained with importance sampling or noise contrastive estimation.

   To compute linear transformation with 'csr' sparse data, sparse.dot is recommended instead
   of sparse.FullyConnected.



   Defined in src/operator/nn/fully_connected.cc:L271
Parameters
dataInput data.
weightWeight matrix.
biasBias parameter.
num_hiddenNumber of hidden nodes of the output.
no_biasWhether to disable bias parameter.
flattenWhether to collapse all but the first axis of the input data tensor.
Returns
new symbol
Symbol mxnet::cpp::gamma ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the gamma function (extension of the factorial function to the reals), computed element-wise on the input array.

The storage type of gamma output is always dense

Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::gamma ( Symbol  data)
inline

Returns the gamma function (extension of the factorial function to the reals), computed element-wise on the input array.

The storage type of gamma output is always dense

Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::gammaln ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise log of the absolute value of the gamma function of the input.

The storage type of gammaln output is always dense

Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::gammaln ( Symbol  data)
inline

Returns element-wise log of the absolute value of the gamma function of the input.

The storage type of gammaln output is always dense

Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::gather_nd ( const std::string &  symbol_name,
Symbol  data,
Symbol  indices 
)
inline

Gather elements or slices from data and store to a tensor whose shape is defined by indices.

Given `data` with shape `(X_0, X_1, ..., X_{N-1})` and `indices` with shape `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})`, where `M <= N`. If `M == N`, output shape will simply be `(Y_0, ..., Y_{K-1})`.

The elements in output are defined as follows::

output[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}] = data[indices[0, y_0, ..., y_{K-1}], ..., indices[M-1, y_0, ..., y_{K-1}], x_M, ..., x_{N-1}]

Examples::

data = [[0, 1], [2, 3]] indices = [[1, 1, 0], [0, 1, 0]] gather_nd(data, indices) = [2, 3, 0]

data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] indices = [[0, 1], [1, 0]] gather_nd(data, indices) = [[3, 4], [5, 6]]

Parameters
symbol_namename of the resulting symbol
datadata
indicesindices
Returns
new symbol
Symbol mxnet::cpp::gather_nd ( Symbol  data,
Symbol  indices 
)
inline

Gather elements or slices from data and store to a tensor whose shape is defined by indices.

Given `data` with shape `(X_0, X_1, ..., X_{N-1})` and `indices` with shape `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})`, where `M <= N`. If `M == N`, output shape will simply be `(Y_0, ..., Y_{K-1})`.

The elements in output are defined as follows::

output[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}] = data[indices[0, y_0, ..., y_{K-1}], ..., indices[M-1, y_0, ..., y_{K-1}], x_M, ..., x_{N-1}]

Examples::

data = [[0, 1], [2, 3]] indices = [[1, 1, 0], [0, 1, 0]] gather_nd(data, indices) = [2, 3, 0]

data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] indices = [[0, 1], [1, 0]] gather_nd(data, indices) = [[3, 4], [5, 6]]

Parameters
datadata
indicesindices
Returns
new symbol
Symbol mxnet::cpp::GridGenerator ( const std::string &  symbol_name,
Symbol  data,
GridGeneratorTransformType  transform_type,
Shape  target_shape = Shape(0,0) 
)
inline

Generates 2D sampling grid for bilinear sampling.

Parameters
symbol_namename of the resulting symbol
dataInput data to the function.
transform_typeThe type of transformation. For affine, input data should be an affine matrix of size (batch, 6). For warp, input data should be an optical flow of size (batch, 2, h, w).
target_shapeSpecifies the output shape (H, W). This is required if transformation type is affine. If transformation type is warp, this parameter is ignored.
Returns
new symbol
Symbol mxnet::cpp::GridGenerator ( Symbol  data,
GridGeneratorTransformType  transform_type,
Shape  target_shape = Shape(0,0) 
)
inline

Generates 2D sampling grid for bilinear sampling.

Parameters
dataInput data to the function.
transform_typeThe type of transformation. For affine, input data should be an affine matrix of size (batch, 6). For warp, input data should be an optical flow of size (batch, 2, h, w).
target_shapeSpecifies the output shape (H, W). This is required if transformation type is affine. If transformation type is warp, this parameter is ignored.
Returns
new symbol
Symbol mxnet::cpp::hard_sigmoid ( const std::string &  symbol_name,
Symbol  data,
mx_float  alpha = 0.2,
mx_float  beta = 0.5 
)
inline

Computes hard sigmoid of x element-wise.

   .. math::
   y = max(0, min(1, alpha * x + beta))



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L115
Parameters
symbol_namename of the resulting symbol
dataThe input array.
alphaSlope of hard sigmoid
betaBias of hard sigmoid.
Returns
new symbol
Symbol mxnet::cpp::hard_sigmoid ( Symbol  data,
mx_float  alpha = 0.2,
mx_float  beta = 0.5 
)
inline

Computes hard sigmoid of x element-wise.

   .. math::
   y = max(0, min(1, alpha * x + beta))



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L115
Parameters
dataThe input array.
alphaSlope of hard sigmoid
betaBias of hard sigmoid.
Returns
new symbol
Symbol mxnet::cpp::IdentityAttachKLSparseReg ( const std::string &  symbol_name,
Symbol  data,
mx_float  sparseness_target = 0.1,
mx_float  penalty = 0.001,
mx_float  momentum = 0.9 
)
inline

Apply a sparse regularization to the output of a sigmoid activation function.

Parameters
symbol_namename of the resulting symbol
dataInput data.
sparseness_targetThe sparseness target
penaltyThe tradeoff parameter for the sparseness penalty
momentumThe momentum for running average
Returns
new symbol
Symbol mxnet::cpp::IdentityAttachKLSparseReg ( Symbol  data,
mx_float  sparseness_target = 0.1,
mx_float  penalty = 0.001,
mx_float  momentum = 0.9 
)
inline

Apply a sparse regularization to the output of a sigmoid activation function.

Parameters
dataInput data.
sparseness_targetThe sparseness target
penaltyThe tradeoff parameter for the sparseness penalty
momentumThe momentum for running average
Returns
new symbol
Symbol mxnet::cpp::InstanceNorm ( const std::string &  symbol_name,
Symbol  data,
Symbol  gamma,
Symbol  beta,
mx_float  eps = 0.001 
)
inline

Applies instance normalization to the n-dimensional input array.

   This operator takes an n-dimensional input array where (n>2) and normalizes
   the input using the following formula:

   .. math::

   out = \frac{x - mean[data]}{ \sqrt{Var[data]} + \epsilon} * gamma + beta

   This layer is similar to batch normalization layer (`BatchNorm`)
   with two differences: first, the normalization is
   carried out per example (instance), not over a batch. Second, the
   same normalization is applied both at test and train time. This
   operation is also known as `contrast normalization`.

   If the input data is of shape [batch, channel, spatial_dim1, spatial_dim2, ...],
   `gamma` and `beta` parameters must be vectors of shape [channel].

   This implementation is based on paper:

   .. [1] Instance Normalization: The Missing Ingredient for Fast Stylization,
   D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016 (arXiv:1607.08022v2).

   Examples::

   // Input of shape (2,1,2)
   x = [[[ 1.1,  2.2]],
   [[ 3.3,  4.4]]]

   // gamma parameter of length 1
   gamma = [1.5]

   // beta parameter of length 1
   beta = [0.5]

   // Instance normalization is calculated with the above formula
   InstanceNorm(x,gamma,beta) = [[[-0.997527  ,  1.99752665]],
   [[-0.99752653,  1.99752724]]]



   Defined in src/operator/instance_norm.cc:L95
Parameters
symbol_namename of the resulting symbol
dataAn n-dimensional input array (n > 2) of the form [batch, channel, spatial_dim1, spatial_dim2, ...].
gammaA vector of length 'channel', which multiplies the normalized input.
betaA vector of length 'channel', which is added to the product of the normalized input and the weight.
epsAn epsilon parameter to prevent division by 0.
Returns
new symbol
Symbol mxnet::cpp::InstanceNorm ( Symbol  data,
Symbol  gamma,
Symbol  beta,
mx_float  eps = 0.001 
)
inline

Applies instance normalization to the n-dimensional input array.

   This operator takes an n-dimensional input array where (n>2) and normalizes
   the input using the following formula:

   .. math::

   out = \frac{x - mean[data]}{ \sqrt{Var[data]} + \epsilon} * gamma + beta

   This layer is similar to batch normalization layer (`BatchNorm`)
   with two differences: first, the normalization is
   carried out per example (instance), not over a batch. Second, the
   same normalization is applied both at test and train time. This
   operation is also known as `contrast normalization`.

   If the input data is of shape [batch, channel, spatial_dim1, spatial_dim2, ...],
   `gamma` and `beta` parameters must be vectors of shape [channel].

   This implementation is based on paper:

   .. [1] Instance Normalization: The Missing Ingredient for Fast Stylization,
   D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016 (arXiv:1607.08022v2).

   Examples::

   // Input of shape (2,1,2)
   x = [[[ 1.1,  2.2]],
   [[ 3.3,  4.4]]]

   // gamma parameter of length 1
   gamma = [1.5]

   // beta parameter of length 1
   beta = [0.5]

   // Instance normalization is calculated with the above formula
   InstanceNorm(x,gamma,beta) = [[[-0.997527  ,  1.99752665]],
   [[-0.99752653,  1.99752724]]]



   Defined in src/operator/instance_norm.cc:L95
Parameters
dataAn n-dimensional input array (n > 2) of the form [batch, channel, spatial_dim1, spatial_dim2, ...].
gammaA vector of length 'channel', which multiplies the normalized input.
betaA vector of length 'channel', which is added to the product of the normalized input and the weight.
epsAn epsilon parameter to prevent division by 0.
Returns
new symbol
Symbol mxnet::cpp::khatri_rao ( const std::string &  symbol_name,
const std::vector< Symbol > &  args 
)
inline

Computes the Khatri-Rao product of the input matrices.

   Given a collection of :math:`n` input matrices,

   .. math::
   A_1 \in \mathbb{R}^{M_1 \times N}, \ldots, A_n \in \mathbb{R}^{M_n \times N},

   the (column-wise) Khatri-Rao product is defined as the matrix,

   .. math::
   X = A_1 \otimes \cdots \otimes A_n \in \mathbb{R}^{(M_1 \cdots M_n) \times N},

   where the :math:`k` th column is equal to the column-wise outer product
   :math:`{A_1}_k \otimes \cdots \otimes {A_n}_k` where :math:`{A_i}_k` is the kth
   column of the ith matrix.

   Example::

   >>> A = mx.nd.array([[1, -1],
   >>>                  [2, -3]])
   >>> B = mx.nd.array([[1, 4],
   >>>                  [2, 5],
   >>>                  [3, 6]])
   >>> C = mx.nd.khatri_rao(A, B)
   >>> print(C.asnumpy())
   [[  1.  -4.]
   [  2.  -5.]
   [  3.  -6.]
   [  2. -12.]
   [  4. -15.]
   [  6. -18.]]



   Defined in src/operator/contrib/krprod.cc:L108
Parameters
symbol_namename of the resulting symbol
argsPositional input matrices
Returns
new symbol
Symbol mxnet::cpp::khatri_rao ( const std::vector< Symbol > &  args)
inline

Computes the Khatri-Rao product of the input matrices.

   Given a collection of :math:`n` input matrices,

   .. math::
   A_1 \in \mathbb{R}^{M_1 \times N}, \ldots, A_n \in \mathbb{R}^{M_n \times N},

   the (column-wise) Khatri-Rao product is defined as the matrix,

   .. math::
   X = A_1 \otimes \cdots \otimes A_n \in \mathbb{R}^{(M_1 \cdots M_n) \times N},

   where the :math:`k` th column is equal to the column-wise outer product
   :math:`{A_1}_k \otimes \cdots \otimes {A_n}_k` where :math:`{A_i}_k` is the kth
   column of the ith matrix.

   Example::

   >>> A = mx.nd.array([[1, -1],
   >>>                  [2, -3]])
   >>> B = mx.nd.array([[1, 4],
   >>>                  [2, 5],
   >>>                  [3, 6]])
   >>> C = mx.nd.khatri_rao(A, B)
   >>> print(C.asnumpy())
   [[  1.  -4.]
   [  2.  -5.]
   [  3.  -6.]
   [  2. -12.]
   [  4. -15.]
   [  6. -18.]]



   Defined in src/operator/contrib/krprod.cc:L108
Parameters
argsPositional input matrices
Returns
new symbol
Symbol mxnet::cpp::L2Normalization ( const std::string &  symbol_name,
Symbol  data,
mx_float  eps = 1e-10,
L2NormalizationMode  mode = L2NormalizationMode::kInstance 
)
inline

Normalize the input array using the L2 norm.

   For 1-D NDArray, it computes::

   out = data / sqrt(sum(data ** 2) + eps)

   For N-D NDArray, if the input array has shape (N, N, ..., N),

   with ``mode`` = ``instance``, it normalizes each instance in the
   array by its L2 norm.::

   for i in 0...N
   out[i,:,:,...,:] = data[i,:,:,...,:] / sqrt(sum(data[i,:,:,...,:] ** 2) + eps)

   with ``mode`` = ``channel``, it normalizes each channel in the array by its L2 norm.::

   for i in 0...N
   out[:,i,:,...,:] = data[:,i,:,...,:] / sqrt(sum(data[:,i,:,...,:] ** 2) + eps)

   with ``mode`` = ``spatial``, it normalizes the cross channel norm for each position
   in the array by its L2 norm.::

   for dim in 2...N
   for i in 0...N
   out[.....,i,...] = take(out, indices=i, axis=dim) / sqrt(sum(take(out, indices=i, axis=dim) ** 2) + eps)
   -dim-

   Example::

   x = [[[1,2],
   [3,4]],
   [[2,2],
   [5,6]]]

   L2Normalization(x, mode='instance')
   =[[[ 0.18257418  0.36514837]
   [ 0.54772252  0.73029673]]
   [[ 0.24077171  0.24077171]
   [ 0.60192931  0.72231513]]]

   L2Normalization(x, mode='channel')
   =[[[ 0.31622776  0.44721359]
   [ 0.94868326  0.89442718]]
   [[ 0.37139067  0.31622776]
   [ 0.92847669  0.94868326]]]

   L2Normalization(x, mode='spatial')
   =[[[ 0.44721359  0.89442718]
   [ 0.60000002  0.80000001]]
   [[ 0.70710677  0.70710677]
   [ 0.6401844   0.76822126]]]



   Defined in src/operator/l2_normalization.cc:L196
Parameters
symbol_namename of the resulting symbol
dataInput array to normalize.
epsA small constant for numerical stability.
modeSpecify the dimension along which to compute L2 norm.
Returns
new symbol
Symbol mxnet::cpp::L2Normalization ( Symbol  data,
mx_float  eps = 1e-10,
L2NormalizationMode  mode = L2NormalizationMode::kInstance 
)
inline

Normalize the input array using the L2 norm.

   For 1-D NDArray, it computes::

   out = data / sqrt(sum(data ** 2) + eps)

   For N-D NDArray, if the input array has shape (N, N, ..., N),

   with ``mode`` = ``instance``, it normalizes each instance in the
   array by its L2 norm.::

   for i in 0...N
   out[i,:,:,...,:] = data[i,:,:,...,:] / sqrt(sum(data[i,:,:,...,:] ** 2) + eps)

   with ``mode`` = ``channel``, it normalizes each channel in the array by its L2 norm.::

   for i in 0...N
   out[:,i,:,...,:] = data[:,i,:,...,:] / sqrt(sum(data[:,i,:,...,:] ** 2) + eps)

   with ``mode`` = ``spatial``, it normalizes the cross channel norm for each position
   in the array by its L2 norm.::

   for dim in 2...N
   for i in 0...N
   out[.....,i,...] = take(out, indices=i, axis=dim) / sqrt(sum(take(out, indices=i, axis=dim) ** 2) + eps)
   -dim-

   Example::

   x = [[[1,2],
   [3,4]],
   [[2,2],
   [5,6]]]

   L2Normalization(x, mode='instance')
   =[[[ 0.18257418  0.36514837]
   [ 0.54772252  0.73029673]]
   [[ 0.24077171  0.24077171]
   [ 0.60192931  0.72231513]]]

   L2Normalization(x, mode='channel')
   =[[[ 0.31622776  0.44721359]
   [ 0.94868326  0.89442718]]
   [[ 0.37139067  0.31622776]
   [ 0.92847669  0.94868326]]]

   L2Normalization(x, mode='spatial')
   =[[[ 0.44721359  0.89442718]
   [ 0.60000002  0.80000001]]
   [[ 0.70710677  0.70710677]
   [ 0.6401844   0.76822126]]]



   Defined in src/operator/l2_normalization.cc:L196
Parameters
dataInput array to normalize.
epsA small constant for numerical stability.
modeSpecify the dimension along which to compute L2 norm.
Returns
new symbol
Symbol mxnet::cpp::LayerNorm ( const std::string &  symbol_name,
Symbol  data,
Symbol  gamma,
Symbol  beta,
int  axis = -1,
mx_float  eps = 1e-05,
bool  output_mean_var = false 
)
inline

Layer normalization.

   Normalizes the channels of the input tensor by mean and variance, and applies a
   scale ``gamma`` as well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis and then
   compute the normalized output, which has the same shape as input, as following:

   .. math::

   out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}} * gamma + beta

   Both ``gamma`` and ``beta`` are learnable parameters.

   Unlike BatchNorm and InstanceNorm,  the *mean* and *var* are computed along the channel dimension.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   ``data_std``. Note that no gradient will be passed through these two outputs.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is -1, which sets
   axis to be the last item in the input shape.



   Defined in src/operator/nn/layer_norm.cc:L94
Parameters
symbol_namename of the resulting symbol
dataInput data to layer normalization
gammagamma array
betabeta array
axisThe axis to perform layer normalization. Usually, this should be the axis of the channel dimension. Negative values mean indexing from right to left.
epsAn epsilon parameter to prevent division by 0.
output_mean_varOutput the mean and std calculated along the given axis.
Returns
new symbol
Symbol mxnet::cpp::LayerNorm ( Symbol  data,
Symbol  gamma,
Symbol  beta,
int  axis = -1,
mx_float  eps = 1e-05,
bool  output_mean_var = false 
)
inline

Layer normalization.

   Normalizes the channels of the input tensor by mean and variance, and applies a
   scale ``gamma`` as well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis and then
   compute the normalized output, which has the same shape as input, as following:

   .. math::

   out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}} * gamma + beta

   Both ``gamma`` and ``beta`` are learnable parameters.

   Unlike BatchNorm and InstanceNorm,  the *mean* and *var* are computed along the channel dimension.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   ``data_std``. Note that no gradient will be passed through these two outputs.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is -1, which sets
   axis to be the last item in the input shape.



   Defined in src/operator/nn/layer_norm.cc:L94
Parameters
dataInput data to layer normalization
gammagamma array
betabeta array
axisThe axis to perform layer normalization. Usually, this should be the axis of the channel.
epsAn epsilon parameter to prevent division by 0.
output_mean_varOutput the mean and std calculated along the given axis.
Returns
new symbol
Symbol mxnet::cpp::LeakyReLU ( const std::string &  symbol_name,
Symbol  data,
Symbol  gamma,
LeakyReLUActType  act_type = LeakyReLUActType::kLeaky,
mx_float  slope = 0.25,
mx_float  lower_bound = 0.125,
mx_float  upper_bound = 0.334 
)
inline

Applies Leaky rectified linear unit activation element-wise to the input.

   Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope`
   when the input is negative and has a slope of one when input is positive.

   The following modified ReLU Activation functions are supported:

   - *elu*: Exponential Linear Unit. `y = x > 0 ? x : slope * (exp(x)-1)`
   - *selu*: Scaled Exponential Linear Unit. `y = lambda * (x > 0 ? x : alpha *
   (exp(x) - 1))` where *lambda = 1.0507009873554804934193349852946* and *alpha =
   1.6732632423543772848170429916717*.
   - *leaky*: Leaky ReLU. `y = x > 0 ? x : slope * x`
   - *prelu*: Parametric ReLU. This is same as *leaky* except that `slope` is
   learnt during training.
   - *rrelu*: Randomized ReLU. same as *leaky* but the `slope` is uniformly and
   randomly chosen from *[lower_bound, upper_bound)* for training, while fixed to be
   *(lower_bound+upper_bound)/2* for inference.



   Defined in src/operator/leaky_relu.cc:L65
Parameters
symbol_namename of the resulting symbol
dataInput data to activation function.
gammaSlope parameter for PReLU. Only required when act_type is 'prelu'. It should be either a vector of size 1, or the same size as the second dimension of data.
act_typeActivation function to be applied.
slopeInit slope for the activation. (For leaky and elu only)
lower_boundLower bound of random slope. (For rrelu only)
upper_boundUpper bound of random slope. (For rrelu only)
Returns
new symbol
Symbol mxnet::cpp::LeakyReLU ( Symbol  data,
Symbol  gamma,
LeakyReLUActType  act_type = LeakyReLUActType::kLeaky,
mx_float  slope = 0.25,
mx_float  lower_bound = 0.125,
mx_float  upper_bound = 0.334 
)
inline

Applies Leaky rectified linear unit activation element-wise to the input.

   Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope`
   when the input is negative and has a slope of one when input is positive.

   The following modified ReLU Activation functions are supported:

   - *elu*: Exponential Linear Unit. `y = x > 0 ? x : slope * (exp(x)-1)`
   - *selu*: Scaled Exponential Linear Unit. `y = lambda * (x > 0 ? x : alpha *
   (exp(x) - 1))` where *lambda = 1.0507009873554804934193349852946* and *alpha =
   1.6732632423543772848170429916717*.
   - *leaky*: Leaky ReLU. `y = x > 0 ? x : slope * x`
   - *prelu*: Parametric ReLU. This is same as *leaky* except that `slope` is
   learnt during training.
   - *rrelu*: Randomized ReLU. same as *leaky* but the `slope` is uniformly and
   randomly chosen from *[lower_bound, upper_bound)* for training, while fixed to be
   *(lower_bound+upper_bound)/2* for inference.



   Defined in src/operator/leaky_relu.cc:L65
Parameters
dataInput data to activation function.
gammaSlope parameter for PReLU. Only required when act_type is 'prelu'. It should be either a vector of size 1, or the same size as the second dimension of data.
act_typeActivation function to be applied.
slopeInit slope for the activation. (For leaky and elu only)
lower_boundLower bound of random slope. (For rrelu only)
upper_boundUpper bound of random slope. (For rrelu only)
Returns
new symbol
Symbol mxnet::cpp::LinearRegressionOutput ( const std::string &  symbol_name,
Symbol  data,
Symbol  label,
mx_float  grad_scale = 1 
)
inline

Computes and optimizes for squared loss during backward propagation. Just outputs data during forward propagation.

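If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i` is the corresponding target value, then the squared loss estimated over :math:`n` samples is defined as

:math:`\text{SquaredLoss}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_2^2`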

.. note:: Use the LinearRegressionOutput as the final output layer of a net.

The storage type of label can be default or csr

  • LinearRegressionOutput(default, default) = default
  • LinearRegressionOutput(default, csr) = default

By default, gradients of this loss function are scaled by factor 1/m, where m is the number of regression outputs of a training example. The parameter grad_scale can be used to change this scale to grad_scale/m.

   Defined in src/operator/regression_output.cc:L92
Parameters
symbol_namename of the resulting symbol
dataInput data to the function.
labelInput label to the function.
grad_scaleScale the gradient by a float factor
Returns
new symbol
Symbol mxnet::cpp::LinearRegressionOutput ( Symbol  data,
Symbol  label,
mx_float  grad_scale = 1 
)
inline

Computes and optimizes for squared loss during backward propagation. Just outputs data during forward propagation.

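If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i` is the corresponding target value, then the squared loss estimated over :math:`n` samples is defined as

:math:`\text{SquaredLoss}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_2^2`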

.. note:: Use the LinearRegressionOutput as the final output layer of a net.

The storage type of label can be default or csr

  • LinearRegressionOutput(default, default) = default
  • LinearRegressionOutput(default, csr) = default

By default, gradients of this loss function are scaled by factor 1/m, where m is the number of regression outputs of a training example. The parameter grad_scale can be used to change this scale to grad_scale/m.

   Defined in src/operator/regression_output.cc:L92
Parameters
dataInput data to the function.
labelInput label to the function.
grad_scaleScale the gradient by a float factor
Returns
new symbol
Symbol mxnet::cpp::log ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise Natural logarithmic value of the input.

   The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x``

   The storage type of ``log`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L951
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log ( Symbol  data)
inline

Returns element-wise Natural logarithmic value of the input.

   The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x``

   The storage type of ``log`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L951
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log10 ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise Base-10 logarithmic value of the input.

   ``10**log10(x) = x``

   The storage type of ``log10`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L963
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log10 ( Symbol  data)
inline

Returns element-wise Base-10 logarithmic value of the input.

   ``10**log10(x) = x``

   The storage type of ``log10`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L963
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log1p ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise log(1 + x) value of the input.

   This function is more accurate than ``log(1 + x)``  for small ``x`` so that
   :math:`1+x\approx 1`

   The storage type of ``log1p`` output depends upon the input storage type:

   - log1p(default) = default
   - log1p(row_sparse) = row_sparse
   - log1p(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1000
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log1p ( Symbol  data)
inline

Returns element-wise log(1 + x) value of the input.

   This function is more accurate than ``log(1 + x)``  for small ``x`` so that
   :math:`1+x\approx 1`

   The storage type of ``log1p`` output depends upon the input storage type:

   - log1p(default) = default
   - log1p(row_sparse) = row_sparse
   - log1p(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1000
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log2 ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise Base-2 logarithmic value of the input.

   ``2**log2(x) = x``

   The storage type of ``log2`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L975
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log2 ( Symbol  data)
inline

Returns element-wise Base-2 logarithmic value of the input.

   ``2**log2(x) = x``

   The storage type of ``log2`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L975
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::log_softmax ( const std::string &  symbol_name,
Symbol  data,
int  axis = -1,
dmlc::optional< double >  temperature = dmlc::optional<double>() 
)
inline

Computes the log softmax of the input. This is equivalent to computing softmax followed by log.

Examples::

>>> x = mx.nd.array([1, 2, .1])
>>> mx.nd.log_softmax(x).asnumpy()
array([-1.41702998, -0.41702995, -2.31702995], dtype=float32)

>>> x = mx.nd.array( [[1, 2, .1],[.1, 2, 1]] )
>>> mx.nd.log_softmax(x, axis=0).asnumpy()
array([[-0.34115392, -0.69314718, -1.24115396],
       [-1.24115396, -0.69314718, -0.34115392]], dtype=float32)

Parameters
symbol_namename of the resulting symbol
dataThe input array.
axisThe axis along which to compute softmax.
temperatureTemperature parameter in softmax
Returns
new symbol
Symbol mxnet::cpp::log_softmax ( Symbol  data,
int  axis = -1,
dmlc::optional< double >  temperature = dmlc::optional<double>() 
)
inline

Computes the log softmax of the input. This is equivalent to computing softmax followed by log.

Examples::

>>> x = mx.nd.array([1, 2, .1])
>>> mx.nd.log_softmax(x).asnumpy()
array([-1.41702998, -0.41702995, -2.31702995], dtype=float32)

>>> x = mx.nd.array( [[1, 2, .1],[.1, 2, 1]] )
>>> mx.nd.log_softmax(x, axis=0).asnumpy()
array([[-0.34115392, -0.69314718, -1.24115396],
       [-1.24115396, -0.69314718, -0.34115392]], dtype=float32)

Parameters
dataThe input array.
axisThe axis along which to compute softmax.
temperatureTemperature parameter in softmax
Returns
new symbol
Symbol mxnet::cpp::logical_not ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the result of logical NOT (!) function

   Example:
   logical_not([-2., 0., 1.]) = [0., 1., 0.]
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::logical_not ( Symbol  data)
inline

Returns the result of logical NOT (!) function

   Example:
   logical_not([-2., 0., 1.]) = [0., 1., 0.]
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::LogisticRegressionOutput ( const std::string &  symbol_name,
Symbol  data,
Symbol  label,
mx_float  grad_scale = 1 
)
inline

Applies a logistic function to the input.

   The logistic function, also known as the sigmoid function, is computed as
   :math:`\frac{1}{1+exp(-\textbf{x})}`.

   Commonly, the sigmoid is used to squash the real-valued output of a linear model
   :math:`wTx+b` into the [0,1] range so that it can be interpreted as a probability.
   It is suitable for binary classification or probability prediction tasks.

   .. note::
   Use the LogisticRegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - LogisticRegressionOutput(default, default) = default
   - LogisticRegressionOutput(default, csr) = default

   The loss function used is the Binary Cross Entropy Loss:

   :math:`-{(y\log(p) + (1 - y)\log(1 - p))}`

   Where `y` is the ground truth probability of positive outcome for a given
   example, and `p` the probability predicted by the model. By default, gradients
   of this loss function are scaled by factor `1/m`, where m is the number of
   regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L152
Parameters
symbol_namename of the resulting symbol
dataInput data to the function.
labelInput label to the function.
grad_scaleScale the gradient by a float factor
Returns
new symbol
Symbol mxnet::cpp::LogisticRegressionOutput ( Symbol  data,
Symbol  label,
mx_float  grad_scale = 1 
)
inline

Applies a logistic function to the input.

   The logistic function, also known as the sigmoid function, is computed as
   :math:`\frac{1}{1+exp(-\textbf{x})}`.

   Commonly, the sigmoid is used to squash the real-valued output of a linear model
   :math:`wTx+b` into the [0,1] range so that it can be interpreted as a probability.
   It is suitable for binary classification or probability prediction tasks.

   .. note::
   Use the LogisticRegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - LogisticRegressionOutput(default, default) = default
   - LogisticRegressionOutput(default, csr) = default

   The loss function used is the Binary Cross Entropy Loss:

   :math:`-{(y\log(p) + (1 - y)\log(1 - p))}`

   Where `y` is the ground truth probability of positive outcome for a given
   example, and `p` the probability predicted by the model. By default, gradients
   of this loss function are scaled by factor `1/m`, where m is the number of
   regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L152
Parameters
dataInput data to the function.
labelInput label to the function.
grad_scaleScale the gradient by a float factor
Returns
new symbol
Symbol mxnet::cpp::LRN ( const std::string &  symbol_name,
Symbol  data,
uint32_t  nsize,
mx_float  alpha = 0.0001,
mx_float  beta = 0.75,
mx_float  knorm = 2 
)
inline

Applies local response normalization to the input.

   The local response normalization layer performs "lateral inhibition" by
   normalizing over local input regions.

   If :math:`a_{x,y}^{i}` is the activity of a neuron computed by applying kernel
   :math:`i` at position :math:`(x, y)` and then applying the ReLU nonlinearity,
   the response-normalized activity :math:`b_{x,y}^{i}` is given by the expression:

   .. math::
   b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0, i-\frac{n}{2})}^{min(N-1, i+\frac{n}{2})} (a_{x,y}^{j})^{2}}\Bigg)^{\beta}}

   where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial
   position, and :math:`N` is the total number of kernels in the layer.



   Defined in src/operator/nn/lrn.cc:L164
Parameters
symbol_namename of the resulting symbol
dataInput data to LRN
nsizenormalization window width in elements.
alphaThe variance scaling parameter :math:`\alpha` in the LRN expression.
betaThe power parameter :math:`\beta` in the LRN expression.
knormThe parameter :math:`k` in the LRN expression.
Returns
new symbol
Symbol mxnet::cpp::LRN ( Symbol  data,
uint32_t  nsize,
mx_float  alpha = 0.0001,
mx_float  beta = 0.75,
mx_float  knorm = 2 
)
inline

Applies local response normalization to the input.

   The local response normalization layer performs "lateral inhibition" by
   normalizing over local input regions.

   If :math:`a_{x,y}^{i}` is the activity of a neuron computed by applying kernel
   :math:`i` at position :math:`(x, y)` and then applying the ReLU nonlinearity,
   the response-normalized activity :math:`b_{x,y}^{i}` is given by the expression:

   .. math::
   b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0, i-\frac{n}{2})}^{min(N-1, i+\frac{n}{2})} (a_{x,y}^{j})^{2}}\Bigg)^{\beta}}

   where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial
   position, and :math:`N` is the total number of kernels in the layer.



   Defined in src/operator/nn/lrn.cc:L164
Parameters
dataInput data to LRN
nsizenormalization window width in elements.
alphaThe variance scaling parameter :math:`\alpha` in the LRN expression.
betaThe power parameter :math:`\beta` in the LRN expression.
knormThe parameter :math:`k` in the LRN expression.
Returns
new symbol
Symbol mxnet::cpp::MAERegressionOutput ( const std::string &  symbol_name,
Symbol  data,
Symbol  label,
mx_float  grad_scale = 1 
)
inline

Computes mean absolute error of the input.

   MAE is a risk metric corresponding to the expected value of the absolute error.

   If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i`
   is the corresponding target value, then the mean absolute error (MAE) estimated
   over :math:`n` samples is defined as

   :math:`\text{MAE}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_1`

   .. note::
   Use the MAERegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - MAERegressionOutput(default, default) = default
   - MAERegressionOutput(default, csr) = default

   By default, gradients of this loss function are scaled by factor `1/m`, where m
   is the number of regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L120
Parameters
symbol_namename of the resulting symbol
dataInput data to the function.
labelInput label to the function.
grad_scaleScale the gradient by a float factor
Returns
new symbol
Symbol mxnet::cpp::MAERegressionOutput ( Symbol  data,
Symbol  label,
mx_float  grad_scale = 1 
)
inline

Computes mean absolute error of the input.

   MAE is a risk metric corresponding to the expected value of the absolute error.

   If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i`
   is the corresponding target value, then the mean absolute error (MAE) estimated
   over :math:`n` samples is defined as

   :math:`\text{MAE}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_1`

   .. note::
   Use the MAERegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - MAERegressionOutput(default, default) = default
   - MAERegressionOutput(default, csr) = default

   By default, gradients of this loss function are scaled by factor `1/m`, where m
   is the number of regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L120
Parameters
dataInput data to the function.
labelInput label to the function.
grad_scaleScale the gradient by a float factor
Returns
new symbol
Symbol mxnet::cpp::make_loss ( const std::string &  symbol_name,
Symbol  data 
)
inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input
   data.

   For example, suppose you are making a cross entropy loss function. Assume ``out``
   is the predicted output and ``label`` is the true label, then the cross entropy
   can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = make_loss(cross_entropy)

   We will need to use ``make_loss`` when we are creating our own loss function or
   we want to combine multiple loss functions. Also we may want to stop some
   variables' gradients from backpropagation. See more detail in ``BlockGrad`` or
   ``stop_gradient``.

   The storage type of ``make_loss`` output depends upon the input storage type:

   - make_loss(default) = default
   - make_loss(row_sparse) = row_sparse



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L300
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::make_loss ( Symbol  data)
inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input
   data.

   For example, suppose you are making a cross entropy loss function. Assume ``out``
   is the predicted output and ``label`` is the true label, then the cross entropy
   can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = make_loss(cross_entropy)

   We will need to use ``make_loss`` when we are creating our own loss function or
   we want to combine multiple loss functions. Also we may want to stop some
   variables' gradients from backpropagation. See more detail in ``BlockGrad`` or
   ``stop_gradient``.

   The storage type of ``make_loss`` output depends upon the input storage type:

   - make_loss(default) = default
   - make_loss(row_sparse) = row_sparse



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L300
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::MakeLoss ( const std::string &  symbol_name,
Symbol  data,
mx_float  grad_scale = 1,
mx_float  valid_thresh = 0,
MakeLossNormalization  normalization = MakeLossNormalization::kNull 
)
inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input
   data.

   For example, suppose you are making a cross entropy loss function. Assume ``out``
   is the predicted output and ``label`` is the true label, then the cross entropy
   can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = MakeLoss(cross_entropy)

   We will need to use ``MakeLoss`` when we are creating our own loss function or
   we want to combine multiple loss functions. Also we may want to stop some
   variables' gradients from backpropagation. See more detail in ``BlockGrad`` or
   ``stop_gradient``.

   In addition, we can give a scale to the loss by setting ``grad_scale``,
   so that the gradient of the loss will be rescaled in the backpropagation.

   .. note:: This operator should be used as a Symbol instead of NDArray.



   Defined in src/operator/make_loss.cc:L71
Parameters
symbol_namename of the resulting symbol
dataInput array.
grad_scaleGradient scale as a supplement to unary and binary operators
valid_threshclip each element in the array to 0 when it is less than ``valid_thresh``. This is used when ``normalization`` is set to ``'valid'``.
normalizationIf this is set to null, the output gradient will not be normalized. If this is set to batch, the output gradient will be divided by the batch size. If this is set to valid, the output gradient will be divided by the number of valid input elements.
Returns
new symbol
Symbol mxnet::cpp::MakeLoss ( Symbol  data,
mx_float  grad_scale = 1,
mx_float  valid_thresh = 0,
MakeLossNormalization  normalization = MakeLossNormalization::kNull 
)
inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input
   data.

   For example, suppose you are making a cross entropy loss function. Assume ``out``
   is the predicted output and ``label`` is the true label, then the cross entropy
   can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = MakeLoss(cross_entropy)

   We will need to use ``MakeLoss`` when we are creating our own loss function or
   we want to combine multiple loss functions. Also we may want to stop some
   variables' gradients from backpropagation. See more detail in ``BlockGrad`` or
   ``stop_gradient``.

   In addition, we can give a scale to the loss by setting ``grad_scale``,
   so that the gradient of the loss will be rescaled in the backpropagation.

   .. note:: This operator should be used as a Symbol instead of NDArray.



   Defined in src/operator/make_loss.cc:L71
Parameters
dataInput array.
grad_scaleGradient scale as a supplement to unary and binary operators
valid_threshclip each element in the array to 0 when it is less than ``valid_thresh``. This is used when ``normalization`` is set to ``'valid'``.
normalizationIf this is set to null, the output gradient will not be normalized. If this is set to batch, the output gradient will be divided by the batch size. If this is set to valid, the output gradient will be divided by the number of valid input elements.
Returns
new symbol
Symbol mxnet::cpp::max ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the max of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L191
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimension with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::max ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the max of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L191
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimension with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::mean ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the mean of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L132
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimension with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::mean ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the mean of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L132
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimension with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::min ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the min of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L205
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimension with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::min ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the min of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L205
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimension with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::mp_sgd_mom_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  mom,
Symbol  weight32,
mx_float  lr,
mx_float  momentum = 0,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Updater function for multi-precision sgd optimizer

Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
momMomentum
weight32Weight32
lrLearning rate
momentumThe decay rate of momentum estimates at each epoch.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse
Returns
new symbol
Symbol mxnet::cpp::mp_sgd_mom_update ( Symbol  weight,
Symbol  grad,
Symbol  mom,
Symbol  weight32,
mx_float  lr,
mx_float  momentum = 0,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Updater function for multi-precision sgd optimizer

Parameters
weightWeight
gradGradient
momMomentum
weight32Weight32
lrLearning rate
momentumThe decay rate of momentum estimates at each epoch.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse
Returns
new symbol
Symbol mxnet::cpp::mp_sgd_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  weight32,
mx_float  lr,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Updater function for multi-precision sgd optimizer

Parameters
symbol_namename of the resulting symbol
weightWeight
gradgradient
weight32Weight32
lrLearning rate
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse.
Returns
new symbol
Symbol mxnet::cpp::mp_sgd_update ( Symbol  weight,
Symbol  grad,
Symbol  weight32,
mx_float  lr,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Updater function for multi-precision sgd optimizer

Parameters
weightWeight
gradgradient
weight32Weight32
lrLearning rate
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse.
Returns
new symbol
Symbol mxnet::cpp::nanprod ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the product of array elements over given axes treating Not a Numbers (``NaN``) as one.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L177
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values mean indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::nanprod ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the product of array elements over given axes treating Not a Numbers (``NaN``) as one.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L177
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values mean indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::nansum ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the sum of array elements over given axes treating Not a Numbers (``NaN``) as zero.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L162
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values mean indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::nansum ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the sum of array elements over given axes treating Not a Numbers (``NaN``) as zero.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L162
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values mean indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::negative ( const std::string &  symbol_name,
Symbol  data 
)
inline

Numerical negative of the argument, element-wise.

   The storage type of ``negative`` output depends upon the input storage type:

   - negative(default) = default
   - negative(row_sparse) = row_sparse
   - negative(csr) = csr
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::negative ( Symbol  data)
inline

Numerical negative of the argument, element-wise.

   The storage type of ``negative`` output depends upon the input storage type:

   - negative(default) = default
   - negative(row_sparse) = row_sparse
   - negative(csr) = csr
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::norm ( const std::string &  symbol_name,
Symbol  data,
int  ord = 2,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false 
)
inline

Computes the norm on an NDArray.

   This operator computes the norm on an NDArray with the specified axis, depending
   on the value of the ord parameter. By default, it computes the L2 norm on the
   array. Currently only ord=2 supports sparse ndarrays.

   Examples::

   x = [[[1, 2],
   [3, 4]],
   [[2, 2],
   [5, 6]]]

   norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
   [5.3851647 6.3245554]]

   norm(x, ord=1, axis=1) = [[4., 6.],
   [7., 8.]]

   rsp = x.cast_storage('row_sparse')

   norm(rsp) = [5.47722578]

   csr = x.cast_storage('csr')

   norm(csr) = [5.47722578]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L350
Parameters
symbol_namename of the resulting symbol
dataThe input
ordOrder of the norm. Currently ord=1 and ord=2 are supported.
axisThe axis or axes along which to perform the reduction. The default, axis=(), will compute over all elements into a scalar array with shape (1,). If axis is int, a reduction is performed on a particular axis. If axis is a 2-tuple, it specifies the axes that hold 2-D matrices, and the matrix norms of these matrices are computed.
keepdimsIf this is set to True, the reduced axis is left in the result as a dimension with size one.
Returns
new symbol
Symbol mxnet::cpp::norm ( Symbol  data,
int  ord = 2,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false 
)
inline

Computes the norm on an NDArray.

   This operator computes the norm on an NDArray with the specified axis, depending
   on the value of the ord parameter. By default, it computes the L2 norm on the
   array. Currently only ord=2 supports sparse ndarrays.

   Examples::

   x = [[[1, 2],
   [3, 4]],
   [[2, 2],
   [5, 6]]]

   norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
   [5.3851647 6.3245554]]

   norm(x, ord=1, axis=1) = [[4., 6.],
   [7., 8.]]

   rsp = x.cast_storage('row_sparse')

   norm(rsp) = [5.47722578]

   csr = x.cast_storage('csr')

   norm(csr) = [5.47722578]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L350
Parameters
dataThe input
ordOrder of the norm. Currently ord=1 and ord=2 are supported.
axisThe axis or axes along which to perform the reduction. The default, axis=(), will compute over all elements into a scalar array with shape (1,). If axis is int, a reduction is performed on a particular axis. If axis is a 2-tuple, it specifies the axes that hold 2-D matrices, and the matrix norms of these matrices are computed.
keepdimsIf this is set to True, the reduced axis is left in the result as a dimension with size one.
Returns
new symbol
Symbol mxnet::cpp::one_hot ( const std::string &  symbol_name,
Symbol  indices,
int  depth,
double  on_value = 1,
double  off_value = 0,
One_hotDtype  dtype = One_hotDtype::kFloat32 
)
inline

Returns a one-hot array.

   The locations represented by `indices` take value `on_value`, while all
   other locations take value `off_value`.

   `one_hot` operation with `indices` of shape ``(i0, i1)`` and `depth`  of ``d`` would result
   in an output array of shape ``(i0, i1, d)`` with::

   output[i,j,:] = off_value
   output[i,j,indices[i,j]] = on_value

   Examples::

   one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
   [ 1.  0.  0.]
   [ 0.  0.  1.]
   [ 1.  0.  0.]]

   one_hot([1,0,2,0], 3, on_value=8, off_value=1,
   dtype='int32') = [[1 8 1]
   [8 1 1]
   [1 1 8]
   [8 1 1]]

   one_hot([[1,0],[1,0],[2,0]], 3) = [[[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  0.  1.]
   [ 1.  0.  0.]]]


   Defined in src/operator/tensor/indexing_op.cc:L796
Parameters
symbol_namename of the resulting symbol
indicesarray of locations where to set on_value
depthDepth of the one hot dimension.
on_valueThe value assigned to the locations represented by indices.
off_valueThe value assigned to the locations not represented by indices.
dtypeDType of the output
Returns
new symbol
Symbol mxnet::cpp::one_hot ( Symbol  indices,
int  depth,
double  on_value = 1,
double  off_value = 0,
One_hotDtype  dtype = One_hotDtype::kFloat32 
)
inline

Returns a one-hot array.

   The locations represented by `indices` take value `on_value`, while all
   other locations take value `off_value`.

   `one_hot` operation with `indices` of shape ``(i0, i1)`` and `depth`  of ``d`` would result
   in an output array of shape ``(i0, i1, d)`` with::

   output[i,j,:] = off_value
   output[i,j,indices[i,j]] = on_value

   Examples::

   one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
   [ 1.  0.  0.]
   [ 0.  0.  1.]
   [ 1.  0.  0.]]

   one_hot([1,0,2,0], 3, on_value=8, off_value=1,
   dtype='int32') = [[1 8 1]
   [8 1 1]
   [1 1 8]
   [8 1 1]]

   one_hot([[1,0],[1,0],[2,0]], 3) = [[[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  0.  1.]
   [ 1.  0.  0.]]]


   Defined in src/operator/tensor/indexing_op.cc:L796
Parameters
indicesarray of locations where to set on_value
depthDepth of the one hot dimension.
on_valueThe value assigned to the locations represented by indices.
off_valueThe value assigned to the locations not represented by indices.
dtypeDType of the output
Returns
new symbol
Symbol mxnet::cpp::ones_like ( const std::string &  symbol_name,
Symbol  data 
)
inline

Return an array of ones with the same shape and type as the input array.

Examples::

x = [[ 0., 0., 0.], [ 0., 0., 0.]]

ones_like(x) = [[ 1., 1., 1.], [ 1., 1., 1.]]

Parameters
symbol_namename of the resulting symbol
dataThe input
Returns
new symbol
Symbol mxnet::cpp::ones_like ( Symbol  data)
inline

Return an array of ones with the same shape and type as the input array.

Examples::

x = [[ 0., 0., 0.], [ 0., 0., 0.]]

ones_like(x) = [[ 1., 1., 1.], [ 1., 1., 1.]]

Parameters
dataThe input
Returns
new symbol
Symbol mxnet::cpp::operator% ( mx_float  lhs,
const Symbol &  rhs
)
Symbol mxnet::cpp::operator* ( mx_float  lhs,
const Symbol &  rhs
)
Symbol mxnet::cpp::operator+ ( mx_float  lhs,
const Symbol &  rhs
)
Symbol mxnet::cpp::operator- ( mx_float  lhs,
const Symbol &  rhs
)
Symbol mxnet::cpp::operator/ ( mx_float  lhs,
const Symbol &  rhs
)
std::ostream& mxnet::cpp::operator<< ( std::ostream &  os,
const Shape &  shape
)
inline

allow string printing of the shape

Parameters
osthe output stream
shapethe shape
Returns
the ostream
std::ostream& mxnet::cpp::operator<< ( std::ostream &  out,
const NDArray &  ndarray
)
std::istream& mxnet::cpp::operator>> ( std::istream &  is,
Shape &  shape
)
inline

read shape from the istream

Parameters
isthe input stream
shapethe shape
Returns
the istream
Symbol mxnet::cpp::Pad ( const std::string &  symbol_name,
Symbol  data,
PadMode  mode,
Shape  pad_width,
double  constant_value = 0 
)
inline

Pads an input array with a constant or edge values of the array.

   .. note:: `Pad` is deprecated. Use `pad` instead.

   .. note:: Current implementation only supports 4D and 5D input arrays with padding applied
   only on axes 1, 2 and 3. Expects axes 4 and 5 in `pad_width` to be zero.

   This operation pads an input array with either a `constant_value` or edge values
   along each axis of the input array. The amount of padding is specified by `pad_width`.

   `pad_width` is a tuple of integer padding widths for each axis of the format
   ``(before_1, after_1, ... , before_N, after_N)``. The `pad_width` should be of length ``2*N``
   where ``N`` is the number of dimensions of the array.

   For dimension ``N`` of the input array, ``before_N`` and ``after_N`` indicate how many values
   to add before and after the elements of the array along dimension ``N``.
   The widths of the higher two dimensions ``before_1``, ``after_1``, ``before_2``,
   ``after_2`` must be 0.

   Example::

   x = [[[[  1.   2.   3.]
   [  4.   5.   6.]]

   [[  7.   8.   9.]
   [ 10.  11.  12.]]]


   [[[ 11.  12.  13.]
   [ 14.  15.  16.]]

   [[ 17.  18.  19.]
   [ 20.  21.  22.]]]]

   pad(x,mode="edge", pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  1.   1.   2.   3.   3.]
   [  1.   1.   2.   3.   3.]
   [  4.   4.   5.   6.   6.]
   [  4.   4.   5.   6.   6.]]

   [[  7.   7.   8.   9.   9.]
   [  7.   7.   8.   9.   9.]
   [ 10.  10.  11.  12.  12.]
   [ 10.  10.  11.  12.  12.]]]


   [[[ 11.  11.  12.  13.  13.]
   [ 11.  11.  12.  13.  13.]
   [ 14.  14.  15.  16.  16.]
   [ 14.  14.  15.  16.  16.]]

   [[ 17.  17.  18.  19.  19.]
   [ 17.  17.  18.  19.  19.]
   [ 20.  20.  21.  22.  22.]
   [ 20.  20.  21.  22.  22.]]]]

   pad(x, mode="constant", constant_value=0, pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  0.   0.   0.   0.   0.]
   [  0.   1.   2.   3.   0.]
   [  0.   4.   5.   6.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.   7.   8.   9.   0.]
   [  0.  10.  11.  12.   0.]
   [  0.   0.   0.   0.   0.]]]


   [[[  0.   0.   0.   0.   0.]
   [  0.  11.  12.  13.   0.]
   [  0.  14.  15.  16.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.  17.  18.  19.   0.]
   [  0.  20.  21.  22.   0.]
   [  0.   0.   0.   0.   0.]]]]




   Defined in src/operator/pad.cc:L766
Parameters
symbol_namename of the resulting symbol
dataAn n-dimensional input array.
modePadding type to use. "constant" pads with constant_value, "edge" pads using the edge values of the input array, "reflect" pads by reflecting values with respect to the edges.
pad_widthWidths of the padding regions applied to the edges of each axis. It is a tuple of integer padding widths for each axis of the format (before_1, after_1, ... , before_N, after_N). It should be of length 2*N where N is the number of dimensions of the array. This is equivalent to pad_width in numpy.pad, but flattened.
constant_valueThe value used for padding when mode is "constant".
Returns
new symbol
Symbol mxnet::cpp::Pad ( Symbol  data,
PadMode  mode,
Shape  pad_width,
double  constant_value = 0 
)
inline

Pads an input array with a constant or edge values of the array.

   .. note:: `Pad` is deprecated. Use `pad` instead.

   .. note:: Current implementation only supports 4D and 5D input arrays with padding applied
   only on axes 1, 2 and 3. Expects axes 4 and 5 in `pad_width` to be zero.

   This operation pads an input array with either a `constant_value` or edge values
   along each axis of the input array. The amount of padding is specified by `pad_width`.

   `pad_width` is a tuple of integer padding widths for each axis of the format
   ``(before_1, after_1, ... , before_N, after_N)``. The `pad_width` should be of length ``2*N``
   where ``N`` is the number of dimensions of the array.

   For dimension ``N`` of the input array, ``before_N`` and ``after_N`` indicate how many values
   to add before and after the elements of the array along dimension ``N``.
   The widths of the higher two dimensions ``before_1``, ``after_1``, ``before_2``,
   ``after_2`` must be 0.

   Example::

   x = [[[[  1.   2.   3.]
   [  4.   5.   6.]]

   [[  7.   8.   9.]
   [ 10.  11.  12.]]]


   [[[ 11.  12.  13.]
   [ 14.  15.  16.]]

   [[ 17.  18.  19.]
   [ 20.  21.  22.]]]]

   pad(x,mode="edge", pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  1.   1.   2.   3.   3.]
   [  1.   1.   2.   3.   3.]
   [  4.   4.   5.   6.   6.]
   [  4.   4.   5.   6.   6.]]

   [[  7.   7.   8.   9.   9.]
   [  7.   7.   8.   9.   9.]
   [ 10.  10.  11.  12.  12.]
   [ 10.  10.  11.  12.  12.]]]


   [[[ 11.  11.  12.  13.  13.]
   [ 11.  11.  12.  13.  13.]
   [ 14.  14.  15.  16.  16.]
   [ 14.  14.  15.  16.  16.]]

   [[ 17.  17.  18.  19.  19.]
   [ 17.  17.  18.  19.  19.]
   [ 20.  20.  21.  22.  22.]
   [ 20.  20.  21.  22.  22.]]]]

   pad(x, mode="constant", constant_value=0, pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  0.   0.   0.   0.   0.]
   [  0.   1.   2.   3.   0.]
   [  0.   4.   5.   6.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.   7.   8.   9.   0.]
   [  0.  10.  11.  12.   0.]
   [  0.   0.   0.   0.   0.]]]


   [[[  0.   0.   0.   0.   0.]
   [  0.  11.  12.  13.   0.]
   [  0.  14.  15.  16.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.  17.  18.  19.   0.]
   [  0.  20.  21.  22.   0.]
   [  0.   0.   0.   0.   0.]]]]




   Defined in src/operator/pad.cc:L766
Parameters
dataAn n-dimensional input array.
modePadding type to use. "constant" pads with constant_value, "edge" pads using the edge values of the input array, "reflect" pads by reflecting values with respect to the edges.
pad_widthWidths of the padding regions applied to the edges of each axis. It is a tuple of integer padding widths for each axis of the format (before_1, after_1, ... , before_N, after_N). It should be of length 2*N where N is the number of dimensions of the array. This is equivalent to pad_width in numpy.pad, but flattened.
constant_valueThe value used for padding when mode is "constant".
Returns
new symbol
Symbol mxnet::cpp::pick ( const std::string &  symbol_name,
Symbol  data,
Symbol  index,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
bool  keepdims = false,
PickMode  mode = PickMode::kClip 
)
inline

Picks elements from an input array according to the input indices along the given axis.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be
   an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   By default, if any index mentioned is too large, it is replaced by the index that addresses
   the last element along an axis (the `clip` mode).

   This function supports n-dimensional input and (n-1)-dimensional indices arrays.

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // picks elements with specified indices along axis 0
   pick(x, y=[0,1], 0) = [ 1.,  4.]

   // picks elements with specified indices along axis 1
   pick(x, y=[0,1,0], 1) = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 using 'wrap' mode
   // to place indices that would normally be out of bounds
   pick(x, y=[2,-1,-2], 1, mode='wrap') = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 and dims are maintained
   pick(x,y, 1, keepdims=True) = [[ 2.],
   [ 3.],
   [ 6.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L153
Parameters
symbol_namename of the resulting symbol
dataThe input array
indexThe index array
axisint or None. The axis along which to pick the elements. Negative values mean indexing from right to left. If it is None, the elements in the index w.r.t. the flattened input will be picked.
keepdimsIf true, the axis where we pick the elements is left in the result as a dimension with size one.
modeSpecify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.
Returns
new symbol
Symbol mxnet::cpp::pick ( Symbol  data,
Symbol  index,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
bool  keepdims = false,
PickMode  mode = PickMode::kClip 
)
inline

Picks elements from an input array according to the input indices along the given axis.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be
   an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   By default, if any index mentioned is too large, it is replaced by the index that addresses
   the last element along an axis (the `clip` mode).

   This function supports n-dimensional input and (n-1)-dimensional indices arrays.

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // picks elements with specified indices along axis 0
   pick(x, y=[0,1], 0) = [ 1.,  4.]

   // picks elements with specified indices along axis 1
   pick(x, y=[0,1,0], 1) = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 using 'wrap' mode
   // to place indices that would normally be out of bounds
   pick(x, y=[2,-1,-2], 1, mode='wrap') = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 and dims are maintained
   pick(x,y, 1, keepdims=True) = [[ 2.],
   [ 3.],
   [ 6.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L153
Parameters
dataThe input array
indexThe index array
axisint or None. The axis along which to pick the elements. Negative values mean indexing from right to left. If it is None, the elements in the index w.r.t. the flattened input will be picked.
keepdimsIf true, the axis where we pick the elements is left in the result as a dimension with size one.
modeSpecify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.
Returns
new symbol
Symbol mxnet::cpp::Pooling ( const std::string &  symbol_name,
Symbol  data,
Shape  kernel = Shape(),
PoolingPoolType  pool_type = PoolingPoolType::kMax,
bool  global_pool = false,
bool  cudnn_off = false,
PoolingPoolingConvention  pooling_convention = PoolingPoolingConvention::kValid,
Shape  stride = Shape(),
Shape  pad = Shape(),
dmlc::optional< int >  p_value = dmlc::optional<int>(),
dmlc::optional< bool >  count_include_pad = dmlc::optional<bool>() 
)
inline

Performs pooling on the input.

   The shapes for 1-D pooling are

   - **data**: *(batch_size, channel, width)*,
   - **out**: *(batch_size, num_filter, out_width)*.

   The shapes for 2-D pooling are

   - **data**: *(batch_size, channel, height, width)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*, with::

   out_height = f(height, kernel[0], pad[0], stride[0])
   out_width = f(width, kernel[1], pad[1], stride[1])

   The definition of *f* depends on ``pooling_convention``, which has two options:

   - **valid** (default)::

   f(x, k, p, s) = floor((x+2*p-k)/s)+1

   - **full**, which is compatible with Caffe::

   f(x, k, p, s) = ceil((x+2*p-k)/s)+1

   If ``global_pool`` is set to true, global pooling is performed, namely ``kernel`` is reset
   to ``(height, width)``.

   Four pooling options are supported by ``pool_type``:

   - **avg**: average pooling
   - **max**: max pooling
   - **sum**: sum pooling
   - **lp**: Lp pooling

   For 3-D pooling, an additional *depth* dimension is added before
   *height*. Namely the input data will have shape *(batch_size, channel, depth,
   height, width)*.

   Notes on Lp pooling:

   Lp pooling was first introduced by this paper: https://arxiv.org/pdf/1204.3968.pdf.
   L-1 pooling is simply sum pooling, while L-inf pooling is simply max pooling.
   We can see that Lp pooling stands between those two, in practice the most
   common value for p is 2.

   For each window ``X``, the mathematical expression for Lp pooling is:

   :math:`f(X) = \sqrt[p]{\sum_{x}^{X} x^p}`



   Defined in src/operator/nn/pooling.cc:L379
Parameters
symbol_namename of the resulting symbol
dataInput data to the pooling operator.
kernelPooling kernel size: (y, x) or (d, y, x)
pool_typePooling type to be applied.
global_poolIgnore kernel size, do global pooling based on current input feature map.
cudnn_offTurn off cudnn pooling and use MXNet pooling operator.
pooling_conventionPooling convention to be applied.
strideStride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension.
padPad for pooling: (y, x) or (d, y, x). Defaults to no padding.
p_valueValue of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
count_include_padOnly used for AvgPool, specify whether to count padding elements for average calculation. For example, with a 5*5 kernel on a 3*3 corner of an image, the sum of the 9 valid elements will be divided by 25 if this is set to true, or it will be divided by 9 if this is set to false. Defaults to true.
Returns
new symbol
Symbol mxnet::cpp::Pooling ( Symbol  data,
Shape  kernel = Shape(),
PoolingPoolType  pool_type = PoolingPoolType::kMax,
bool  global_pool = false,
bool  cudnn_off = false,
PoolingPoolingConvention  pooling_convention = PoolingPoolingConvention::kValid,
Shape  stride = Shape(),
Shape  pad = Shape(),
dmlc::optional< int >  p_value = dmlc::optional<int>(),
dmlc::optional< bool >  count_include_pad = dmlc::optional<bool>() 
)
inline

Performs pooling on the input.

   The shapes for 1-D pooling are

   - **data**: *(batch_size, channel, width)*,
   - **out**: *(batch_size, num_filter, out_width)*.

   The shapes for 2-D pooling are

   - **data**: *(batch_size, channel, height, width)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*, with::

   out_height = f(height, kernel[0], pad[0], stride[0])
   out_width = f(width, kernel[1], pad[1], stride[1])

   The definition of *f* depends on ``pooling_convention``, which has two options:

   - **valid** (default)::

   f(x, k, p, s) = floor((x+2*p-k)/s)+1

   - **full**, which is compatible with Caffe::

   f(x, k, p, s) = ceil((x+2*p-k)/s)+1

   If ``global_pool`` is set to true, global pooling is performed, namely ``kernel`` is reset
   to ``(height, width)``.

   Four pooling options are supported by ``pool_type``:

   - **avg**: average pooling
   - **max**: max pooling
   - **sum**: sum pooling
   - **lp**: Lp pooling

   For 3-D pooling, an additional *depth* dimension is added before
   *height*. Namely the input data will have shape *(batch_size, channel, depth,
   height, width)*.

   Notes on Lp pooling:

   Lp pooling was first introduced by this paper: https://arxiv.org/pdf/1204.3968.pdf.
   L-1 pooling is simply sum pooling, while L-inf pooling is simply max pooling.
   We can see that Lp pooling stands between those two, in practice the most
   common value for p is 2.

   For each window ``X``, the mathematical expression for Lp pooling is:

   :math:`f(X) = \sqrt[p]{\sum_{x}^{X} x^p}`



   Defined in src/operator/nn/pooling.cc:L379
Parameters
dataInput data to the pooling operator.
kernelPooling kernel size: (y, x) or (d, y, x)
pool_typePooling type to be applied.
global_poolIgnore kernel size, do global pooling based on current input feature map.
cudnn_offTurn off cudnn pooling and use MXNet pooling operator.
pooling_conventionPooling convention to be applied.
strideStride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension.
padPad for pooling: (y, x) or (d, y, x). Defaults to no padding.
p_valueValue of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
count_include_padOnly used for AvgPool, specify whether to count padding elements for average calculation. For example, with a 5*5 kernel on a 3*3 corner of an image, the sum of the 9 valid elements will be divided by 25 if this is set to true, or it will be divided by 9 if this is set to false. Defaults to true.
Returns
new symbol
Symbol mxnet::cpp::Pooling_v1 ( const std::string &  symbol_name,
Symbol  data,
Shape  kernel = Shape(),
Pooling_v1PoolType  pool_type = Pooling_v1PoolType::kMax,
bool  global_pool = false,
Pooling_v1PoolingConvention  pooling_convention = Pooling_v1PoolingConvention::kValid,
Shape  stride = Shape(),
Shape  pad = Shape() 
)
inline

This operator is DEPRECATED. Perform pooling on the input.

The shapes for 2-D pooling are

  • data: *(batch_size, channel, height, width)*
  • out: *(batch_size, num_filter, out_height, out_width)*, with::

out_height = f(height, kernel[0], pad[0], stride[0])
out_width = f(width, kernel[1], pad[1], stride[1])

The definition of f depends on pooling_convention, which has two options:

  • valid (default)::

f(x, k, p, s) = floor((x+2*p-k)/s)+1

  • full, which is compatible with Caffe::

f(x, k, p, s) = ceil((x+2*p-k)/s)+1

If global_pool is set to true, global pooling is performed, namely kernel is reset to (height, width).

Three pooling options are supported by pool_type:

  • avg: average pooling
  • max: max pooling
  • sum: sum pooling

1-D pooling is a special case of 2-D pooling with width=1 and kernel[1]=1.

For 3-D pooling, an additional depth dimension is added before height. Namely the input data will have shape *(batch_size, channel, depth, height, width)*.

   Defined in src/operator/pooling_v1.cc:L104
Parameters
symbol_namename of the resulting symbol
dataInput data to the pooling operator.
kernelpooling kernel size: (y, x) or (d, y, x)
pool_typePooling type to be applied.
global_poolIgnore kernel size, do global pooling based on current input feature map.
pooling_conventionPooling convention to be applied.
stridestride: for pooling (y, x) or (d, y, x)
padpad for pooling: (y, x) or (d, y, x)
Returns
new symbol
Symbol mxnet::cpp::Pooling_v1 ( Symbol  data,
Shape  kernel = Shape(),
Pooling_v1PoolType  pool_type = Pooling_v1PoolType::kMax,
bool  global_pool = false,
Pooling_v1PoolingConvention  pooling_convention = Pooling_v1PoolingConvention::kValid,
Shape  stride = Shape(),
Shape  pad = Shape() 
)
inline

This operator is DEPRECATED. Perform pooling on the input.

The shapes for 2-D pooling are

  • data: *(batch_size, channel, height, width)*
  • out: *(batch_size, num_filter, out_height, out_width)*, with::

out_height = f(height, kernel[0], pad[0], stride[0])
out_width = f(width, kernel[1], pad[1], stride[1])

The definition of f depends on pooling_convention, which has two options:

  • valid (default)::

f(x, k, p, s) = floor((x+2*p-k)/s)+1

  • full, which is compatible with Caffe::

f(x, k, p, s) = ceil((x+2*p-k)/s)+1

If global_pool is set to true, global pooling is performed, namely kernel is reset to (height, width).

Three pooling options are supported by pool_type:

  • avg: average pooling
  • max: max pooling
  • sum: sum pooling

1-D pooling is a special case of 2-D pooling with width=1 and kernel[1]=1.

For 3-D pooling, an additional depth dimension is added before height. Namely the input data will have shape *(batch_size, channel, depth, height, width)*.

   Defined in src/operator/pooling_v1.cc:L104
Parameters
dataInput data to the pooling operator.
kernelpooling kernel size: (y, x) or (d, y, x)
pool_typePooling type to be applied.
global_poolIgnore kernel size, do global pooling based on current input feature map.
pooling_conventionPooling convention to be applied.
stridestride: for pooling (y, x) or (d, y, x)
padpad for pooling: (y, x) or (d, y, x)
Returns
new symbol
Symbol mxnet::cpp::prod ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the product of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L147
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values mean indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::prod ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the product of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L147
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values mean indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::radians ( const std::string &  symbol_name,
Symbol  data 
)
inline

Converts each element of the input array from degrees to radians.

   .. math::
   radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]

   The storage type of ``radians`` output depends upon the input storage type:

   - radians(default) = default
   - radians(row_sparse) = row_sparse
   - radians(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L182
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::radians ( Symbol  data)
inline

Converts each element of the input array from degrees to radians.

   .. math::
   radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]

   The storage type of ``radians`` output depends upon the input storage type:

   - radians(default) = default
   - radians(row_sparse) = row_sparse
   - radians(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L182
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::rcbrt ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise inverse cube-root value of the input.

   .. math::
   rcbrt(x) = 1/\sqrt[3]{x}

   Example::

   rcbrt([1,8,-125]) = [1.0, 0.5, -0.2]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L916
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::rcbrt ( Symbol  data)
inline

Returns element-wise inverse cube-root value of the input.

   .. math::
   rcbrt(x) = 1/\sqrt[3]{x}

   Example::

   rcbrt([1,8,-125]) = [1.0, 0.5, -0.2]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L916
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::reciprocal ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the reciprocal of the argument, element-wise.

   Calculates 1/x.

   Example::

   reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L640
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::reciprocal ( Symbol  data)
inline

Returns the reciprocal of the argument, element-wise.

   Calculates 1/x.

   Example::

   reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L640
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::relu ( const std::string &  symbol_name,
Symbol  data 
)
inline

Computes rectified linear.

   .. math::
   max(features, 0)

   The storage type of ``relu`` output depends upon the input storage type:

   - relu(default) = default
   - relu(row_sparse) = row_sparse
   - relu(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::relu ( Symbol  data)
inline

Computes rectified linear.

   .. math::
   max(features, 0)

   The storage type of ``relu`` output depends upon the input storage type:

   - relu(default) = default
   - relu(row_sparse) = row_sparse
   - relu(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::repeat ( const std::string &  symbol_name,
Symbol  data,
int  repeats,
dmlc::optional< int >  axis = dmlc::optional<int>() 
)
inline

Repeats elements of an array.

   By default, ``repeat`` flattens the input array into 1-D and then repeats the
   elements::

   x = [[ 1, 2],
   [ 3, 4]]

   repeat(x, repeats=2) = [ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.]

   The parameter ``axis`` specifies the axis along which to perform repeat::

   repeat(x, repeats=2, axis=1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]

   repeat(x, repeats=2, axis=0) = [[ 1.,  2.],
   [ 1.,  2.],
   [ 3.,  4.],
   [ 3.,  4.]]

   repeat(x, repeats=2, axis=-1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]



   Defined in src/operator/tensor/matrix_op.cc:L692
Parameters
symbol_namename of the resulting symbol
dataInput data array
repeatsThe number of repetitions for each element.
axisThe axis along which to repeat values. Negative numbers are interpreted as counting from the back. By default, use the flattened input array, and return a flat output array.
Returns
new symbol
Symbol mxnet::cpp::repeat ( Symbol  data,
int  repeats,
dmlc::optional< int >  axis = dmlc::optional<int>() 
)
inline

Repeats elements of an array.

   By default, ``repeat`` flattens the input array into 1-D and then repeats the
   elements::

   x = [[ 1, 2],
   [ 3, 4]]

   repeat(x, repeats=2) = [ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.]

   The parameter ``axis`` specifies the axis along which to perform repeat::

   repeat(x, repeats=2, axis=1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]

   repeat(x, repeats=2, axis=0) = [[ 1.,  2.],
   [ 1.,  2.],
   [ 3.,  4.],
   [ 3.,  4.]]

   repeat(x, repeats=2, axis=-1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]



   Defined in src/operator/tensor/matrix_op.cc:L692
Parameters
dataInput data array
repeatsThe number of repetitions for each element.
axisThe axis along which to repeat values. Negative numbers are interpreted as counting from the back. By default, use the flattened input array, and return a flat output array.
Returns
new symbol
Symbol mxnet::cpp::Reshape ( const std::string &  symbol_name,
Symbol  data,
Shape  shape = Shape(),
bool  reverse = false,
Shape  target_shape = Shape(),
bool  keep_highest = false 
)
inline

Reshapes the input array.

   .. note:: ``Reshape`` is deprecated, use ``reshape``

   Given an array and a shape, this function returns a copy of the array in the new shape.
   The shape is a tuple of integers such as (2,3,4). The size of the new shape should be the same as the size of the input array.

   Example::

   reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]

   Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}. The significance of each is explained below:

   - ``0``  copy this dimension from the input to the output shape.

   Example::

   - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
   - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)

   - ``-1`` infers the dimension of the output shape by using the remainder of the input dimensions,
   keeping the size of the new array same as that of the input array.
   At most one dimension of shape can be -1.

   Example::

   - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
   - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
   - input shape = (2,3,4), shape=(-1,), output shape = (24,)

   - ``-2`` copy all/remainder of the input dimensions to the output shape.

   Example::

   - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)

   - ``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.

   Example::

   - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
   - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
   - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
   - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)

   - ``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).

   Example::

   - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4)
   - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)

   If the argument `reverse` is set to 1, then the special values are inferred from right to left.

   Example::

   - without reverse=1, for input shape = (10,5,4), shape = (-1,0), output shape would be (40,5).
   - with reverse=1, output shape will be (50,4).



   Defined in src/operator/tensor/matrix_op.cc:L169
Parameters
symbol_namename of the resulting symbol
dataInput data to reshape.
shapeThe target shape
reverseIf true then the special values are inferred from right to left
target_shape(Deprecated! Use shape instead.) Target new shape. One and only one dim can be 0, in which case it will be inferred from the rest of dims.
keep_highest(Deprecated! Use shape instead.) Whether to keep the highest dim unchanged. If set to true, then the first dim in target_shape is ignored, and always fixed as input.
Returns
new symbol
Symbol mxnet::cpp::Reshape ( Symbol  data,
Shape  shape = Shape(),
bool  reverse = false,
Shape  target_shape = Shape(),
bool  keep_highest = false 
)
inline

Reshapes the input array.

   .. note:: ``Reshape`` is deprecated, use ``reshape``

   Given an array and a shape, this function returns a copy of the array in the new shape.
   The shape is a tuple of integers such as (2,3,4). The size of the new shape should be the same as the size of the input array.

   Example::

   reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]

   Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}. The significance of each is explained below:

   - ``0``  copy this dimension from the input to the output shape.

   Example::

   - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
   - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)

   - ``-1`` infers the dimension of the output shape by using the remainder of the input dimensions,
   keeping the size of the new array same as that of the input array.
   At most one dimension of shape can be -1.

   Example::

   - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
   - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
   - input shape = (2,3,4), shape=(-1,), output shape = (24,)

   - ``-2`` copy all/remainder of the input dimensions to the output shape.

   Example::

   - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)

   - ``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.

   Example::

   - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
   - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
   - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
   - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)

   - ``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).

   Example::

   - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4)
   - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)

   If the argument `reverse` is set to 1, then the special values are inferred from right to left.

   Example::

   - without reverse=1, for input shape = (10,5,4), shape = (-1,0), output shape would be (40,5).
   - with reverse=1, output shape will be (50,4).



   Defined in src/operator/tensor/matrix_op.cc:L169
Parameters
dataInput data to reshape.
shapeThe target shape
reverseIf true then the special values are inferred from right to left
target_shape(Deprecated! Use shape instead.) Target new shape. One and only one dim can be 0, in which case it will be inferred from the rest of dims.
keep_highest(Deprecated! Use shape instead.) Whether to keep the highest dim unchanged. If set to true, then the first dim in target_shape is ignored, and always fixed as input.
Returns
new symbol
Symbol mxnet::cpp::reshape_like ( const std::string &  symbol_name,
Symbol  lhs,
Symbol  rhs 
)
inline

Reshape some or all dimensions of lhs to have the same shape as some or all dimensions of rhs.

   Returns a **view** of the `lhs` array with a new shape without altering any data.

   Example::

   x = [1, 2, 3, 4, 5, 6]
   y = [[0, -4], [3, 2], [2, 2]]
   reshape_like(x, y) = [[1, 2], [3, 4], [5, 6]]

   More precise control over how dimensions are inherited is achieved by specifying
   slices over the `lhs` and `rhs` array dimensions. Only the sliced `lhs` dimensions
   are reshaped to the `rhs` sliced dimensions, with the non-sliced `lhs` dimensions staying the same.

   Examples::

   - lhs shape = (30,7), rhs shape = (15,2,4), lhs_begin=0, lhs_end=1, rhs_begin=0, rhs_end=2, output shape = (15,2,7)
   - lhs shape = (3, 5), rhs shape = (1,15,4), lhs_begin=0, lhs_end=2, rhs_begin=1, rhs_end=2, output shape = (15)

   Negative indices are supported, and `None` can be used for either `lhs_end` or `rhs_end` to indicate the end of the range.

   Example::

   - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None, rhs_begin=1, rhs_end=None, output shape = (30, 2, 2, 3)



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L455
Parameters
symbol_namename of the resulting symbol
lhsFirst input.
rhsSecond input.
Returns
new symbol
Symbol mxnet::cpp::reshape_like ( Symbol  lhs,
Symbol  rhs 
)
inline

Reshape some or all dimensions of lhs to have the same shape as some or all dimensions of rhs.

   Returns a **view** of the `lhs` array with a new shape without altering any data.

   Example::

   x = [1, 2, 3, 4, 5, 6]
   y = [[0, -4], [3, 2], [2, 2]]
   reshape_like(x, y) = [[1, 2], [3, 4], [5, 6]]

   More precise control over how dimensions are inherited is achieved by specifying
   slices over the `lhs` and `rhs` array dimensions. Only the sliced `lhs` dimensions
   are reshaped to the `rhs` sliced dimensions, with the non-sliced `lhs` dimensions staying the same.

   Examples::

   - lhs shape = (30,7), rhs shape = (15,2,4), lhs_begin=0, lhs_end=1, rhs_begin=0, rhs_end=2, output shape = (15,2,7)
   - lhs shape = (3, 5), rhs shape = (1,15,4), lhs_begin=0, lhs_end=2, rhs_begin=1, rhs_end=2, output shape = (15)

   Negative indices are supported, and `None` can be used for either `lhs_end` or `rhs_end` to indicate the end of the range.

   Example::

   - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None, rhs_begin=1, rhs_end=None, output shape = (30, 2, 2, 3)



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L455
Parameters
lhsFirst input.
rhsSecond input.
Returns
new symbol
Symbol mxnet::cpp::reverse ( const std::string &  symbol_name,
Symbol  data,
Shape  axis 
)
inline

Reverses the order of elements along given axis while preserving array shape.

   Note: reverse and flip are equivalent. We use reverse in the following examples.

   Examples::

   x = [[ 0.,  1.,  2.,  3.,  4.],
   [ 5.,  6.,  7.,  8.,  9.]]

   reverse(x, axis=0) = [[ 5.,  6.,  7.,  8.,  9.],
   [ 0.,  1.,  2.,  3.,  4.]]

   reverse(x, axis=1) = [[ 4.,  3.,  2.,  1.,  0.],
   [ 9.,  8.,  7.,  6.,  5.]]


   Defined in src/operator/tensor/matrix_op.cc:L794
Parameters
symbol_namename of the resulting symbol
dataInput data array
axisThe axis along which to reverse elements.
Returns
new symbol
Symbol mxnet::cpp::reverse ( Symbol  data,
Shape  axis 
)
inline

Reverses the order of elements along given axis while preserving array shape.

   Note: reverse and flip are equivalent. We use reverse in the following examples.

   Examples::

   x = [[ 0.,  1.,  2.,  3.,  4.],
   [ 5.,  6.,  7.,  8.,  9.]]

   reverse(x, axis=0) = [[ 5.,  6.,  7.,  8.,  9.],
   [ 0.,  1.,  2.,  3.,  4.]]

   reverse(x, axis=1) = [[ 4.,  3.,  2.,  1.,  0.],
   [ 9.,  8.,  7.,  6.,  5.]]


   Defined in src/operator/tensor/matrix_op.cc:L794
Parameters
dataInput data array
axisThe axis along which to reverse elements.
Returns
new symbol
Symbol mxnet::cpp::rint ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise rounded value to the nearest integer of the input.

   .. note::
   - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns ``n+1``.
   - For input ``-n.5`` both ``rint`` and ``round`` return ``-n-1``.

   Example::

   rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]

   The storage type of ``rint`` output depends upon the input storage type:

   - rint(default) = default
   - rint(row_sparse) = row_sparse
   - rint(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L721
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::rint ( Symbol  data)
inline

Returns element-wise rounded value to the nearest integer of the input.

   .. note::
   - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns ``n+1``.
   - For input ``-n.5`` both ``rint`` and ``round`` return ``-n-1``.

   Example::

   rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]

   The storage type of ``rint`` output depends upon the input storage type:

   - rint(default) = default
   - rint(row_sparse) = row_sparse
   - rint(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L721
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::rmsprop_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  n,
mx_float  lr,
mx_float  gamma1 = 0.95,
mx_float  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
mx_float  clip_weights = -1 
)
inline

Update function for RMSProp optimizer.

   `RMSprop` is a variant of stochastic gradient descent where the gradients are
   divided by a cache which grows with the sum of squares of recent gradients.

   `RMSProp` is similar to `AdaGrad`, a popular variant of `SGD` which adaptively
   tunes the learning rate of each parameter. `AdaGrad` lowers the learning rate for
   each parameter monotonically over the course of training.
   While this is analytically motivated for convex optimizations, it may not be ideal
   for non-convex problems. `RMSProp` deals with this heuristically by allowing the
   learning rates to rebound as the denominator decays over time.

   Define the Root Mean Square (RMS) error criterion of the gradient as
   :math:`RMS[g]_t = \sqrt{E[g^2]_t + \epsilon}`, where :math:`g` represents
   gradient and :math:`E[g^2]_t` is the decaying average over past squared gradient.

   The :math:`E[g^2]_t` is given by:

   .. math::
   E[g^2]_t = \gamma * E[g^2]_{t-1} + (1-\gamma) * g_t^2

   The update step is

   .. math::
   \theta_{t+1} = \theta_t - \frac{\eta}{RMS[g]_t} g_t

   The RMSProp code follows the version in
   http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
   Tieleman & Hinton, 2012.

   Hinton suggests the momentum term :math:`\gamma` to be 0.9 and the learning rate
   :math:`\eta` to be 0.001.



   Defined in src/operator/optimizer_op.cc:L553
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
nn
lrLearning rate
gamma1The decay rate of momentum estimates.
epsilonA small constant for numerical stability.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weightsClip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).
Returns
new symbol
Symbol mxnet::cpp::rmsprop_update ( Symbol  weight,
Symbol  grad,
Symbol  n,
mx_float  lr,
mx_float  gamma1 = 0.95,
mx_float  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
mx_float  clip_weights = -1 
)
inline

Update function for RMSProp optimizer.

   `RMSprop` is a variant of stochastic gradient descent where the gradients are
   divided by a cache which grows with the sum of squares of recent gradients.

   `RMSProp` is similar to `AdaGrad`, a popular variant of `SGD` which adaptively
   tunes the learning rate of each parameter. `AdaGrad` lowers the learning rate for
   each parameter monotonically over the course of training.
   While this is analytically motivated for convex optimizations, it may not be ideal
   for non-convex problems. `RMSProp` deals with this heuristically by allowing the
   learning rates to rebound as the denominator decays over time.

   Define the Root Mean Square (RMS) error criterion of the gradient as
   :math:`RMS[g]_t = \sqrt{E[g^2]_t + \epsilon}`, where :math:`g` represents
   gradient and :math:`E[g^2]_t` is the decaying average over past squared gradient.

   The :math:`E[g^2]_t` is given by:

   .. math::
   E[g^2]_t = \gamma * E[g^2]_{t-1} + (1-\gamma) * g_t^2

   The update step is

   .. math::
   \theta_{t+1} = \theta_t - \frac{\eta}{RMS[g]_t} g_t

   The RMSProp code follows the version in
   http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
   Tieleman & Hinton, 2012.

   Hinton suggests the momentum term :math:`\gamma` to be 0.9 and the learning rate
   :math:`\eta` to be 0.001.



   Defined in src/operator/optimizer_op.cc:L553
Parameters
weightWeight
gradGradient
nn
lrLearning rate
gamma1The decay rate of momentum estimates.
epsilonA small constant for numerical stability.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weightsClip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).
Returns
new symbol
Symbol mxnet::cpp::rmspropalex_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  n,
Symbol  g,
Symbol  delta,
mx_float  lr,
mx_float  gamma1 = 0.95,
mx_float  gamma2 = 0.9,
mx_float  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
mx_float  clip_weights = -1 
)
inline

Update function for RMSPropAlex optimizer.

   `RMSPropAlex` is non-centered version of `RMSProp`.

   Define :math:`E[g^2]_t` is the decaying average over past squared gradient and
   :math:`E[g]_t` is the decaying average over past gradient.

   .. math::
   E[g^2]_t = \gamma_1 * E[g^2]_{t-1} + (1 - \gamma_1) * g_t^2\\
   E[g]_t = \gamma_1 * E[g]_{t-1} + (1 - \gamma_1) * g_t\\
   \Delta_t = \gamma_2 * \Delta_{t-1} - \frac{\eta}{\sqrt{E[g^2]_t - E[g]_t^2 + \epsilon}} g_t

   The update step is

   .. math::
   \theta_{t+1} = \theta_t + \Delta_t

   The RMSPropAlex code follows the version in
   http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.

   Graves suggests the momentum term :math:`\gamma_1` to be 0.95, :math:`\gamma_2`
   to be 0.9 and the learning rate :math:`\eta` to be 0.0001.


   Defined in src/operator/optimizer_op.cc:L592
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
nn
gg
deltadelta
lrLearning rate
gamma1Decay rate.
gamma2Decay rate.
epsilonA small constant for numerical stability.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weightsClip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).
Returns
new symbol
Symbol mxnet::cpp::rmspropalex_update ( Symbol  weight,
Symbol  grad,
Symbol  n,
Symbol  g,
Symbol  delta,
mx_float  lr,
mx_float  gamma1 = 0.95,
mx_float  gamma2 = 0.9,
mx_float  epsilon = 1e-08,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
mx_float  clip_weights = -1 
)
inline

Update function for RMSPropAlex optimizer.

   `RMSPropAlex` is non-centered version of `RMSProp`.

   Define :math:`E[g^2]_t` is the decaying average over past squared gradient and
   :math:`E[g]_t` is the decaying average over past gradient.

   .. math::
   E[g^2]_t = \gamma_1 * E[g^2]_{t-1} + (1 - \gamma_1) * g_t^2\\
   E[g]_t = \gamma_1 * E[g]_{t-1} + (1 - \gamma_1) * g_t\\
   \Delta_t = \gamma_2 * \Delta_{t-1} - \frac{\eta}{\sqrt{E[g^2]_t - E[g]_t^2 + \epsilon}} g_t

   The update step is

   .. math::
   \theta_{t+1} = \theta_t + \Delta_t

   The RMSPropAlex code follows the version in
   http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.

   Graves suggests the momentum term :math:`\gamma_1` to be 0.95, :math:`\gamma_2`
   to be 0.9 and the learning rate :math:`\eta` to be 0.0001.


   Defined in src/operator/optimizer_op.cc:L592
Parameters
weightWeight
gradGradient
nn
gg
deltadelta
lrLearning rate
gamma1Decay rate.
gamma2Decay rate.
epsilonA small constant for numerical stability.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weightsClip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).
Returns
new symbol
Symbol mxnet::cpp::RNN ( const std::string &  symbol_name,
Symbol  data,
Symbol  parameters,
Symbol  state,
Symbol  state_cell,
uint32_t  state_size,
uint32_t  num_layers,
RNNMode  mode,
bool  bidirectional = false,
mx_float  p = 0,
bool  state_outputs = false,
dmlc::optional< int >  projection_size = dmlc::optional<int>(),
dmlc::optional< double >  lstm_state_clip_min = dmlc::optional<double>(),
dmlc::optional< double >  lstm_state_clip_max = dmlc::optional<double>(),
bool  lstm_state_clip_nan = false 
)
inline

Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are implemented, with both multi-layer and bidirectional support.

When the input data is of type float32 and the environment variables MXNET_CUDA_ALLOW_TENSOR_CORE and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will try to use pseudo-float16 precision (float32 math with float16 I/O) in order to use Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant speedups.

Vanilla RNN

Applies a single-gate recurrent layer to input X. Two kinds of activation function are supported: ReLU and Tanh.

With ReLU activation function:

.. math:: h_t = relu(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

With Tanh activation function:

.. math:: h_t = \tanh(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

Reference paper: Finding structure in time - Elman, 1988. https://crl.ucsd.edu/~elman/Papers/fsit.pdf

LSTM

Long Short-Term Memory - Hochreiter, 1997.

.. math:: \begin{array}{ll} i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\ g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\ o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\ c_t = f_t * c_{(t-1)} + i_t * g_t \\ h_t = o_t * \tanh(c_t) \end{array}

GRU

Gated Recurrent Unit - Cho et al. 2014. http://arxiv.org/abs/1406.1078

The definition of GRU here is slightly different from the paper but compatible with CUDNN.

.. math:: \begin{array}{ll} r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\ h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \end{array}

Parameters
symbol_namename of the resulting symbol
dataInput data to RNN
parametersVector of all RNN trainable parameters concatenated
stateinitial hidden state of the RNN
state_cellinitial cell state for LSTM networks (only for LSTM)
state_sizesize of the state for each layer
num_layersnumber of stacked layers
modethe type of RNN to compute
bidirectionalwhether to use bidirectional recurrent layers
pdrop rate of the dropout on the outputs of each RNN layer, except the last layer.
state_outputsWhether to have the states as symbol outputs.
projection_sizesize of project size
lstm_state_clip_minMinimum clip value of LSTM states. This option must be used together with lstm_state_clip_max.
lstm_state_clip_maxMaximum clip value of LSTM states. This option must be used together with lstm_state_clip_min.
lstm_state_clip_nanWhether to stop NaN from propagating in state by clipping it to min/max. If the clipping range is not specified, this option is ignored.
Returns
new symbol
Symbol mxnet::cpp::RNN ( Symbol  data,
Symbol  parameters,
Symbol  state,
Symbol  state_cell,
uint32_t  state_size,
uint32_t  num_layers,
RNNMode  mode,
bool  bidirectional = false,
mx_float  p = 0,
bool  state_outputs = false,
dmlc::optional< int >  projection_size = dmlc::optional<int>(),
dmlc::optional< double >  lstm_state_clip_min = dmlc::optional<double>(),
dmlc::optional< double >  lstm_state_clip_max = dmlc::optional<double>(),
bool  lstm_state_clip_nan = false 
)
inline

Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are implemented, with both multi-layer and bidirectional support.

When the input data is of type float32 and the environment variables MXNET_CUDA_ALLOW_TENSOR_CORE and MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION are set to 1, this operator will try to use pseudo-float16 precision (float32 math with float16 I/O) in order to use Tensor Cores on suitable NVIDIA GPUs. This can sometimes give significant speedups.

Vanilla RNN

Applies a single-gate recurrent layer to input X. Two kinds of activation function are supported: ReLU and Tanh.

With ReLU activation function:

.. math:: h_t = relu(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

With Tanh activation function:

.. math:: h_t = \tanh(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

Reference paper: Finding structure in time - Elman, 1988. https://crl.ucsd.edu/~elman/Papers/fsit.pdf

LSTM

Long Short-Term Memory - Hochreiter, 1997.

.. math:: \begin{array}{ll} i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\ g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\ o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\ c_t = f_t * c_{(t-1)} + i_t * g_t \\ h_t = o_t * \tanh(c_t) \end{array}

GRU

Gated Recurrent Unit - Cho et al. 2014. http://arxiv.org/abs/1406.1078

The definition of GRU here is slightly different from the paper but compatible with CUDNN.

.. math:: \begin{array}{ll} r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\ h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \end{array}

Parameters
dataInput data to RNN
parametersVector of all RNN trainable parameters concatenated
stateinitial hidden state of the RNN
state_cellinitial cell state for LSTM networks (only for LSTM)
state_sizesize of the state for each layer
num_layersnumber of stacked layers
modethe type of RNN to compute
bidirectionalwhether to use bidirectional recurrent layers
pdrop rate of the dropout on the outputs of each RNN layer, except the last layer.
state_outputsWhether to have the states as symbol outputs.
projection_sizesize of project size
lstm_state_clip_minMinimum clip value of LSTM states. This option must be used together with lstm_state_clip_max.
lstm_state_clip_maxMaximum clip value of LSTM states. This option must be used
lstm_state_clip_nanWhether to stop NaN from propagating in state by clipping
Returns
new symbol
Symbol mxnet::cpp::ROIPooling ( const std::string &  symbol_name,
Symbol  data,
Symbol  rois,
Shape  pooled_size,
mx_float  spatial_scale 
)
inline

Performs region of interest(ROI) pooling on the input array.

   ROI pooling is a variant of a max pooling layer, in which the output size is fixed and
   region of interest is a parameter. Its purpose is to perform max pooling on the inputs
   of non-uniform sizes to obtain fixed-size feature maps. ROI pooling is a neural-net
   layer mostly used in training a `Fast R-CNN` network for object detection.

   This operator takes a 4D feature map as an input array and region proposals as `rois`,
   then it pools over sub-regions of input and produces a fixed-sized output array
   regardless of the ROI size.

   To crop the feature map accordingly, you can resize the bounding box coordinates
   by changing the parameters `rois` and `spatial_scale`.

   The cropped feature maps are pooled by standard max pooling operation to a fixed size output
   indicated by a `pooled_size` parameter. batch_size will change to the number of region
   bounding boxes after `ROIPooling`.

   The size of each region of interest doesn't have to be perfectly divisible by
   the number of pooling sections(`pooled_size`).

   Example::

   x = [[[[  0.,   1.,   2.,   3.,   4.,   5.],
   [  6.,   7.,   8.,   9.,  10.,  11.],
   [ 12.,  13.,  14.,  15.,  16.,  17.],
   [ 18.,  19.,  20.,  21.,  22.,  23.],
   [ 24.,  25.,  26.,  27.,  28.,  29.],
   [ 30.,  31.,  32.,  33.,  34.,  35.],
   [ 36.,  37.,  38.,  39.,  40.,  41.],
   [ 42.,  43.,  44.,  45.,  46.,  47.]]]]

   // region of interest i.e. bounding box coordinates.
   y = [[0,0,0,4,4]]

   // returns array of shape (2,2) according to the given roi with max pooling.
   ROIPooling(x, y, (2,2), 1.0) = [[[[ 14.,  16.],
   [ 26.,  28.]]]]

   // region of interest is changed due to the change in `spatial_scale` parameter.
   ROIPooling(x, y, (2,2), 0.7) = [[[[  7.,   9.],
   [ 19.,  21.]]]]



   Defined in src/operator/roi_pooling.cc:L295
Parameters
symbol_namename of the resulting symbol
dataThe input array to the pooling operator, a 4D Feature maps
roisBounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]], where (x1, y1) and (x2, y2) are top left and bottom right corners of designated region of interest. batch_index indicates the index of corresponding image in the input array
pooled_sizeROI pooling output shape (h,w)
spatial_scaleRatio of input feature map height (or w) to raw image height (or w). Equals the reciprocal of total stride in convolutional layers
Returns
new symbol
Symbol mxnet::cpp::ROIPooling ( Symbol  data,
Symbol  rois,
Shape  pooled_size,
mx_float  spatial_scale 
)
inline

Performs region of interest(ROI) pooling on the input array.

   ROI pooling is a variant of a max pooling layer, in which the output size is fixed and
   region of interest is a parameter. Its purpose is to perform max pooling on the inputs
   of non-uniform sizes to obtain fixed-size feature maps. ROI pooling is a neural-net
   layer mostly used in training a `Fast R-CNN` network for object detection.

   This operator takes a 4D feature map as an input array and region proposals as `rois`,
   then it pools over sub-regions of input and produces a fixed-sized output array
   regardless of the ROI size.

   To crop the feature map accordingly, you can resize the bounding box coordinates
   by changing the parameters `rois` and `spatial_scale`.

   The cropped feature maps are pooled by standard max pooling operation to a fixed size output
   indicated by a `pooled_size` parameter. batch_size will change to the number of region
   bounding boxes after `ROIPooling`.

   The size of each region of interest doesn't have to be perfectly divisible by
   the number of pooling sections(`pooled_size`).

   Example::

   x = [[[[  0.,   1.,   2.,   3.,   4.,   5.],
   [  6.,   7.,   8.,   9.,  10.,  11.],
   [ 12.,  13.,  14.,  15.,  16.,  17.],
   [ 18.,  19.,  20.,  21.,  22.,  23.],
   [ 24.,  25.,  26.,  27.,  28.,  29.],
   [ 30.,  31.,  32.,  33.,  34.,  35.],
   [ 36.,  37.,  38.,  39.,  40.,  41.],
   [ 42.,  43.,  44.,  45.,  46.,  47.]]]]

   // region of interest i.e. bounding box coordinates.
   y = [[0,0,0,4,4]]

   // returns array of shape (2,2) according to the given roi with max pooling.
   ROIPooling(x, y, (2,2), 1.0) = [[[[ 14.,  16.],
   [ 26.,  28.]]]]

   // region of interest is changed due to the change in `spatial_scale` parameter.
   ROIPooling(x, y, (2,2), 0.7) = [[[[  7.,   9.],
   [ 19.,  21.]]]]



   Defined in src/operator/roi_pooling.cc:L295
Parameters
dataThe input array to the pooling operator, a 4D Feature maps
roisBounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]], where (x1, y1) and (x2, y2) are top left and bottom right corners of designated region of interest. batch_index indicates the index of corresponding image in the input array
pooled_sizeROI pooling output shape (h,w)
spatial_scaleRatio of input feature map height (or w) to raw image height (or w). Equals the reciprocal of total stride in convolutional layers
Returns
new symbol
Symbol mxnet::cpp::round ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise rounded value to the nearest integer of the input.

   Example::

   round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]

   The storage type of ``round`` output depends upon the input storage type:

   - round(default) = default
   - round(row_sparse) = row_sparse
   - round(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L700
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::round ( Symbol  data)
inline

Returns element-wise rounded value to the nearest integer of the input.

   Example::

   round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]

   The storage type of ``round`` output depends upon the input storage type:

   - round(default) = default
   - round(row_sparse) = row_sparse
   - round(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L700
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::rsqrt ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise inverse square-root value of the input.

   .. math::
   rsqrt(x) = 1/\sqrt{x}

   Example::

   rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]

   The storage type of ``rsqrt`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L860
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::rsqrt ( Symbol  data)
inline

Returns element-wise inverse square-root value of the input.

   .. math::
   rsqrt(x) = 1/\sqrt{x}

   Example::

   rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]

   The storage type of ``rsqrt`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L860
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::scatter_nd ( const std::string &  symbol_name,
Symbol  data,
Symbol  indices,
Shape  shape 
)
inline

Scatters data into a new tensor according to indices.

   Given `data` with shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})` and indices with shape
   `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(X_0, X_1, ..., X_{N-1})`,
   where `M <= N`. If `M == N`, data shape should simply be `(Y_0, ..., Y_{K-1})`.

   The elements in output are defined as follows::

   output[indices[0, y_0, ..., y_{K-1}],
   ...,
   indices[M-1, y_0, ..., y_{K-1}],
   x_M, ..., x_{N-1}] = data[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}]

   all other entries in output are 0.

   .. warning::

   If the indices have duplicates, the result will be non-deterministic and
   the gradient of `scatter_nd` will not be correct!!


   Examples::

   data = [2, 3, 0]
   indices = [[1, 1, 0], [0, 1, 0]]
   shape = (2, 2)
   scatter_nd(data, indices, shape) = [[0, 0], [2, 3]]

   data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
   indices = [[0, 1], [1, 1]]
   shape = (2, 2, 2, 2)
   scatter_nd(data, indices, shape) = [[[[0, 0],
   [0, 0]],

   [[1, 2],
   [3, 4]]],

   [[[0, 0],
   [0, 0]],

   [[5, 6],
   [7, 8]]]]
Parameters
symbol_namename of the resulting symbol
datadata
indicesindices
shapeShape of output.
Returns
new symbol
Symbol mxnet::cpp::scatter_nd ( Symbol  data,
Symbol  indices,
Shape  shape 
)
inline

Scatters data into a new tensor according to indices.

   Given `data` with shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})` and indices with shape
   `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(X_0, X_1, ..., X_{N-1})`,
   where `M <= N`. If `M == N`, data shape should simply be `(Y_0, ..., Y_{K-1})`.

   The elements in output are defined as follows::

   output[indices[0, y_0, ..., y_{K-1}],
   ...,
   indices[M-1, y_0, ..., y_{K-1}],
   x_M, ..., x_{N-1}] = data[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}]

   all other entries in output are 0.

   .. warning::

   If the indices have duplicates, the result will be non-deterministic and
   the gradient of `scatter_nd` will not be correct!!


   Examples::

   data = [2, 3, 0]
   indices = [[1, 1, 0], [0, 1, 0]]
   shape = (2, 2)
   scatter_nd(data, indices, shape) = [[0, 0], [2, 3]]

   data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
   indices = [[0, 1], [1, 1]]
   shape = (2, 2, 2, 2)
   scatter_nd(data, indices, shape) = [[[[0, 0],
   [0, 0]],

   [[1, 2],
   [3, 4]]],

   [[[0, 0],
   [0, 0]],

   [[5, 6],
   [7, 8]]]]
Parameters
datadata
indicesindices
shapeShape of output.
Returns
new symbol
Symbol mxnet::cpp::SequenceLast ( const std::string &  symbol_name,
Symbol  data,
Symbol  sequence_length,
bool  use_sequence_length = false,
int  axis = 0 
)
inline

Takes the last element of a sequence.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array
   of the form [batch_size, other_feature_dims].

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be
   an input array of positive ints of dimension [batch_size]. To use this parameter,
   set `use_sequence_length` to `True`, otherwise each example in the batch is assumed
   to have the max sequence length.

   .. note:: Alternatively, you can also use `take` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]],

   [[ 10.,   11.,   12.],
   [ 13.,   14.,   15.],
   [ 16.,   17.,   18.]],

   [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]]

   // returns last sequence when sequence_length parameter is not used
   SequenceLast(x) = [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,1,1], use_sequence_length=True) =
   [[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,2,3], use_sequence_length=True) =
   [[  1.,    2.,   3.],
   [  13.,  14.,  15.],
   [  25.,  26.,  27.]]



   Defined in src/operator/sequence_last.cc:L92
Parameters
symbol_namename of the resulting symbol
datan-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_lengthvector of sequence lengths of the form [batch_size]
use_sequence_lengthIf set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axisThe sequence axis. Only values of 0 and 1 are currently supported.
Returns
new symbol
Symbol mxnet::cpp::SequenceLast ( Symbol  data,
Symbol  sequence_length,
bool  use_sequence_length = false,
int  axis = 0 
)
inline

Takes the last element of a sequence.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array
   of the form [batch_size, other_feature_dims].

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be
   an input array of positive ints of dimension [batch_size]. To use this parameter,
   set `use_sequence_length` to `True`, otherwise each example in the batch is assumed
   to have the max sequence length.

   .. note:: Alternatively, you can also use `take` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]],

   [[ 10.,   11.,   12.],
   [ 13.,   14.,   15.],
   [ 16.,   17.,   18.]],

   [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]]

   // returns last sequence when sequence_length parameter is not used
   SequenceLast(x) = [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,1,1], use_sequence_length=True) =
   [[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,2,3], use_sequence_length=True) =
   [[  1.,    2.,   3.],
   [  13.,  14.,  15.],
   [  25.,  26.,  27.]]



   Defined in src/operator/sequence_last.cc:L92
Parameters
datan-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_lengthvector of sequence lengths of the form [batch_size]
use_sequence_lengthIf set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axisThe sequence axis. Only values of 0 and 1 are currently supported.
Returns
new symbol
Symbol mxnet::cpp::SequenceMask ( const std::string &  symbol_name,
Symbol  data,
Symbol  sequence_length,
bool  use_sequence_length = false,
mx_float  value = 0,
int  axis = 0 
)
inline

Sets all elements outside the sequence to a constant value.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length`
   should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length and
   this operator works as the `identity` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // works as identity operator when sequence_length parameter is not used
   SequenceMask(x) = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [1,1] means 1 of each batch will be kept
   // and other rows are masked with default mask value = 0
   SequenceMask(x, sequence_length=[1,1], use_sequence_length=True) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]]

   // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept
   // and other rows are masked with value = 1
   SequenceMask(x, sequence_length=[2,3], use_sequence_length=True, value=1) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [  10.,  11.,  12.]],

   [[   1.,   1.,   1.],
   [  16.,  17.,  18.]]]



   Defined in src/operator/sequence_mask.cc:L114
Parameters
symbol_namename of the resulting symbol
datan-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_lengthvector of sequence lengths of the form [batch_size]
use_sequence_lengthIf set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
valueThe value to be used as a mask.
axisThe sequence axis. Only values of 0 and 1 are currently supported.
Returns
new symbol
Symbol mxnet::cpp::SequenceMask ( Symbol  data,
Symbol  sequence_length,
bool  use_sequence_length = false,
mx_float  value = 0,
int  axis = 0 
)
inline

Sets all elements outside the sequence to a constant value.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length`
   should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length and
   this operator works as the `identity` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // works as identity operator when sequence_length parameter is not used
   SequenceMask(x) = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [1,1] means 1 of each batch will be kept
   // and other rows are masked with default mask value = 0
   SequenceMask(x, sequence_length=[1,1], use_sequence_length=True) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]]

   // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept
   // and other rows are masked with value = 1
   SequenceMask(x, sequence_length=[2,3], use_sequence_length=True, value=1) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [  10.,  11.,  12.]],

   [[   1.,   1.,   1.],
   [  16.,  17.,  18.]]]



   Defined in src/operator/sequence_mask.cc:L114
Parameters
datan-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_lengthvector of sequence lengths of the form [batch_size]
use_sequence_lengthIf set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
valueThe value to be used as a mask.
axisThe sequence axis. Only values of 0 and 1 are currently supported.
Returns
new symbol
Symbol mxnet::cpp::SequenceReverse ( const std::string &  symbol_name,
Symbol  data,
Symbol  sequence_length,
bool  use_sequence_length = false,
int  axis = 0 
)
inline

Reverses the elements of each sequence.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims]
   and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences.
   `sequence_length` should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // returns reverse sequence when sequence_length parameter is not used
   SequenceReverse(x) = [[[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]]]

   // sequence_length [2,2] means 2 rows of
   // both batch B1 and B2 will be reversed.
   SequenceReverse(x, sequence_length=[2,2], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2
   // will be reversed.
   SequenceReverse(x, sequence_length=[2,3], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 16.,  17.,  18.]],

   [[  1.,   2.,   3.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,  15.],
   [  4.,   5.,   6.]]]



   Defined in src/operator/sequence_reverse.cc:L113
Parameters
symbol_namename of the resulting symbol
datan-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_lengthvector of sequence lengths of the form [batch_size]
use_sequence_lengthIf set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axisThe sequence axis. Only 0 is currently supported.
Returns
new symbol
Symbol mxnet::cpp::SequenceReverse ( Symbol  data,
Symbol  sequence_length,
bool  use_sequence_length = false,
int  axis = 0 
)
inline

Reverses the elements of each sequence.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims]
   and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences.
   `sequence_length` should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // returns reverse sequence when sequence_length parameter is not used
   SequenceReverse(x) = [[[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]]]

   // sequence_length [2,2] means 2 rows of
   // both batch B1 and B2 will be reversed.
   SequenceReverse(x, sequence_length=[2,2], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2
   // will be reversed.
   SequenceReverse(x, sequence_length=[2,3], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 16.,  17.,  18.]],

   [[  1.,   2.,   3.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,  15.],
   [  4.,   5.,   6.]]]



   Defined in src/operator/sequence_reverse.cc:L113
Parameters
datan-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_lengthvector of sequence lengths of the form [batch_size]
use_sequence_lengthIf set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axisThe sequence axis. Only 0 is currently supported.
Returns
new symbol
Symbol mxnet::cpp::sgd_mom_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  mom,
mx_float  lr,
mx_float  momentum = 0,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Momentum update function for Stochastic Gradient Descent (SGD) optimizer.

   Momentum update has better convergence rates on neural networks. Mathematically it looks
   like below:

   .. math::

   v_1 = -\alpha * \nabla J(W_0)\\
   v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
   W_t = W_{t-1} + v_t

   It updates the weights using::

   v = momentum * v - learning_rate * gradient
   weight += v

   Where the parameter ``momentum`` is the decay rate of momentum estimates at each epoch.

   However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True and weight's storage
   type is the same as momentum's storage type,
   only the row slices whose indices appear in grad.indices are updated (for both weight and momentum)::

   for row in gradient.indices:
   v[row] = momentum[row] * v[row] - learning_rate * gradient[row]
   weight[row] += v[row]



   Defined in src/operator/optimizer_op.cc:L372
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
momMomentum
lrLearning rate
momentumThe decay rate of momentum estimates at each epoch.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse
Returns
new symbol
Symbol mxnet::cpp::sgd_mom_update ( Symbol  weight,
Symbol  grad,
Symbol  mom,
mx_float  lr,
mx_float  momentum = 0,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Momentum update function for Stochastic Gradient Descent (SGD) optimizer.

   Momentum update has better convergence rates on neural networks. Mathematically it looks
   like below:

   .. math::

   v_1 = -\alpha * \nabla J(W_0)\\
   v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
   W_t = W_{t-1} + v_t

   It updates the weights using::

   v = momentum * v - learning_rate * gradient
   weight += v

   Where the parameter ``momentum`` is the decay rate of momentum estimates at each epoch.

   However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True and weight's storage
   type is the same as momentum's storage type,
   only the row slices whose indices appear in grad.indices are updated (for both weight and momentum)::

   for row in gradient.indices:
   v[row] = momentum[row] * v[row] - learning_rate * gradient[row]
   weight[row] += v[row]



   Defined in src/operator/optimizer_op.cc:L372
Parameters
weightWeight
gradGradient
momMomentum
lrLearning rate
momentumThe decay rate of momentum estimates at each epoch.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse
Returns
new symbol
Symbol mxnet::cpp::sgd_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
mx_float  lr,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Update function for Stochastic Gradient Descent (SGD) optimizer.

   It updates the weights using::

   weight = weight - learning_rate * (gradient + wd * weight)

   However, if gradient is of ``row_sparse`` storage type and ``lazy_update`` is True,
   only the row slices whose indices appear in grad.indices are updated::

   for row in gradient.indices:
   weight[row] = weight[row] - learning_rate * (gradient[row] + wd * weight[row])



   Defined in src/operator/optimizer_op.cc:L331
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
lrLearning rate
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse.
Returns
new symbol
Symbol mxnet::cpp::sgd_update ( Symbol  weight,
Symbol  grad,
mx_float  lr,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
bool  lazy_update = true 
)
inline

Update function for Stochastic Gradient Descent (SGD) optimizer.

   It updates the weights using::

   weight = weight - learning_rate * (gradient + wd * weight)

   However, if gradient is of ``row_sparse`` storage type and ``lazy_update`` is True,
   only the row slices whose indices appear in grad.indices are updated::

   for row in gradient.indices:
   weight[row] = weight[row] - learning_rate * (gradient[row] + wd * weight[row])



   Defined in src/operator/optimizer_op.cc:L331
Parameters
weightWeight
gradGradient
lrLearning rate
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient] If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_updateIf true, lazy updates are applied if gradient's stype is row_sparse.
Returns
new symbol
Symbol mxnet::cpp::shape_array ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< int >  lhs_begin = dmlc::optional<int>(),
dmlc::optional< int >  lhs_end = dmlc::optional<int>(),
dmlc::optional< int >  rhs_begin = dmlc::optional<int>(),
dmlc::optional< int >  rhs_end = dmlc::optional<int>() 
)
inline

Returns a 1D int64 array containing the shape of data.

   Example::

   shape_array([[1,2,3,4], [5,6,7,8]]) = [2,4]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L506
Parameters
symbol_namename of the resulting symbol
dataInput Array.
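lhs_beginDefaults to 0. The beginning index along which the lhs dimensions are to be reshaped. Supports negative indices.
lhs_endDefaults to None. The ending index along which the lhs dimensions are to be used for reshaping. Supports negative indices.
rhs_beginDefaults to 0. The beginning index along which the rhs dimensions are to be used for reshaping. Supports negative indices.
rhs_endDefaults to None. The ending index along which the rhs dimensions are to be used for reshaping. Supports negative indices.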
Returns
new symbol
Symbol mxnet::cpp::shape_array ( Symbol  data,
dmlc::optional< int >  lhs_begin = dmlc::optional<int>(),
dmlc::optional< int >  lhs_end = dmlc::optional<int>(),
dmlc::optional< int >  rhs_begin = dmlc::optional<int>(),
dmlc::optional< int >  rhs_end = dmlc::optional<int>() 
)
inline

Returns a 1D int64 array containing the shape of data.

   Example::

   shape_array([[1,2,3,4], [5,6,7,8]]) = [2,4]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L506
Parameters
dataInput Array.
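lhs_beginDefaults to 0. The beginning index along which the lhs dimensions are to be reshaped. Supports negative indices.
lhs_endDefaults to None. The ending index along which the lhs dimensions are to be used for reshaping. Supports negative indices.
rhs_beginDefaults to 0. The beginning index along which the rhs dimensions are to be used for reshaping. Supports negative indices.
rhs_endDefaults to None. The ending index along which the rhs dimensions are to be used for reshaping. Supports negative indices.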
Returns
new symbol
Symbol mxnet::cpp::sigmoid ( const std::string &  symbol_name,
Symbol  data 
)
inline

Computes sigmoid of x element-wise.

   .. math::
   y = 1 / (1 + exp(-x))

   The storage type of ``sigmoid`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L101
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sigmoid ( Symbol  data)
inline

Computes sigmoid of x element-wise.

   .. math::
   y = 1 / (1 + exp(-x))

   The storage type of ``sigmoid`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L101
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sign ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise sign of the input.

   Example::

   sign([-2, 0, 3]) = [-1, 0, 1]

   The storage type of ``sign`` output depends upon the input storage type:

   - sign(default) = default
   - sign(row_sparse) = row_sparse
   - sign(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L681
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sign ( Symbol  data)
inline

Returns element-wise sign of the input.

   Example::

   sign([-2, 0, 3]) = [-1, 0, 1]

   The storage type of ``sign`` output depends upon the input storage type:

   - sign(default) = default
   - sign(row_sparse) = row_sparse
   - sign(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L681
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::signsgd_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
mx_float  lr,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1 
)
inline

Update function for SignSGD optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   W_t = W_{t-1} - \eta_t \text{sign}(g_t)

   It updates the weights using::

   weight = weight - learning_rate * sign(gradient)

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L57
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
lrLearning rate
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
Returns
new symbol
Symbol mxnet::cpp::signsgd_update ( Symbol  weight,
Symbol  grad,
mx_float  lr,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1 
)
inline

Update function for SignSGD optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   W_t = W_{t-1} - \eta_t \text{sign}(g_t)

   It updates the weights using::

   weight = weight - learning_rate * sign(gradient)

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L57
Parameters
weightWeight
gradGradient
lrLearning rate
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
Returns
new symbol
Symbol mxnet::cpp::signum_update ( const std::string &  symbol_name,
Symbol  weight,
Symbol  grad,
Symbol  mom,
mx_float  lr,
mx_float  momentum = 0,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
mx_float  wd_lh = 0 
)
inline

SIGN momentUM (Signum) optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   m_t = \beta m_{t-1} + (1 - \beta) g_t\\
   W_t = W_{t-1} - \eta_t \text{sign}(m_t)

   It updates the weights using::
   state = momentum * state + (1-momentum) * gradient
   weight = weight - learning_rate * sign(state)

   Where the parameter ``momentum`` is the decay rate of momentum estimates at
   each epoch.

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L86
Parameters
symbol_namename of the resulting symbol
weightWeight
gradGradient
momMomentum
lrLearning rate
momentumThe decay rate of momentum estimates at each epoch.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
wd_lhThe amount of weight decay that does not go into gradient/momentum calculations; otherwise do weight decay algorithmically only.
Returns
new symbol
Symbol mxnet::cpp::signum_update ( Symbol  weight,
Symbol  grad,
Symbol  mom,
mx_float  lr,
mx_float  momentum = 0,
mx_float  wd = 0,
mx_float  rescale_grad = 1,
mx_float  clip_gradient = -1,
mx_float  wd_lh = 0 
)
inline

SIGN momentUM (Signum) optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   m_t = \beta m_{t-1} + (1 - \beta) g_t\\
   W_t = W_{t-1} - \eta_t \text{sign}(m_t)

   It updates the weights using::
   state = momentum * state + (1-momentum) * gradient
   weight = weight - learning_rate * sign(state)

   Where the parameter ``momentum`` is the decay rate of momentum estimates at
   each epoch.

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L86
Parameters
weightWeight
gradGradient
momMomentum
lrLearning rate
momentumThe decay rate of momentum estimates at each epoch.
wdWeight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_gradRescale gradient to grad = rescale_grad*grad.
clip_gradientClip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
wd_lhThe amount of weight decay that does not go into gradient/momentum calculations; otherwise do weight decay algorithmically only.
Returns
new symbol
Symbol mxnet::cpp::sin ( const std::string &  symbol_name,
Symbol  data 
)
inline

Computes the element-wise sine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]

   The storage type of ``sin`` output depends upon the input storage type:

   - sin(default) = default
   - sin(row_sparse) = row_sparse
   - sin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sin ( Symbol  data)
inline

Computes the element-wise sine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]

   The storage type of ``sin`` output depends upon the input storage type:

   - sin(default) = default
   - sin(row_sparse) = row_sparse
   - sin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sinh ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the hyperbolic sine of the input array, computed element-wise.

   .. math::
   sinh(x) = 0.5\times(exp(x) - exp(-x))

   The storage type of ``sinh`` output depends upon the input storage type:

   - sinh(default) = default
   - sinh(row_sparse) = row_sparse
   - sinh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L201
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sinh ( Symbol  data)
inline

Returns the hyperbolic sine of the input array, computed element-wise.

   .. math::
   sinh(x) = 0.5\times(exp(x) - exp(-x))

   The storage type of ``sinh`` output depends upon the input storage type:

   - sinh(default) = default
   - sinh(row_sparse) = row_sparse
   - sinh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L201
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::size_array ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns a 1D int64 array containing the size of data.

   Example::

   size_array([[1,2,3,4], [5,6,7,8]]) = [8]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L558
Parameters
symbol_namename of the resulting symbol
dataInput Array.
Returns
new symbol
Symbol mxnet::cpp::size_array ( Symbol  data)
inline

Returns a 1D int64 array containing the size of data.

   Example::

   size_array([[1,2,3,4], [5,6,7,8]]) = [8]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L558
Parameters
dataInput Array.
Returns
new symbol
Symbol mxnet::cpp::slice ( const std::string &  symbol_name,
Symbol  data,
Shape  begin,
Shape  end,
Shape  step = Shape() 
)
inline

Slices a region of the array.

   .. note:: ``crop`` is deprecated. Use ``slice`` instead.

   This function returns a sliced array between the indices given
   by `begin` and `end` with the corresponding `step`.

   For an input array of ``shape=(d_0, d_1, ..., d_n-1)``,
   slice operation with ``begin=(b_0, b_1...b_m-1)``,
   ``end=(e_0, e_1, ..., e_m-1)``, and ``step=(s_0, s_1, ..., s_m-1)``,
   where m <= n, results in an array with the shape
   ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.

   The resulting array's *k*-th dimension contains elements
   from the *k*-th dimension of the input array starting
   from index ``b_k`` (inclusive) with step ``s_k``
   until reaching ``e_k`` (exclusive).

   If the *k*-th elements are `None` in the sequence of `begin`, `end`,
   and `step`, the following rule will be used to set default values.
   If `s_k` is `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`;
   else, set `b_k=d_k-1`, `e_k=-1`.

   The storage type of ``slice`` output depends on storage types of inputs

   - slice(csr) = csr
   - otherwise, ``slice`` generates output with default storage

   .. note:: When input data storage type is csr, it only supports
   step=(), or step=(None,), or step=(1,) to generate a csr output.
   For other step parameter values, it falls back to slicing
   a dense tensor.

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
   [ 6.,  7.,  8.]]
   slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
   [5.,  7.],
   [1.,  3.]]


   Defined in src/operator/tensor/matrix_op.cc:L414
Parameters
symbol_namename of the resulting symbol
dataSource input
beginstarting indices for the slice operation, supports negative indices.
endending indices for the slice operation, supports negative indices.
stepstep for the slice operation, supports negative values.
Returns
new symbol
Symbol mxnet::cpp::slice ( Symbol  data,
Shape  begin,
Shape  end,
Shape  step = Shape() 
)
inline

Slices a region of the array.

   .. note:: ``crop`` is deprecated. Use ``slice`` instead.

   This function returns a sliced array between the indices given
   by `begin` and `end` with the corresponding `step`.

   For an input array of ``shape=(d_0, d_1, ..., d_n-1)``,
   slice operation with ``begin=(b_0, b_1...b_m-1)``,
   ``end=(e_0, e_1, ..., e_m-1)``, and ``step=(s_0, s_1, ..., s_m-1)``,
   where m <= n, results in an array with the shape
   ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.

   The resulting array's *k*-th dimension contains elements
   from the *k*-th dimension of the input array starting
   from index ``b_k`` (inclusive) with step ``s_k``
   until reaching ``e_k`` (exclusive).

   If the *k*-th elements are `None` in the sequence of `begin`, `end`,
   and `step`, the following rule will be used to set default values.
   If `s_k` is `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`;
   else, set `b_k=d_k-1`, `e_k=-1`.

   The storage type of ``slice`` output depends on storage types of inputs

   - slice(csr) = csr
   - otherwise, ``slice`` generates output with default storage

   .. note:: When input data storage type is csr, it only supports
   step=(), or step=(None,), or step=(1,) to generate a csr output.
   For other step parameter values, it falls back to slicing
   a dense tensor.

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
   [ 6.,  7.,  8.]]
   slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
   [5.,  7.],
   [1.,  3.]]


   Defined in src/operator/tensor/matrix_op.cc:L414
Parameters
dataSource input
beginstarting indices for the slice operation, supports negative indices.
endending indices for the slice operation, supports negative indices.
stepstep for the slice operation, supports negative values.
Returns
new symbol
Symbol mxnet::cpp::slice_axis ( const std::string &  symbol_name,
Symbol  data,
int  axis,
int  begin,
dmlc::optional< int >  end 
)
inline

Slices along a given axis.

   Returns an array slice along a given `axis` starting from the `begin` index
   to the `end` index.

   Examples::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=0, begin=1, end=3) = [[  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=1, begin=0, end=2) = [[  1.,   2.],
   [  5.,   6.],
   [  9.,  10.]]

   slice_axis(x, axis=1, begin=-3, end=-1) = [[  2.,   3.],
   [  6.,   7.],
   [ 10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L501
Parameters
symbol_namename of the resulting symbol
dataSource input
axisAxis along which to be sliced, supports negative indexes.
beginThe beginning index along the axis to be sliced, supports negative indexes.
endThe ending index along the axis to be sliced, supports negative indexes.
Returns
new symbol
Symbol mxnet::cpp::slice_axis ( Symbol  data,
int  axis,
int  begin,
dmlc::optional< int >  end 
)
inline

Slices along a given axis.

   Returns an array slice along a given `axis` starting from the `begin` index
   to the `end` index.

   Examples::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=0, begin=1, end=3) = [[  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=1, begin=0, end=2) = [[  1.,   2.],
   [  5.,   6.],
   [  9.,  10.]]

   slice_axis(x, axis=1, begin=-3, end=-1) = [[  2.,   3.],
   [  6.,   7.],
   [ 10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L501
Parameters
dataSource input
axisAxis along which to be sliced, supports negative indexes.
beginThe beginning index along the axis to be sliced, supports negative indexes.
endThe ending index along the axis to be sliced, supports negative indexes.
Returns
new symbol
Symbol mxnet::cpp::slice_like ( const std::string &  symbol_name,
Symbol  data,
Symbol  shape_like,
Shape  axes = Shape() 
)
inline

Slices a region of the array like the shape of another array.

   This function is similar to ``slice``, however, the `begin` are always `0`s
   and `end` of specific axes are inferred from the second input `shape_like`.

   Given the second `shape_like` input of ``shape=(d_0, d_1, ..., d_n-1)``,
   a ``slice_like`` operator with default empty `axes`, it performs the
   following operation:

   `` out = slice(input, begin=(0, 0, ..., 0), end=(d_0, d_1, ..., d_n-1))``.

   When `axes` is not empty, it is used to specify which axes are being sliced.

   Given a 4-d input data, ``slice_like`` operator with ``axes=(0, 2, -1)``
   will perform the following operation:

   `` out = slice(input, begin=(0, 0, 0, 0), end=(d_0, None, d_2, d_3))``.

   Note that it is allowed to have first and second input with different
   dimensions; however, you have to make sure the `axes` are specified and do
   not exceed the dimension limits.

   For example, given `input_1` with ``shape=(2,3,4,5)`` and `input_2` with
   ``shape=(1,2,3)``, it is not allowed to use:

   `` out = slice_like(a, b)`` because ndim of `input_1` is 4, and ndim of
   `input_2` is 3.

   The following is allowed in this situation:

   `` out = slice_like(a, b, axes=(0, 2))``

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   y = [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]

   slice_like(x, y) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0, 1)) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0)) = [[ 1.,  2.,  3.,  4.]
   [ 5.,  6.,  7.,  8.]]
   slice_like(x, y, axes=(-1)) = [[  1.,   2.,   3.]
   [  5.,   6.,   7.]
   [  9.,  10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L570
Parameters
symbol_namename of the resulting symbol
dataSource input
shape_likeShape like input
axesList of axes on which input data will be sliced according to the corresponding size of the second input. By default will slice on all axes.
Returns
new symbol
Symbol mxnet::cpp::slice_like ( Symbol  data,
Symbol  shape_like,
Shape  axes = Shape() 
)
inline

Slices a region of the array like the shape of another array.

   This function is similar to ``slice``, however, the `begin` are always `0`s
   and `end` of specific axes are inferred from the second input `shape_like`.

   Given the second `shape_like` input of ``shape=(d_0, d_1, ..., d_n-1)``,
   a ``slice_like`` operator with default empty `axes`, it performs the
   following operation:

   `` out = slice(input, begin=(0, 0, ..., 0), end=(d_0, d_1, ..., d_n-1))``.

   When `axes` is not empty, it is used to specify which axes are being sliced.

   Given a 4-d input data, ``slice_like`` operator with ``axes=(0, 2, -1)``
   will perform the following operation:

   `` out = slice(input, begin=(0, 0, 0, 0), end=(d_0, None, d_2, d_3))``.

   Note that it is allowed to have first and second input with different
   dimensions; however, you have to make sure the `axes` are specified and do
   not exceed the dimension limits.

   For example, given `input_1` with ``shape=(2,3,4,5)`` and `input_2` with
   ``shape=(1,2,3)``, it is not allowed to use:

   `` out = slice_like(a, b)`` because ndim of `input_1` is 4, and ndim of
   `input_2` is 3.

   The following is allowed in this situation:

   `` out = slice_like(a, b, axes=(0, 2))``

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   y = [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]

   slice_like(x, y) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0, 1)) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0)) = [[ 1.,  2.,  3.,  4.]
   [ 5.,  6.,  7.,  8.]]
   slice_like(x, y, axes=(-1)) = [[  1.,   2.,   3.]
   [  5.,   6.,   7.]
   [  9.,  10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L570
Parameters
dataSource input
shape_likeShape like input
axesList of axes on which input data will be sliced according to the corresponding size of the second input. By default will slice on all axes.
Returns
new symbol
Symbol mxnet::cpp::SliceChannel ( const std::string &  symbol_name,
Symbol  data,
int  num_outputs,
int  axis = 1,
bool  squeeze_axis = false 
)
inline

Splits an array along a particular axis into multiple sub-arrays.

   .. note:: ``SliceChannel`` is deprecated. Use ``split`` instead.

   **Note** that `num_outputs` should evenly divide the length of the axis
   along which to split the array.

   Example::

   x  = [[[ 1.]
   [ 2.]]
   [[ 3.]
   [ 4.]]
   [[ 5.]
   [ 6.]]]
   x.shape = (3, 2, 1)

   y = split(x, axis=1, num_outputs=2) // a list of 2 arrays with shape (3, 1, 1)
   y = [[[ 1.]]
   [[ 3.]]
   [[ 5.]]]

   [[[ 2.]]
   [[ 4.]]
   [[ 6.]]]

   y[0].shape = (3, 1, 1)

   z = split(x, axis=0, num_outputs=3) // a list of 3 arrays with shape (1, 2, 1)
   z = [[[ 1.]
   [ 2.]]]

   [[[ 3.]
   [ 4.]]]

   [[[ 5.]
   [ 6.]]]

   z[0].shape = (1, 2, 1)

   `squeeze_axis=1` removes the axis with length 1 from the shapes of the output
   arrays.
   **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
   along the `axis` which it is split.
   Also `squeeze_axis` can be set to true only if ``input.shape[axis] ==
   num_outputs``.

   Example::

   z = split(x, axis=0, num_outputs=3, squeeze_axis=1) // a list of 3 arrays with shape (2, 1)
   z = [[ 1.]
   [ 2.]]

   [[ 3.]
   [ 4.]]

   [[ 5.]
   [ 6.]]
   z[0].shape = (2 ,1 )



   Defined in src/operator/slice_channel.cc:L107
Parameters
symbol_namename of the resulting symbol
dataThe input
num_outputsNumber of splits. Note that this should evenly divide the length of the axis.
axisAxis along which to split.
squeeze_axisIf true, removes the axis with length 1 from the shapes of the output arrays. Note that setting squeeze_axis to true removes axis with length 1 only along the axis which it is split. Also squeeze_axis can be set to true only if input.shape[axis] == num_outputs.
Returns
new symbol
Symbol mxnet::cpp::SliceChannel ( Symbol  data,
int  num_outputs,
int  axis = 1,
bool  squeeze_axis = false 
)
inline

Splits an array along a particular axis into multiple sub-arrays.

   .. note:: ``SliceChannel`` is deprecated. Use ``split`` instead.

   **Note** that `num_outputs` should evenly divide the length of the axis
   along which to split the array.

   Example::

   x  = [[[ 1.]
   [ 2.]]
   [[ 3.]
   [ 4.]]
   [[ 5.]
   [ 6.]]]
   x.shape = (3, 2, 1)

   y = split(x, axis=1, num_outputs=2) // a list of 2 arrays with shape (3, 1, 1)
   y = [[[ 1.]]
   [[ 3.]]
   [[ 5.]]]

   [[[ 2.]]
   [[ 4.]]
   [[ 6.]]]

   y[0].shape = (3, 1, 1)

   z = split(x, axis=0, num_outputs=3) // a list of 3 arrays with shape (1, 2, 1)
   z = [[[ 1.]
   [ 2.]]]

   [[[ 3.]
   [ 4.]]]

   [[[ 5.]
   [ 6.]]]

   z[0].shape = (1, 2, 1)

   `squeeze_axis=1` removes the axis with length 1 from the shapes of the output
   arrays.
   **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
   along the `axis` which it is split.
   Also `squeeze_axis` can be set to true only if ``input.shape[axis] ==
   num_outputs``.

   Example::

   z = split(x, axis=0, num_outputs=3, squeeze_axis=1) // a list of 3 arrays with shape (2, 1)
   z = [[ 1.]
   [ 2.]]

   [[ 3.]
   [ 4.]]

   [[ 5.]
   [ 6.]]
   z[0].shape = (2 ,1 )



   Defined in src/operator/slice_channel.cc:L107
Parameters
dataThe input
num_outputsNumber of splits. Note that this should evenly divide the length of the axis.
axisAxis along which to split.
squeeze_axisIf true, removes the axis with length 1 from the shapes of the output arrays. Note that setting squeeze_axis to true removes axis with length 1 only along the axis which it is split. Also squeeze_axis can be set to true only if input.shape[axis] == num_outputs.
Returns
new symbol
Symbol mxnet::cpp::smooth_l1 ( const std::string &  symbol_name,
Symbol  data,
mx_float  scalar 
)
inline

Calculate Smooth L1 Loss(lhs, scalar) by summing

   .. math::

   f(x) =
   \begin{cases}
   (\sigma x)^2/2,& \text{if }x < 1/\sigma^2\\
   |x|-0.5/\sigma^2,& \text{otherwise}
   \end{cases}

   where :math:`x` is an element of the tensor *lhs* and :math:`\sigma` is the
   scalar.

   Example::

   smooth_l1([1, 2, 3, 4]) = [0.5, 1.5, 2.5, 3.5]
   smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]



   Defined in src/operator/tensor/elemwise_binary_scalar_op_extended.cc:L104
Parameters
symbol_namename of the resulting symbol
datasource input
scalarscalar input
Returns
new symbol
Symbol mxnet::cpp::smooth_l1 ( Symbol  data,
mx_float  scalar 
)
inline

Calculate Smooth L1 Loss(lhs, scalar) by summing

   .. math::

   f(x) =
   \begin{cases}
   (\sigma x)^2/2,& \text{if }x < 1/\sigma^2\\
   |x|-0.5/\sigma^2,& \text{otherwise}
   \end{cases}

   where :math:`x` is an element of the tensor *lhs* and :math:`\sigma` is the
   scalar.

   Example::

   smooth_l1([1, 2, 3, 4]) = [0.5, 1.5, 2.5, 3.5]
   smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]



   Defined in src/operator/tensor/elemwise_binary_scalar_op_extended.cc:L104
Parameters
datasource input
scalarscalar input
Returns
new symbol
Symbol mxnet::cpp::softmax ( const std::string &  symbol_name,
Symbol  data,
int  axis = -1,
dmlc::optional< double >  temperature = dmlc::optional<double>() 
)
inline

Applies the softmax function.

   The resulting array contains elements in the range (0,1) and the elements along
   the given axis sum up to 1.

   .. math::
   softmax(\mathbf{z/t})_j = \frac{e^{z_j/t}}{\sum_{k=1}^K e^{z_k/t}}

   for :math:`j = 1, ..., K`

   t is the temperature parameter in softmax function. By default, t equals 1.0

   Example::

   x = [[ 1.  1.  1.]
   [ 1.  1.  1.]]

   softmax(x,axis=0) = [[ 0.5  0.5  0.5]
   [ 0.5  0.5  0.5]]

   softmax(x,axis=1) = [[ 0.33333334,  0.33333334,  0.33333334],
   [ 0.33333334,  0.33333334,  0.33333334]]



   Defined in src/operator/nn/softmax.cc:L93
Parameters
symbol_namename of the resulting symbol
dataThe input array.
axisThe axis along which to compute softmax.
temperatureTemperature parameter in softmax
Returns
new symbol
Symbol mxnet::cpp::Softmax ( const std::string &  symbol_name,
Symbol  data,
mx_float  grad_scale = 1,
mx_float  ignore_label = -1,
bool  multi_output = false,
bool  use_ignore = false,
bool  preserve_shape = false,
SoftmaxNormalization  normalization = SoftmaxNormalization::kNull,
bool  out_grad = false,
mx_float  smooth_alpha = 0 
)
inline

Please use SoftmaxOutput.

   .. note::

   This operator has been renamed to `SoftmaxOutput`, which
   computes the gradient of cross-entropy loss w.r.t softmax output.
   To just compute softmax output, use the `softmax` operator.



   Defined in src/operator/softmax_output.cc:L138
Parameters
symbol_namename of the resulting symbol
dataInput array.
grad_scaleScales the gradient by a float factor.
ignore_labelThe instances whose labels == ignore_label will be ignored
multi_outputIf set to true, the softmax function will be computed along axis 1. This is applied when the shape of input array differs from the shape of label array.
use_ignoreIf set to true, the ignore_label value will not contribute to the backward gradient.
preserve_shapeIf set to true, the softmax function will be computed along the last axis (-1).
normalizationNormalizes the gradient.
out_gradMultiplies gradient with output gradient element-wise.
smooth_alphaConstant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.
Returns
new symbol
Symbol mxnet::cpp::softmax ( Symbol  data,
int  axis = -1,
dmlc::optional< double >  temperature = dmlc::optional<double>() 
)
inline

Applies the softmax function.

   The resulting array contains elements in the range (0,1) and the elements along
   the given axis sum up to 1.

   .. math::
   softmax(\mathbf{z/t})_j = \frac{e^{z_j/t}}{\sum_{k=1}^K e^{z_k/t}}

   for :math:`j = 1, ..., K`

   t is the temperature parameter in softmax function. By default, t equals 1.0

   Example::

   x = [[ 1.  1.  1.]
   [ 1.  1.  1.]]

   softmax(x,axis=0) = [[ 0.5  0.5  0.5]
   [ 0.5  0.5  0.5]]

   softmax(x,axis=1) = [[ 0.33333334,  0.33333334,  0.33333334],
   [ 0.33333334,  0.33333334,  0.33333334]]



   Defined in src/operator/nn/softmax.cc:L93
Parameters
dataThe input array.
axisThe axis along which to compute softmax.
temperatureTemperature parameter in softmax
Returns
new symbol
Symbol mxnet::cpp::Softmax ( Symbol  data,
mx_float  grad_scale = 1,
mx_float  ignore_label = -1,
bool  multi_output = false,
bool  use_ignore = false,
bool  preserve_shape = false,
SoftmaxNormalization  normalization = SoftmaxNormalization::kNull,
bool  out_grad = false,
mx_float  smooth_alpha = 0 
)
inline

Please use SoftmaxOutput.

   .. note::

   This operator has been renamed to `SoftmaxOutput`, which
   computes the gradient of cross-entropy loss w.r.t softmax output.
   To just compute softmax output, use the `softmax` operator.



   Defined in src/operator/softmax_output.cc:L138
Parameters
dataInput array.
grad_scaleScales the gradient by a float factor.
ignore_labelThe instances whose labels == ignore_label will be ignored
multi_outputIf set to true, the softmax function will be computed along axis 1. This is applied when the shape of input array differs from the shape of label array.
use_ignoreIf set to true, the ignore_label value will not contribute to the backward gradient.
preserve_shapeIf set to true, the softmax function will be computed along the last axis (-1).
normalizationNormalizes the gradient.
out_gradMultiplies gradient with output gradient element-wise.
smooth_alphaConstant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.
Returns
new symbol
Symbol mxnet::cpp::softmax_cross_entropy ( const std::string &  symbol_name,
Symbol  data,
Symbol  label 
)
inline

Calculate cross entropy of softmax output and one-hot label.

   - This operator computes the cross entropy in two steps:
   - Applies softmax function on the input array.
   - Computes and returns the cross entropy loss between the softmax output and
   the labels.

   - The softmax function and cross entropy loss is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   Example::

   x = [[1, 2, 3],
   [11, 7, 5]]

   label = [2, 0]

   softmax(x) = [[0.09003057, 0.24472848, 0.66524094],
   [0.97962922, 0.01794253, 0.00242826]]

   softmax_cross_entropy(data, label) = - log(0.66524094) - log(0.97962922) =
   0.4281871



   Defined in src/operator/loss_binary_op.cc:L59
Parameters
symbol_namename of the resulting symbol
dataInput data
labelInput label
Returns
new symbol
Symbol mxnet::cpp::softmax_cross_entropy ( Symbol  data,
Symbol  label 
)
inline

Calculate cross entropy of softmax output and one-hot label.

   - This operator computes the cross entropy in two steps:
   - Applies softmax function on the input array.
   - Computes and returns the cross entropy loss between the softmax output and
   the labels.

   - The softmax function and cross entropy loss is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   Example::

   x = [[1, 2, 3],
   [11, 7, 5]]

   label = [2, 0]

   softmax(x) = [[0.09003057, 0.24472848, 0.66524094],
   [0.97962922, 0.01794253, 0.00242826]]

   softmax_cross_entropy(data, label) = - log(0.66524094) - log(0.97962922) =
   0.4281871



   Defined in src/operator/loss_binary_op.cc:L59
Parameters
dataInput data
labelInput label
Returns
new symbol
Symbol mxnet::cpp::SoftmaxActivation ( const std::string &  symbol_name,
Symbol  data,
SoftmaxActivationMode  mode = SoftmaxActivationMode::kInstance 
)
inline

Applies softmax activation to input. This is intended for internal layers.

   .. note::

   This operator has been deprecated, please use `softmax`.

   If `mode` = ``instance``, this operator will compute a softmax for each
   instance in the batch. This is the default mode.

   If `mode` = ``channel``, this operator will compute a k-class softmax at each
   position of each instance, where `k` = ``num_channel``. This mode can only be
   used when the input array has at least 3 dimensions.
   This can be used for `fully convolutional network`, `image segmentation`, etc.

   Example::

   >>> input_array = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
   >>>                            [2., -.4, 7.,   3., 0.2]])
   >>> softmax_act = mx.nd.SoftmaxActivation(input_array)
   >>> print softmax_act.asnumpy()
   [[  1.78322066e-02   1.46375655e-03   5.38485940e-04   6.56010211e-03   9.73605454e-01]
   [  6.56221947e-03   5.95310994e-04   9.73919690e-01   1.78379621e-02   1.08472735e-03]]



   Defined in src/operator/nn/softmax_activation.cc:L59
Parameters
symbol_namename of the resulting symbol
dataThe input array.
modeSpecifies how to compute the softmax. If set to instance, it computes softmax for each instance. If set to channel, it computes cross channel softmax for each position of each instance.
Returns
new symbol
Symbol mxnet::cpp::SoftmaxActivation ( Symbol  data,
SoftmaxActivationMode  mode = SoftmaxActivationMode::kInstance 
)
inline

Applies softmax activation to input. This is intended for internal layers.

   .. note::

   This operator has been deprecated, please use `softmax`.

   If `mode` = ``instance``, this operator will compute a softmax for each
   instance in the batch. This is the default mode.

   If `mode` = ``channel``, this operator will compute a k-class softmax at each
   position of each instance, where `k` = ``num_channel``. This mode can only be
   used when the input array has at least 3 dimensions.
   This can be used for `fully convolutional network`, `image segmentation`, etc.

   Example::

   >>> input_array = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
   >>>                            [2., -.4, 7.,   3., 0.2]])
   >>> softmax_act = mx.nd.SoftmaxActivation(input_array)
   >>> print softmax_act.asnumpy()
   [[  1.78322066e-02   1.46375655e-03   5.38485940e-04   6.56010211e-03   9.73605454e-01]
   [  6.56221947e-03   5.95310994e-04   9.73919690e-01   1.78379621e-02   1.08472735e-03]]



   Defined in src/operator/nn/softmax_activation.cc:L59
Parameters
dataThe input array.
modeSpecifies how to compute the softmax. If set to instance, it computes softmax for each instance. If set to channel, it computes cross channel softmax for each position of each instance.
Returns
new symbol
Symbol mxnet::cpp::SoftmaxOutput ( const std::string &  symbol_name,
Symbol  data,
Symbol  label,
mx_float  grad_scale = 1,
mx_float  ignore_label = -1,
bool  multi_output = false,
bool  use_ignore = false,
bool  preserve_shape = false,
SoftmaxOutputNormalization  normalization = SoftmaxOutputNormalization::kNull,
bool  out_grad = false,
mx_float  smooth_alpha = 0 
)
inline

Computes the gradient of cross entropy loss with respect to softmax output.

   - This operator computes the gradient in two steps.
   The cross entropy loss does not actually need to be computed.

   - Applies softmax function on the input array.
   - Computes and returns the gradient of cross entropy loss w.r.t. the softmax output.

   - The softmax function, cross entropy loss and gradient is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   - The gradient of cross entropy loss w.r.t softmax output:

   .. math:: \text{gradient} = \text{output} - \text{label}

   - During forward propagation, the softmax function is computed for each instance in the input array.

   For general *N*-D input arrays with shape :math:`(d_1, d_2, ..., d_n)`, the size is
   :math:`s=d_1 \cdot d_2 \cdot \cdot \cdot d_n`. We can use the parameters `preserve_shape`
   and `multi_output` to specify the way to compute softmax:

   - By default, `preserve_shape` is ``false``. This operator will reshape the input array
   into a 2-D array with shape :math:`(d_1, \frac{s}{d_1})` and then compute the softmax function for
   each row in the reshaped array, and afterwards reshape it back to the original shape
   :math:`(d_1, d_2, ..., d_n)`.
   - If `preserve_shape` is ``true``, the softmax function will be computed along
   the last axis (`axis` = ``-1``).
   - If `multi_output` is ``true``, the softmax function will be computed along
   the second axis (`axis` = ``1``).

   - During backward propagation, the gradient of cross-entropy loss w.r.t softmax output array is computed.
   The provided label can be a one-hot label array or a probability label array.

   - If the parameter `use_ignore` is ``true``, `ignore_label` can specify input instances
   with a particular label to be ignored during backward propagation. **This has no effect when
   softmax `output` has same shape as `label`**.

   Example::

   data = [[1,2,3,4],[2,2,2,2],[3,3,3,3],[4,4,4,4]]
   label = [1,0,2,3]
   ignore_label = 1
   SoftmaxOutput(data=data, label = label,\
   multi_output=true, use_ignore=true,\
   ignore_label=ignore_label)
   ## forward softmax output
   [[ 0.0320586   0.08714432  0.23688284  0.64391428]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]]
   ## backward gradient output
   [[ 0.    0.    0.    0.  ]
   [-0.75  0.25  0.25  0.25]
   [ 0.25  0.25 -0.75  0.25]
   [ 0.25  0.25  0.25 -0.75]]
   ## notice that the first row is all 0 because label[0] is 1, which is equal to ignore_label.

   - The parameter `grad_scale` can be used to rescale the gradient, which is often used to
   give each loss function different weights.

   - This operator also supports various ways to normalize the gradient by `normalization`.
   The `normalization` is applied if softmax output has different shape than the labels.
   The `normalization` mode can be set to the following:

   - ``'null'``: do nothing.
   - ``'batch'``: divide the gradient by the batch size.
   - ``'valid'``: divide the gradient by the number of instances which are not ignored.



   Defined in src/operator/softmax_output.cc:L123
Parameters
symbol_namename of the resulting symbol
dataInput array.
labelGround truth label.
grad_scaleScales the gradient by a float factor.
ignore_labelThe instances whose labels == ignore_label will be ignored during backward, if use_ignore is set to true.
multi_outputIf set to true, the softmax function will be computed along axis 1. This is applied when the shape of input array differs from the shape of label array.
use_ignoreIf set to true, the ignore_label value will not contribute to the backward gradient.
preserve_shapeIf set to true, the softmax function will be computed along the last axis (-1).
normalizationNormalizes the gradient.
out_gradMultiplies gradient with output gradient element-wise.
smooth_alphaConstant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.
Returns
new symbol
Symbol mxnet::cpp::SoftmaxOutput ( Symbol  data,
Symbol  label,
mx_float  grad_scale = 1,
mx_float  ignore_label = -1,
bool  multi_output = false,
bool  use_ignore = false,
bool  preserve_shape = false,
SoftmaxOutputNormalization  normalization = SoftmaxOutputNormalization::kNull,
bool  out_grad = false,
mx_float  smooth_alpha = 0 
)
inline

Computes the gradient of cross entropy loss with respect to softmax output.

   - This operator computes the gradient in two steps.
   The cross entropy loss does not actually need to be computed.

   - Applies softmax function on the input array.
   - Computes and returns the gradient of cross entropy loss w.r.t. the softmax output.

   - The softmax function, cross entropy loss and gradient is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   - The gradient of cross entropy loss w.r.t softmax output:

   .. math:: \text{gradient} = \text{output} - \text{label}

   - During forward propagation, the softmax function is computed for each instance in the input array.

   For general *N*-D input arrays with shape :math:`(d_1, d_2, ..., d_n)`, the size is
   :math:`s=d_1 \cdot d_2 \cdot \cdot \cdot d_n`. We can use the parameters `preserve_shape`
   and `multi_output` to specify the way to compute softmax:

   - By default, `preserve_shape` is ``false``. This operator will reshape the input array
   into a 2-D array with shape :math:`(d_1, \frac{s}{d_1})` and then compute the softmax function for
   each row in the reshaped array, and afterwards reshape it back to the original shape
   :math:`(d_1, d_2, ..., d_n)`.
   - If `preserve_shape` is ``true``, the softmax function will be computed along
   the last axis (`axis` = ``-1``).
   - If `multi_output` is ``true``, the softmax function will be computed along
   the second axis (`axis` = ``1``).

   - During backward propagation, the gradient of cross-entropy loss w.r.t softmax output array is computed.
   The provided label can be a one-hot label array or a probability label array.

   - If the parameter `use_ignore` is ``true``, `ignore_label` can specify input instances
   with a particular label to be ignored during backward propagation. **This has no effect when
   softmax `output` has same shape as `label`**.

   Example::

   data = [[1,2,3,4],[2,2,2,2],[3,3,3,3],[4,4,4,4]]
   label = [1,0,2,3]
   ignore_label = 1
   SoftmaxOutput(data=data, label = label,\
   multi_output=true, use_ignore=true,\
   ignore_label=ignore_label)
   ## forward softmax output
   [[ 0.0320586   0.08714432  0.23688284  0.64391428]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]]
   ## backward gradient output
   [[ 0.    0.    0.    0.  ]
   [-0.75  0.25  0.25  0.25]
   [ 0.25  0.25 -0.75  0.25]
   [ 0.25  0.25  0.25 -0.75]]
   ## notice that the first row is all 0 because label[0] is 1, which is equal to ignore_label.

   - The parameter `grad_scale` can be used to rescale the gradient, which is often used to
   give each loss function different weights.

   - This operator also supports various ways to normalize the gradient by `normalization`.
   The `normalization` is applied if softmax output has different shape than the labels.
   The `normalization` mode can be set to the following:

   - ``'null'``: do nothing.
   - ``'batch'``: divide the gradient by the batch size.
   - ``'valid'``: divide the gradient by the number of instances which are not ignored.



   Defined in src/operator/softmax_output.cc:L123
Parameters
dataInput array.
labelGround truth label.
grad_scaleScales the gradient by a float factor.
ignore_labelThe instances whose labels == ignore_label will be ignored during backward, if use_ignore is set to true.
multi_outputIf set to true, the softmax function will be computed along axis 1. This is applied when the shape of input array differs from the shape of label array.
use_ignoreIf set to true, the ignore_label value will not contribute to the backward gradient.
preserve_shapeIf set to true, the softmax function will be computed along the last axis (-1).
normalizationNormalizes the gradient.
out_gradMultiplies gradient with output gradient element-wise.
smooth_alphaConstant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.
Returns
new symbol
Symbol mxnet::cpp::softmin ( const std::string &  symbol_name,
Symbol  data,
int  axis = -1,
dmlc::optional< double >  temperature = dmlc::optional<double>() 
)
inline

Applies the softmin function.

   The resulting array contains elements in the range (0,1) and the elements along the given axis sum
   up to 1.

   .. math::
   softmin(\mathbf{z/t})_j = \frac{e^{-z_j/t}}{\sum_{k=1}^K e^{-z_k/t}}

   for :math:`j = 1, ..., K`

   t is the temperature parameter in softmax function. By default, t equals 1.0

   Example::

   x = [[ 1.  2.  3.]
   [ 3.  2.  1.]]

   softmin(x,axis=0) = [[ 0.88079703,  0.5,  0.11920292],
   [ 0.11920292,  0.5,  0.88079703]]

   softmin(x,axis=1) = [[ 0.66524094,  0.24472848,  0.09003057],
   [ 0.09003057,  0.24472848,  0.66524094]]



   Defined in src/operator/nn/softmax.cc:L137
Parameters
symbol_namename of the resulting symbol
dataThe input array.
axisThe axis along which to compute softmax.
temperatureTemperature parameter in softmax
Returns
new symbol
Symbol mxnet::cpp::softmin ( Symbol  data,
int  axis = -1,
dmlc::optional< double >  temperature = dmlc::optional<double>() 
)
inline

Applies the softmin function.

   The resulting array contains elements in the range (0,1) and the elements along the given axis sum
   up to 1.

   .. math::
   softmin(\mathbf{z/t})_j = \frac{e^{-z_j/t}}{\sum_{k=1}^K e^{-z_k/t}}

   for :math:`j = 1, ..., K`

   t is the temperature parameter in softmax function. By default, t equals 1.0

   Example::

   x = [[ 1.  2.  3.]
   [ 3.  2.  1.]]

   softmin(x,axis=0) = [[ 0.88079703,  0.5,  0.11920292],
   [ 0.11920292,  0.5,  0.88079703]]

   softmin(x,axis=1) = [[ 0.66524094,  0.24472848,  0.09003057],
   [ 0.09003057,  0.24472848,  0.66524094]]



   Defined in src/operator/nn/softmax.cc:L137
Parameters
dataThe input array.
axisThe axis along which to compute softmax.
temperatureTemperature parameter in softmax
Returns
new symbol
Symbol mxnet::cpp::softsign ( const std::string &  symbol_name,
Symbol  data 
)
inline

Computes softsign of x element-wise.

   .. math::
   y = x / (1 + abs(x))

   The storage type of ``softsign`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L145
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::softsign ( Symbol  data)
inline

Computes softsign of x element-wise.

   .. math::
   y = x / (1 + abs(x))

   The storage type of ``softsign`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L145
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sort ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
bool  is_ascend = true 
)
inline

Returns a sorted copy of an input array along the given axis.

   Examples::

   x = [[ 1, 4],
   [ 3, 1]]

   // sorts along the last axis
   sort(x) = [[ 1.,  4.],
   [ 1.,  3.]]

   // flattens and then sorts
   sort(x, axis=None) = [ 1.,  1.,  3.,  4.]

   // sorts along the first axis
   sort(x, axis=0) = [[ 1.,  1.],
   [ 3.,  4.]]

   // in a descend order
   sort(x, is_ascend=0) = [[ 4.,  1.],
   [ 3.,  1.]]



   Defined in src/operator/tensor/ordering_op.cc:L127
Parameters
symbol_namename of the resulting symbol
dataThe input array
axisAxis along which to sort the input tensor. If not given, the flattened array is used. Default is -1.
is_ascendWhether to sort in ascending or descending order.
Returns
new symbol
Symbol mxnet::cpp::sort ( Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
bool  is_ascend = true 
)
inline

Returns a sorted copy of an input array along the given axis.

   Examples::

   x = [[ 1, 4],
   [ 3, 1]]

   // sorts along the last axis
   sort(x) = [[ 1.,  4.],
   [ 1.,  3.]]

   // flattens and then sorts
   sort(x, axis=None) = [ 1.,  1.,  3.,  4.]

   // sorts along the first axis
   sort(x, axis=0) = [[ 1.,  1.],
   [ 3.,  4.]]

   // in a descend order
   sort(x, is_ascend=0) = [[ 4.,  1.],
   [ 3.,  1.]]



   Defined in src/operator/tensor/ordering_op.cc:L127
Parameters
dataThe input array
axisAxis along which to sort the input tensor. If not given, the flattened array is used. Default is -1.
is_ascendWhether to sort in ascending or descending order.
Returns
new symbol
Symbol mxnet::cpp::space_to_depth ( const std::string &  symbol_name,
Symbol  data,
int  block_size 
)
inline

Rearranges(permutes) blocks of spatial data into depth. Similar to ONNX SpaceToDepth operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth

The output is a new tensor where the values from height and width dimension are moved to the depth dimension. The reverse of this operation is ``depth_to_space``.

.. math::

   \begin{gather*}
   x \prime = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size, block\_size]) \\
   x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) \\
   y = reshape(x \prime \prime, [N, C * (block\_size ^ 2), H / block\_size, W / block\_size])
   \end{gather*}

where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]` and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2), H / block\_size, W / block\_size]`.

Example::

x = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   space_to_depth(x, 2) = [[[[0, 1, 2],
   [3, 4, 5]],
   [[6, 7, 8],
   [9, 10, 11]],
   [[12, 13, 14],
   [15, 16, 17]],
   [[18, 19, 20],
   [21, 22, 23]]]]


   Defined in src/operator/tensor/matrix_op.cc:L1000
Parameters
symbol_namename of the resulting symbol
dataInput ndarray
block_sizeBlocks of [block_size, block_size] are moved
Returns
new symbol
Symbol mxnet::cpp::space_to_depth ( Symbol  data,
int  block_size 
)
inline

Rearranges(permutes) blocks of spatial data into depth. Similar to ONNX SpaceToDepth operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth

The output is a new tensor where the values from height and width dimension are moved to the depth dimension. The reverse of this operation is ``depth_to_space``.

.. math::

   \begin{gather*}
   x \prime = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size, block\_size]) \\
   x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) \\
   y = reshape(x \prime \prime, [N, C * (block\_size ^ 2), H / block\_size, W / block\_size])
   \end{gather*}

where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]` and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2), H / block\_size, W / block\_size]`.

Example::

x = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   space_to_depth(x, 2) = [[[[0, 1, 2],
   [3, 4, 5]],
   [[6, 7, 8],
   [9, 10, 11]],
   [[12, 13, 14],
   [15, 16, 17]],
   [[18, 19, 20],
   [21, 22, 23]]]]


   Defined in src/operator/tensor/matrix_op.cc:L1000
Parameters
dataInput ndarray
block_sizeBlocks of [block_size, block_size] are moved
Returns
new symbol
Symbol mxnet::cpp::SpatialTransformer ( const std::string &  symbol_name,
Symbol  data,
Symbol  loc,
SpatialTransformerTransformType  transform_type,
SpatialTransformerSamplerType  sampler_type,
Shape  target_shape = Shape(0,0),
dmlc::optional< bool >  cudnn_off = dmlc::optional<bool>() 
)
inline

Applies a spatial transformer to input feature map.

Parameters
symbol_namename of the resulting symbol
dataInput data to the SpatialTransformerOp.
loclocalisation net, the output dim should be 6 when transform_type is affine.
transform_typetransformation type
sampler_typesampling type
target_shapeoutput shape(h, w) of spatial transformer: (y, x)
cudnn_offwhether to turn cudnn off
Returns
new symbol
Symbol mxnet::cpp::SpatialTransformer ( Symbol  data,
Symbol  loc,
SpatialTransformerTransformType  transform_type,
SpatialTransformerSamplerType  sampler_type,
Shape  target_shape = Shape(0,0),
dmlc::optional< bool >  cudnn_off = dmlc::optional<bool>() 
)
inline

Applies a spatial transformer to input feature map.

Parameters
dataInput data to the SpatialTransformerOp.
loclocalisation net, the output dim should be 6 when transform_type is affine.
transform_typetransformation type
sampler_typesampling type
target_shapeoutput shape(h, w) of spatial transformer: (y, x)
cudnn_offwhether to turn cudnn off
Returns
new symbol
Symbol mxnet::cpp::sqrt ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise square-root value of the input.

   .. math::
   \textrm{sqrt}(x) = \sqrt{x}

   Example::

   sqrt([4, 9, 16]) = [2, 3, 4]

   The storage type of ``sqrt`` output depends upon the input storage type:

   - sqrt(default) = default
   - sqrt(row_sparse) = row_sparse
   - sqrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L840
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::sqrt ( Symbol  data)
inline

Returns element-wise square-root value of the input.

   .. math::
   \textrm{sqrt}(x) = \sqrt{x}

   Example::

   sqrt([4, 9, 16]) = [2, 3, 4]

   The storage type of ``sqrt`` output depends upon the input storage type:

   - sqrt(default) = default
   - sqrt(row_sparse) = row_sparse
   - sqrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L840
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::square ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns element-wise squared value of the input.

   .. math::
   square(x) = x^2

   Example::

   square([2, 3, 4]) = [4, 9, 16]

   The storage type of ``square`` output depends upon the input storage type:

   - square(default) = default
   - square(row_sparse) = row_sparse
   - square(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L817
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::square ( Symbol  data)
inline

Returns element-wise squared value of the input.

   .. math::
   square(x) = x^2

   Example::

   square([2, 3, 4]) = [4, 9, 16]

   The storage type of ``square`` output depends upon the input storage type:

   - square(default) = default
   - square(row_sparse) = row_sparse
   - square(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L817
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::squeeze ( const std::string &  symbol_name,
const std::vector< Symbol > &  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>() 
)
inline

Remove single-dimensional entries from the shape of an array. The output tensor shape is defined the same way as in numpy.squeeze for most cases. See the following note for the exception.

Examples::

data = [[[0], [1], [2]]] squeeze(data) = [0, 1, 2] squeeze(data, axis=0) = [[0], [1], [2]] squeeze(data, axis=2) = [[0, 1, 2]] squeeze(data, axis=(0, 2)) = [0, 1, 2]

.. Note:: The output of this operator will keep at least one dimension not removed. For example, squeeze([[[4]]]) = [4], while in numpy.squeeze, the output will become a scalar.

Parameters
symbol_namename of the resulting symbol
datadata to squeeze
axisSelects a subset of the single-dimensional entries in the shape. If an axis is selected with shape entry greater than one, an error is raised.
Returns
new symbol
Symbol mxnet::cpp::squeeze ( const std::vector< Symbol > &  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>() 
)
inline

Remove single-dimensional entries from the shape of an array. The output tensor shape is defined the same way as in numpy.squeeze for most cases. See the following note for the exception.

Examples::

data = [[[0], [1], [2]]] squeeze(data) = [0, 1, 2] squeeze(data, axis=0) = [[0], [1], [2]] squeeze(data, axis=2) = [[0, 1, 2]] squeeze(data, axis=(0, 2)) = [0, 1, 2]

.. Note:: The output of this operator will keep at least one dimension not removed. For example, squeeze([[[4]]]) = [4], while in numpy.squeeze, the output will become a scalar.

Parameters
datadata to squeeze
axisSelects a subset of the single-dimensional entries in the shape. If an axis is selected with shape entry greater than one, an error is raised.
Returns
new symbol
Symbol mxnet::cpp::stack ( const std::string &  symbol_name,
const std::vector< Symbol > &  data,
int  num_args,
int  axis = 0 
)
inline

Join a sequence of arrays along a new axis.

   The axis parameter specifies the index of the new axis in the dimensions of the
   result. For example, if axis=0 it will be the first dimension and if axis=-1 it
   will be the last dimension.

   Examples::

   x = [1, 2]
   y = [3, 4]

   stack(x, y) = [[1, 2],
   [3, 4]]
   stack(x, y, axis=1) = [[1, 3],
   [2, 4]]
Parameters
symbol_namename of the resulting symbol
dataList of arrays to stack
num_argsNumber of inputs to be stacked.
axisThe axis in the result array along which the input arrays are stacked.
Returns
new symbol
Symbol mxnet::cpp::stack ( const std::vector< Symbol > &  data,
int  num_args,
int  axis = 0 
)
inline

Join a sequence of arrays along a new axis.

   The axis parameter specifies the index of the new axis in the dimensions of the
   result. For example, if axis=0 it will be the first dimension and if axis=-1 it
   will be the last dimension.

   Examples::

   x = [1, 2]
   y = [3, 4]

   stack(x, y) = [[1, 2],
   [3, 4]]
   stack(x, y, axis=1) = [[1, 3],
   [2, 4]]
Parameters
dataList of arrays to stack
num_argsNumber of inputs to be stacked.
axisThe axis in the result array along which the input arrays are stacked.
Returns
new symbol
Symbol mxnet::cpp::sum ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the sum of array elements over given axes.

   .. Note::

   `sum` and `sum_axis` are equivalent.
   For ndarray of csr storage type summation along axis 0 and axis 1 is supported.
   Setting keepdims or exclude to True will cause a fallback to dense operator.

   Example::

   data = [[[1, 2], [2, 3], [1, 3]],
   [[1, 4], [4, 3], [5, 2]],
   [[7, 1], [7, 2], [7, 3]]]

   sum(data, axis=1)
   [[  4.   8.]
   [ 10.   9.]
   [ 21.   6.]]

   sum(data, axis=[1,2])
   [ 12.  19.  27.]

   data = [[1, 2, 0],
   [3, 0, 1],
   [4, 1, 0]]

   csr = cast_storage(data, 'csr')

   sum(csr, axis=0)
   [ 8.  3.  1.]

   sum(csr, axis=1)
   [ 3.  4.  5.]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L116
Parameters
symbol_namename of the resulting symbol
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::sum ( Symbol  data,
dmlc::optional< Shape >  axis = dmlc::optional<Shape>(),
bool  keepdims = false,
bool  exclude = false 
)
inline

Computes the sum of array elements over given axes.

   .. Note::

   `sum` and `sum_axis` are equivalent.
   For ndarray of csr storage type summation along axis 0 and axis 1 is supported.
   Setting keepdims or exclude to True will cause a fallback to dense operator.

   Example::

   data = [[[1, 2], [2, 3], [1, 3]],
   [[1, 4], [4, 3], [5, 2]],
   [[7, 1], [7, 2], [7, 3]]]

   sum(data, axis=1)
   [[  4.   8.]
   [ 10.   9.]
   [ 21.   6.]]

   sum(data, axis=[1,2])
   [ 12.  19.  27.]

   data = [[1, 2, 0],
   [3, 0, 1],
   [4, 1, 0]]

   csr = cast_storage(data, 'csr')

   sum(csr, axis=0)
   [ 8.  3.  1.]

   sum(csr, axis=1)
   [ 3.  4.  5.]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L116
Parameters
dataThe input
axisThe axis or axes along which to perform the reduction.
   The default, `axis=()`, will compute over all elements into a
   scalar array with shape `(1,)`.

   If `axis` is int, a reduction is performed on a particular axis.

   If `axis` is a tuple of ints, a reduction is performed on all the axes
   specified in the tuple.

   If `exclude` is true, reduction will be performed on the axes that are
   NOT in axis instead.

   Negative values means indexing from right to left.
keepdimsIf this is set to True, the reduced axes are left in the result as dimensions with size one.
excludeWhether to perform reduction on axis that are NOT in axis instead.
Returns
new symbol
Symbol mxnet::cpp::SVMOutput ( const std::string &  symbol_name,
Symbol  data,
Symbol  label,
mx_float  margin = 1,
mx_float  regularization_coefficient = 1,
bool  use_linear = false 
)
inline

Computes support vector machine based transformation of the input.

   This tutorial demonstrates using SVM as output layer for classification instead of softmax:
   https://github.com/dmlc/mxnet/tree/master/example/svm_mnist.
Parameters
symbol_namename of the resulting symbol
dataInput data for SVM transformation.
labelClass label for the input data.
marginThe loss function penalizes outputs that lie outside this margin.
regularization_coefficientRegularization parameter for the SVM. This balances the tradeoff between coefficient size and error.
use_linearWhether to use L1-SVM objective. L2-SVM objective is used by default.
Returns
new symbol
Symbol mxnet::cpp::SVMOutput ( Symbol  data,
Symbol  label,
mx_float  margin = 1,
mx_float  regularization_coefficient = 1,
bool  use_linear = false 
)
inline

Computes support vector machine based transformation of the input.

   This tutorial demonstrates using SVM as output layer for classification instead of softmax:
   https://github.com/dmlc/mxnet/tree/master/example/svm_mnist.
Parameters
dataInput data for SVM transformation.
labelClass label for the input data.
marginThe loss function penalizes outputs that lie outside this margin.
regularization_coefficientRegularization parameter for the SVM. This balances the tradeoff between coefficient size and error.
use_linearWhether to use L1-SVM objective. L2-SVM objective is used by default.
Returns
new symbol
Symbol mxnet::cpp::SwapAxis ( const std::string &  symbol_name,
Symbol  data,
uint32_t  dim1 = 0,
uint32_t  dim2 = 0 
)
inline

Interchanges two axes of an array.

   Examples::

   x = [[1, 2, 3]]
   swapaxes(x, 0, 1) = [[ 1],
   [ 2],
   [ 3]]

   x = [[[ 0, 1],
   [ 2, 3]],
   [[ 4, 5],
   [ 6, 7]]]  // (2,2,2) array

   swapaxes(x, 0, 2) = [[[ 0, 4],
   [ 2, 6]],
   [[ 1, 5],
   [ 3, 7]]]


   Defined in src/operator/swapaxis.cc:L70
Parameters
symbol_namename of the resulting symbol
dataInput array.
dim1the first axis to be swapped.
dim2the second axis to be swapped.
Returns
new symbol
Symbol mxnet::cpp::SwapAxis ( Symbol  data,
uint32_t  dim1 = 0,
uint32_t  dim2 = 0 
)
inline

Interchanges two axes of an array.

   Examples::

   x = [[1, 2, 3]]
   swapaxes(x, 0, 1) = [[ 1],
   [ 2],
   [ 3]]

   x = [[[ 0, 1],
   [ 2, 3]],
   [[ 4, 5],
   [ 6, 7]]]  // (2,2,2) array

   swapaxes(x, 0, 2) = [[[ 0, 4],
   [ 2, 6]],
   [[ 1, 5],
   [ 3, 7]]]


   Defined in src/operator/swapaxis.cc:L70
Parameters
dataInput array.
dim1the first axis to be swapped.
dim2the second axis to be swapped.
Returns
new symbol
Symbol mxnet::cpp::take ( const std::string &  symbol_name,
Symbol  a,
Symbol  indices,
int  axis = 0,
TakeMode  mode = TakeMode::kClip 
)
inline

Takes elements from an input array along the given axis.

   This function slices the input array along a particular axis with the provided indices.

   Given data tensor of rank r >= 1, and indices tensor of rank q, gather entries of the axis
   dimension of data (by default outer-most one as axis=0) indexed by indices, and concatenates them
   in an output tensor of rank q + (r - 1).

   Examples::

   x = [4.  5.  6.]

   // Trivial case, take the second element along the first axis.

   take(x, [1]) = [ 5. ]

   // The other trivial case, axis=-1, take the third element along the first axis

   take(x, [3], axis=-1, mode='clip') = [ 6. ]

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // In this case we will get rows 0 and 1, then 1 and 2. Along axis 0

   take(x, [[0,1],[1,2]]) = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]

   // In this case we will get rows 0 and 1, then 1 and 2 (calculated by wrapping around).
   // Along axis 1

   take(x, [[0, 3], [-1, -2]], axis=1, mode='wrap') = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]

   The storage type of ``take`` output depends upon the input storage type:

   - take(default, default) = default
   - take(csr, default, axis=0) = csr



   Defined in src/operator/tensor/indexing_op.cc:L692
Parameters
symbol_namename of the resulting symbol
aThe input array.
indicesThe indices of the values to be extracted.
axisThe axis of input array to be taken. For input tensor of rank r, it could be in the range of [-r, r-1].
modeSpecify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.
Returns
new symbol
Symbol mxnet::cpp::take ( Symbol  a,
Symbol  indices,
int  axis = 0,
TakeMode  mode = TakeMode::kClip 
)
inline

Takes elements from an input array along the given axis.

   This function slices the input array along a particular axis with the provided indices.

   Given data tensor of rank r >= 1, and indices tensor of rank q, gather entries of the axis
   dimension of data (by default outer-most one as axis=0) indexed by indices, and concatenates them
   in an output tensor of rank q + (r - 1).

   Examples::

   x = [4.  5.  6.]

   // Trivial case, take the second element along the first axis.

   take(x, [1]) = [ 5. ]

   // The other trivial case, axis=-1, take the third element along the first axis

   take(x, [3], axis=-1, mode='clip') = [ 6. ]

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // In this case we will get rows 0 and 1, then 1 and 2. Along axis 0

   take(x, [[0,1],[1,2]]) = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]

   // In this case we will get rows 0 and 1, then 1 and 2 (calculated by wrapping around).
   // Along axis 1

   take(x, [[0, 3], [-1, -2]], axis=1, mode='wrap') = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]

   The storage type of ``take`` output depends upon the input storage type:

   - take(default, default) = default
   - take(csr, default, axis=0) = csr



   Defined in src/operator/tensor/indexing_op.cc:L692
Parameters
aThe input array.
indicesThe indices of the values to be extracted.
axisThe axis of input array to be taken. For input tensor of rank r, it could be in the range of [-r, r-1].
modeSpecify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.
Returns
new symbol
Symbol mxnet::cpp::tan ( const std::string &  symbol_name,
Symbol  data 
)
inline

Computes the element-wise tangent of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   tan([0, \pi/4, \pi/2]) = [0, 1, -inf]

   The storage type of ``tan`` output depends upon the input storage type:

   - tan(default) = default
   - tan(row_sparse) = row_sparse
   - tan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L83
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::tan ( Symbol  data)
inline

Computes the element-wise tangent of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   tan([0, \pi/4, \pi/2]) = [0, 1, -inf]

   The storage type of ``tan`` output depends upon the input storage type:

   - tan(default) = default
   - tan(row_sparse) = row_sparse
   - tan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L83
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::tanh ( const std::string &  symbol_name,
Symbol  data 
)
inline

Returns the hyperbolic tangent of the input array, computed element-wise.

   .. math::
   tanh(x) = sinh(x) / cosh(x)

   The storage type of ``tanh`` output depends upon the input storage type:

   - tanh(default) = default
   - tanh(row_sparse) = row_sparse
   - tanh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L234
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::tanh ( Symbol  data)
inline

Returns the hyperbolic tangent of the input array, computed element-wise.

   .. math::
   tanh(x) = sinh(x) / cosh(x)

   The storage type of ``tanh`` output depends upon the input storage type:

   - tanh(default) = default
   - tanh(row_sparse) = row_sparse
   - tanh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L234
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::tile ( const std::string &  symbol_name,
Symbol  data,
Shape  reps 
)
inline

Repeats the whole array multiple times.

   If ``reps`` has length *d*, and input array has dimension of *n*. There are
   three cases:

   - **n=d**. Repeat *i*-th dimension of the input by ``reps[i]`` times::

   x = [[1, 2],
   [3, 4]]

   tile(x, reps=(2,3)) = [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]

   - **n>d**. ``reps`` is promoted to length *n* by pre-pending 1's to it. Thus for
   an input shape ``(2,3)``, ``reps=(2,)`` is treated as ``(1,2)``::


   tile(x, reps=(2,)) = [[ 1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.]]

   - **n<d**. The input is promoted to be d-dimensional by prepending new axes. So
   shape ``(2,2)`` array is promoted to ``(1,2,2)`` for 3-D replication::

   tile(x, reps=(2,2,3)) = [[[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]],

   [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]]


   Defined in src/operator/tensor/matrix_op.cc:L753
Parameters
symbol_namename of the resulting symbol
dataInput data array
repsThe number of times for repeating the tensor a. Each dim size of reps must be a positive integer. If reps has length d, the result will have dimension of max(d, a.ndim); If a.ndim < d, a is promoted to be d-dimensional by prepending new axes. If a.ndim > d, reps is promoted to a.ndim by pre-pending 1's to it.
Returns
new symbol
Symbol mxnet::cpp::tile ( Symbol  data,
Shape  reps 
)
inline

Repeats the whole array multiple times.

   If ``reps`` has length *d*, and input array has dimension of *n*. There are
   three cases:

   - **n=d**. Repeat *i*-th dimension of the input by ``reps[i]`` times::

   x = [[1, 2],
   [3, 4]]

   tile(x, reps=(2,3)) = [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]

   - **n>d**. ``reps`` is promoted to length *n* by pre-pending 1's to it. Thus for
   an input shape ``(2,3)``, ``reps=(2,)`` is treated as ``(1,2)``::


   tile(x, reps=(2,)) = [[ 1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.]]

   - **n<d**. The input is promoted to be d-dimensional by prepending new axes. So
   shape ``(2,2)`` array is promoted to ``(1,2,2)`` for 3-D replication::

   tile(x, reps=(2,2,3)) = [[[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]],

   [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]]


   Defined in src/operator/tensor/matrix_op.cc:L753
Parameters
dataInput data array
repsThe number of times for repeating the tensor a. Each dim size of reps must be a positive integer. If reps has length d, the result will have dimension of max(d, a.ndim); If a.ndim < d, a is promoted to be d-dimensional by prepending new axes. If a.ndim > d, reps is promoted to a.ndim by pre-pending 1's to it.
Returns
new symbol
Symbol mxnet::cpp::topk ( const std::string &  symbol_name,
Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
int  k = 1,
TopkRetTyp  ret_typ = TopkRetTyp::kIndices,
bool  is_ascend = false,
TopkDtype  dtype = TopkDtype::kFloat32 
)
inline

Returns the top k elements in an input array along the given axis. The returned elements will be sorted.

Examples::

x = [[ 0.3, 0.2, 0.4], [ 0.1, 0.3, 0.2]]

// returns an index of the largest element on last axis topk(x) = [[ 2.], [ 1.]]

// returns the value of top-2 largest elements on last axis topk(x, ret_typ='value', k=2) = [[ 0.4, 0.3], [ 0.3, 0.2]]

// returns the value of top-2 smallest elements on last axis topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 , 0.3], [ 0.1 , 0.2]]

// returns the value of top-2 largest elements on axis 0 topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3, 0.3, 0.4], [ 0.1, 0.2, 0.2]]

// flattens and then returns list of both values and indices topk(x, ret_typ='both', k=2) = [[[ 0.4, 0.3], [ 0.3, 0.2]] , [[ 2., 0.], [ 1., 2.]]]

   Defined in src/operator/tensor/ordering_op.cc:L64
Parameters
symbol_namename of the resulting symbol
dataThe input array
axisAxis along which to choose the top k indices. If not given, the flattened array is used. Default is -1.
kNumber of top elements to select, should be always smaller than or equal to the element number in the given axis. A global sort is performed if set k < 1.
ret_typThe return type. "value" means to return the top k values, "indices" means to return the indices of the top k values, "mask" means to return a mask array containing 0 and 1. 1 means the top k values. "both" means to return a list of both values and indices of top k elements.
is_ascendWhether to choose k largest or k smallest elements. Top K largest elements will be chosen if set to false.
dtypeDType of the output indices when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.
Returns
new symbol
Symbol mxnet::cpp::topk ( Symbol  data,
dmlc::optional< int >  axis = dmlc::optional<int>(-1),
int  k = 1,
TopkRetTyp  ret_typ = TopkRetTyp::kIndices,
bool  is_ascend = false,
TopkDtype  dtype = TopkDtype::kFloat32 
)
inline

Returns the top k elements in an input array along the given axis. The returned elements will be sorted.

Examples::

x = [[ 0.3, 0.2, 0.4], [ 0.1, 0.3, 0.2]]

// returns an index of the largest element on last axis topk(x) = [[ 2.], [ 1.]]

// returns the value of top-2 largest elements on last axis topk(x, ret_typ='value', k=2) = [[ 0.4, 0.3], [ 0.3, 0.2]]

// returns the value of top-2 smallest elements on last axis topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 , 0.3], [ 0.1 , 0.2]]

// returns the value of top-2 largest elements on axis 0 topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3, 0.3, 0.4], [ 0.1, 0.2, 0.2]]

// flattens and then returns list of both values and indices topk(x, ret_typ='both', k=2) = [[[ 0.4, 0.3], [ 0.3, 0.2]] , [[ 2., 0.], [ 1., 2.]]]

   Defined in src/operator/tensor/ordering_op.cc:L64
Parameters
dataThe input array
axisAxis along which to choose the top k indices. If not given, the flattened array is used. Default is -1.
kNumber of top elements to select, should be always smaller than or equal to the element number in the given axis. A global sort is performed if set k < 1.
ret_typThe return type. "value" means to return the top k values, "indices" means to return the indices of the top k values, "mask" means to return a mask array containing 0 and 1. 1 means the top k values. "both" means to return a list of both values and indices of top k elements.
is_ascendWhether to choose k largest or k smallest elements. Top K largest elements will be chosen if set to false.
dtypeDType of the output indices when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.
Returns
new symbol
Symbol mxnet::cpp::transpose ( const std::string &  symbol_name,
Symbol  data,
Shape  axes = Shape() 
)
inline

Permutes the dimensions of an array.

   Examples::

   x = [[ 1, 2],
   [ 3, 4]]

   transpose(x) = [[ 1.,  3.],
   [ 2.,  4.]]

   x = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 5.,  6.],
   [ 7.,  8.]]]

   transpose(x) = [[[ 1.,  5.],
   [ 3.,  7.]],

   [[ 2.,  6.],
   [ 4.,  8.]]]

   transpose(x, axes=(1,0,2)) = [[[ 1.,  2.],
   [ 5.,  6.]],

   [[ 3.,  4.],
   [ 7.,  8.]]]


   Defined in src/operator/tensor/matrix_op.cc:L312
Parameters
symbol_namename of the resulting symbol
dataSource input
axesTarget axis order. By default the axes will be inverted.
Returns
new symbol
Symbol mxnet::cpp::transpose ( Symbol  data,
Shape  axes = Shape() 
)
inline

Permutes the dimensions of an array.

   Examples::

   x = [[ 1, 2],
   [ 3, 4]]

   transpose(x) = [[ 1.,  3.],
   [ 2.,  4.]]

   x = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 5.,  6.],
   [ 7.,  8.]]]

   transpose(x) = [[[ 1.,  5.],
   [ 3.,  7.]],

   [[ 2.,  6.],
   [ 4.,  8.]]]

   transpose(x, axes=(1,0,2)) = [[[ 1.,  2.],
   [ 5.,  6.]],

   [[ 3.,  4.],
   [ 7.,  8.]]]


   Defined in src/operator/tensor/matrix_op.cc:L312
Parameters
dataSource input
axesTarget axis order. By default the axes will be inverted.
Returns
new symbol
Symbol mxnet::cpp::trunc ( const std::string &  symbol_name,
Symbol  data 
)
inline

Return the element-wise truncated value of the input.

   The truncated value of the scalar x is the nearest integer i which is closer to
   zero than x is. In short, the fractional part of the signed number x is discarded.

   Example::

   trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  1.,  1.,  2.]

   The storage type of ``trunc`` output depends upon the input storage type:

   - trunc(default) = default
   - trunc(row_sparse) = row_sparse
   - trunc(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L779
Parameters
symbol_namename of the resulting symbol
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::trunc ( Symbol  data)
inline

Return the element-wise truncated value of the input.

   The truncated value of the scalar x is the nearest integer i which is closer to
   zero than x is. In short, the fractional part of the signed number x is discarded.

   Example::

   trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  1.,  1.,  2.]

   The storage type of ``trunc`` output depends upon the input storage type:

   - trunc(default) = default
   - trunc(row_sparse) = row_sparse
   - trunc(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L779
Parameters
dataThe input array.
Returns
new symbol
Symbol mxnet::cpp::UpSampling ( const std::string &  symbol_name,
const std::vector< Symbol > &  data,
int  scale,
UpSamplingSampleType  sample_type,
int  num_args,
int  num_filter = 0,
UpSamplingMultiInputMode  multi_input_mode = UpSamplingMultiInputMode::kConcat,
uint64_t  workspace = 512 
)
inline

Performs nearest neighbor/bilinear up sampling to inputs.

Parameters
symbol_namename of the resulting symbol
dataArray of tensors to upsample
scaleUp sampling scale
sample_typeupsampling method
num_argsNumber of inputs to be upsampled. For nearest neighbor upsampling, this can be 1-N; the size of output will be (scale*h_0,scale*w_0) and all other inputs will be upsampled to the same size. For bilinear upsampling this must be 2; 1 input and 1 weight.
num_filterInput filter. Only used by bilinear sample_type.
multi_input_modeHow to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.
workspaceTmp workspace for deconvolution (MB)
Returns
new symbol
Symbol mxnet::cpp::UpSampling ( const std::vector< Symbol > &  data,
int  scale,
UpSamplingSampleType  sample_type,
int  num_args,
int  num_filter = 0,
UpSamplingMultiInputMode  multi_input_mode = UpSamplingMultiInputMode::kConcat,
uint64_t  workspace = 512 
)
inline

Performs nearest neighbor/bilinear up sampling to inputs.

Parameters
dataArray of tensors to upsample
scaleUp sampling scale
sample_typeupsampling method
num_argsNumber of inputs to be upsampled. For nearest neighbor upsampling, this can be 1-N; the size of output will be (scale*h_0,scale*w_0) and all other inputs will be upsampled to the same size. For bilinear upsampling this must be 2; 1 input and 1 weight.
num_filterInput filter. Only used by bilinear sample_type.
multi_input_modeHow to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.
workspaceTmp workspace for deconvolution (MB)
Returns
new symbol
Symbol mxnet::cpp::where ( const std::string &  symbol_name,
Symbol  condition,
Symbol  x,
Symbol  y 
)
inline

Return the elements, either from x or y, depending on the condition.

   Given three ndarrays, condition, x, and y, return an ndarray with the elements
   from x or y, depending on whether the elements of condition are true or false.
   x and y must have the same shape. If condition has the same shape as x, each
   element in the output array is from x if the corresponding element in the
   condition is true, and from y if false.

   If condition does not have the same shape as x, it must be a 1D array whose size is
   the same as x's first dimension size. Each row of the output array is from x's row
   if the corresponding element from condition is true, and from y's row if false.

   Note that all non-zero values are interpreted as ``True`` in condition.

   Examples::

   x = [[1, 2], [3, 4]]
   y = [[5, 6], [7, 8]]
   cond = [[0, 1], [-1, 0]]

   where(cond, x, y) = [[5, 2], [3, 8]]

   csr_cond = cast_storage(cond, 'csr')

   where(csr_cond, x, y) = [[5, 2], [3, 8]]



   Defined in src/operator/tensor/control_flow_op.cc:L57
Parameters
symbol_namename of the resulting symbol
conditioncondition array
x
y
Returns
new symbol
Symbol mxnet::cpp::where ( Symbol  condition,
Symbol  x,
Symbol  y 
)
inline

Return the elements, either from x or y, depending on the condition.

   Given three ndarrays, condition, x, and y, return an ndarray with the elements
   from x or y, depending on whether the elements of condition are true or false.
   x and y must have the same shape. If condition has the same shape as x, each
   element in the output array is from x if the corresponding element in the
   condition is true, and from y if false.

   If condition does not have the same shape as x, it must be a 1D array whose size is
   the same as x's first dimension size. Each row of the output array is from x's row
   if the corresponding element from condition is true, and from y's row if false.

   Note that all non-zero values are interpreted as ``True`` in condition.

   Examples::

   x = [[1, 2], [3, 4]]
   y = [[5, 6], [7, 8]]
   cond = [[0, 1], [-1, 0]]

   where(cond, x, y) = [[5, 2], [3, 8]]

   csr_cond = cast_storage(cond, 'csr')

   where(csr_cond, x, y) = [[5, 2], [3, 8]]



   Defined in src/operator/tensor/control_flow_op.cc:L57
Parameters
conditioncondition array
x
y
Returns
new symbol
Symbol mxnet::cpp::zeros_like ( const std::string &  symbol_name,
Symbol  data 
)
inline

Return an array of zeros with the same shape, type and storage type as the input array.

The storage type of zeros_like output depends on the storage type of the input:

  • zeros_like(row_sparse) = row_sparse
  • zeros_like(csr) = csr
  • zeros_like(default) = default

Examples::

x = [[ 1., 1., 1.], [ 1., 1., 1.]]

zeros_like(x) = [[ 0., 0., 0.], [ 0., 0., 0.]]

Parameters
symbol_namename of the resulting symbol
dataThe input
Returns
new symbol
Symbol mxnet::cpp::zeros_like ( Symbol  data)
inline

Return an array of zeros with the same shape, type and storage type as the input array.

The storage type of zeros_like output depends on the storage type of the input:

  • zeros_like(row_sparse) = row_sparse
  • zeros_like(csr) = csr
  • zeros_like(default) = default

Examples::

x = [[ 1., 1., 1.], [ 1., 1., 1.]]

zeros_like(x) = [[ 0., 0., 0.], [ 0., 0., 0.]]

Parameters
dataThe input
Returns
new symbol